├── examples ├── asrexample │ ├── test.pcm │ └── asrexample.go ├── flashexample │ ├── test.pcm │ └── flashexample.go ├── soeexample │ ├── english.wav │ └── main.go ├── virtual_number_example │ ├── test.pcm │ └── virtual_number_example.go └── ttsexample │ ├── ttsexample.go │ └── ttswsexample.go ├── tts ├── utils.go ├── speechsynthesizer.go └── speechwssynthesizer.go ├── xcheck-input.json ├── go.mod ├── CHANGELOG.md ├── README.md ├── common └── credential.go ├── asr ├── flashrecognizer.go ├── virtual_number_recogizer.go └── speechrecognizer.go ├── LICENSE └── soe └── speaking_assessment.go /examples/asrexample/test.pcm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentCloud/tencentcloud-speech-sdk-go/HEAD/examples/asrexample/test.pcm -------------------------------------------------------------------------------- /examples/flashexample/test.pcm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentCloud/tencentcloud-speech-sdk-go/HEAD/examples/flashexample/test.pcm -------------------------------------------------------------------------------- /examples/soeexample/english.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentCloud/tencentcloud-speech-sdk-go/HEAD/examples/soeexample/english.wav -------------------------------------------------------------------------------- /examples/virtual_number_example/test.pcm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TencentCloud/tencentcloud-speech-sdk-go/HEAD/examples/virtual_number_example/test.pcm -------------------------------------------------------------------------------- /tts/utils.go: -------------------------------------------------------------------------------- 1 | package tts 2 | 3 | import "os" 4 | 5 | func WriteFile(filename 
string, content []byte) error { 6 | fout, err := os.Create(filename) 7 | defer fout.Close() 8 | if err != nil { 9 | return err 10 | } 11 | 12 | _, err = fout.Write(content) 13 | if err != nil { 14 | return err 15 | } 16 | return nil 17 | } 18 | -------------------------------------------------------------------------------- /xcheck-input.json: -------------------------------------------------------------------------------- 1 | {"server": "http://xcheck.woa.com", "token": "1bbabf40-88ac-421b-9241-92498025832d", "proj-name": "", "proj-dir": "/data/__qci/root-workspaces/__qci-pipeline-10665501-1/tencentcloud-speech-sdk-go", "proj-url": "", "proj-lang": "", "result-type": "json", "output": "output.json", "timeout": 300, "excl-dirs": "", "excl-dir-ptns": "", "diff-files": []} -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/tencentcloud/tencentcloud-speech-sdk-go 2 | 3 | go 1.12 4 | 5 | require ( 6 | github.com/BurntSushi/toml v0.3.1 // indirect 7 | github.com/google/uuid v1.1.2 8 | github.com/gorilla/websocket v1.4.2 9 | golang.org/x/net v0.0.0-20200904194848-62affa334b73 10 | google.golang.org/genproto v0.0.0-20200925023002-c2d885f95484 // indirect 11 | ) 12 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [Unreleased] 9 | 10 | ## [1.0.0] - 2020-10-16 11 | 12 | ### Added 13 | 14 | - Added asr and tts sdk. 15 | - Added sdk examples. 
16 | 17 | [1.0.0]: https://github.com/TencentCloud/tencentcloud-speech-sdk-go/releases/tag/1.0.0 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 简介 2 | 3 | 欢迎使用腾讯云语音SDK,腾讯云语音SDK为开发者提供了访问腾讯云语音识别、语音合成等语音服务的配套开发工具,简化腾讯云语音服务的接入流程。 4 | 5 | 本项目是腾讯云语音SDK的Go语言版本。 6 | 7 | # 依赖环境 8 | 9 | 1. Go 1.13 版本及以上,推荐使用go mod方式引用安装。 10 | 2. 使用相关产品前需要在腾讯云控制台已开通相关语音产品。 11 | 3. 在腾讯云控制台[账号信息](https://console.cloud.tencent.com/developer)页面查看账号APPID,[访问管理](https://console.cloud.tencent.com/cam/capi)页面获取 SecretID 和 SecretKey 。 12 | 13 | # 获取安装 14 | 15 | 推荐使用语言自带的工具安装 SDK : 16 | 17 | go get github.com/tencentcloud/tencentcloud-speech-sdk-go@latest 18 | 19 | # 示例 20 | 21 | 参见 [examples](https://github.com/TencentCloud/tencentcloud-speech-sdk-go/tree/master/examples) 目录,该目录下包含各语音服务的示例代码。 22 | -------------------------------------------------------------------------------- /common/credential.go: -------------------------------------------------------------------------------- 1 | package common 2 | 3 | type Credential struct { 4 | SecretId string 5 | SecretKey string 6 | Token string 7 | } 8 | 9 | func NewCredential(secretId, secretKey string) *Credential { 10 | return &Credential{ 11 | SecretId: secretId, 12 | SecretKey: secretKey, 13 | } 14 | } 15 | 16 | func NewTokenCredential(secretId, secretKey, token string) *Credential { 17 | return &Credential{ 18 | SecretId: secretId, 19 | SecretKey: secretKey, 20 | Token: token, 21 | } 22 | } 23 | 24 | func (c *Credential) GetCredentialParams() map[string]string { 25 | p := map[string]string{ 26 | "SecretId": c.SecretId, 27 | } 28 | if c.Token != "" { 29 | p["Token"] = c.Token 30 | } 31 | return p 32 | } 33 | -------------------------------------------------------------------------------- /examples/flashexample/flashexample.go: -------------------------------------------------------------------------------- 1 | package 
main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "io/ioutil" 7 | "os" 8 | "sync" 9 | "time" 10 | 11 | "github.com/tencentcloud/tencentcloud-speech-sdk-go/asr" 12 | "github.com/tencentcloud/tencentcloud-speech-sdk-go/common" 13 | ) 14 | 15 | var ( 16 | // AppID AppID 17 | AppID = "AppID" 18 | // SecretID SecretID 19 | SecretID = "" 20 | // SecretKey SecretKey 21 | SecretKey = "" 22 | // EngineType EngineType 23 | EngineType = "16k_zh" 24 | ) 25 | 26 | func main() { 27 | var c = flag.Int("c", 1, "concurrency") 28 | var l = flag.Bool("l", false, "loop or not") 29 | var f = flag.String("f", "test.pcm", "audio file") 30 | flag.Parse() 31 | 32 | var wg sync.WaitGroup 33 | for i := 0; i < *c; i++ { 34 | fmt.Println("Main: Starting worker", i) 35 | wg.Add(1) 36 | if *l { 37 | go processLoop(i, &wg, *f) 38 | } else { 39 | go processOnce(i, &wg, *f) 40 | } 41 | } 42 | 43 | fmt.Println("Main: Waiting for workers to finish") 44 | wg.Wait() 45 | fmt.Println("Main: Completed") 46 | 47 | } 48 | 49 | func processLoop(id int, wg *sync.WaitGroup, file string) { 50 | defer wg.Done() 51 | for { 52 | process(id, file) 53 | } 54 | } 55 | 56 | func processOnce(id int, wg *sync.WaitGroup, file string) { 57 | defer wg.Done() 58 | process(id, file) 59 | } 60 | 61 | func process(id int, file string) { 62 | audio, err := os.Open(file) 63 | defer audio.Close() 64 | if err != nil { 65 | fmt.Printf("open file error: %v\n", err) 66 | return 67 | } 68 | credential := common.NewCredential(SecretID, SecretKey) 69 | recognizer := asr.NewFlashRecognizer(AppID, credential) 70 | data, err := ioutil.ReadAll(audio) 71 | if err != nil { 72 | fmt.Printf("%s|failed read data, error: %v\n", time.Now().Format("2006-01-02 15:04:05"), err) 73 | return 74 | } 75 | 76 | req := new(asr.FlashRecognitionRequest) 77 | req.EngineType = EngineType 78 | req.VoiceFormat = "pcm" 79 | req.SpeakerDiarization = 0 80 | req.FilterDirty = 0 81 | req.FilterModal = 0 82 | req.FilterPunc = 0 83 | req.ConvertNumMode = 1 84 | 
req.FirstChannelOnly = 1 85 | req.WordInfo = 0 86 | 87 | resp, err := recognizer.Recognize(req, data) 88 | if err != nil { 89 | fmt.Printf("%s|failed do recognize, error: %v\n", time.Now().Format("2006-01-02 15:04:05"), err) 90 | return 91 | } 92 | fmt.Printf("request_id: %s\n", resp.RequestId) 93 | 94 | for _, channelResult := range resp.FlashResult { 95 | fmt.Printf("channel_id: %d, result: %s\n", channelResult.ChannelId, channelResult.Text) 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /examples/ttsexample/ttsexample.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "sync" 7 | "time" 8 | 9 | "github.com/tencentcloud/tencentcloud-speech-sdk-go/common" 10 | "github.com/tencentcloud/tencentcloud-speech-sdk-go/tts" 11 | ) 12 | 13 | var ( 14 | // AppID AppID 15 | AppID = 0 16 | // SecretID SecretID 17 | SecretID = "SecretID" 18 | // SecretKey SecretKey 19 | SecretKey = "SecretKey" 20 | ) 21 | 22 | // MySpeechSynthesisListener implementation of SpeechSynthesisListener 23 | type MySpeechSynthesisListener struct { 24 | ID int 25 | } 26 | 27 | // OnMessage implementation of SpeechSynthesisListener 28 | func (listener *MySpeechSynthesisListener) OnMessage(response *tts.SpeechSynthesisResponse) { 29 | fmt.Printf("%s|%d|OnMessage, size: %d\n", time.Now().Format("2006-01-02 15:04:05"), listener.ID, len(response.Data)) 30 | } 31 | 32 | // OnComplete implementation of SpeechSynthesisListener 33 | func (listener *MySpeechSynthesisListener) OnComplete(response *tts.SpeechSynthesisResponse) { 34 | fmt.Printf("%s|%d|OnComplete: %v\n", time.Now().Format("2006-01-02 15:04:05"), listener.ID, response) 35 | } 36 | 37 | // OnCancel implementation of SpeechSynthesisListener 38 | func (listener *MySpeechSynthesisListener) OnCancel(response *tts.SpeechSynthesisResponse) { 39 | fmt.Printf("%s|%d|OnCancel: %v\n", time.Now().Format("2006-01-02 
15:04:05"), listener.ID, response) 40 | } 41 | 42 | // OnFail implementation of SpeechSynthesisListener 43 | func (listener *MySpeechSynthesisListener) OnFail(response *tts.SpeechSynthesisResponse, err error) { 44 | fmt.Printf("%s|%d|OnFail: %v, %v\n", time.Now().Format("2006-01-02 15:04:05"), listener.ID, response, err) 45 | } 46 | 47 | var proxyURL string 48 | 49 | func main() { 50 | var c = flag.Int("c", 1, "concurrency") 51 | var p = flag.String("p", "", "proxy url") 52 | flag.Parse() 53 | 54 | proxyURL = *p 55 | var wg sync.WaitGroup 56 | for i := 0; i < *c; i++ { 57 | fmt.Println("Main: Starting worker", i) 58 | wg.Add(1) 59 | go process(i, &wg) 60 | } 61 | 62 | fmt.Println("Main: Waiting for workers to finish") 63 | wg.Wait() 64 | fmt.Println("Main: Completed") 65 | 66 | } 67 | 68 | func process(id int, wg *sync.WaitGroup) { 69 | defer wg.Done() 70 | 71 | listener := &MySpeechSynthesisListener{ 72 | ID: id, 73 | } 74 | credential := common.NewCredential(SecretID, SecretKey) 75 | synthesizer := tts.NewSpeechSynthesizer(int64(AppID), credential, listener) 76 | synthesizer.VoiceType = 101000 77 | text := "语音合成可自定义音量和语速,让发音更自然、更专业、更符合场景需求。满足将文本转化成拟人化语音的需求,打通人机交互闭环。支持多种音色选择,语音合成可广泛应用于语音导航、有声读物、机器人、语音助手、自动新闻播报等场景,提升人机交互体验,提高语音类应用构建效率。" 78 | synthesizer.ProxyURL = proxyURL 79 | synthesizer.Synthesis(text) 80 | synthesizer.Wait() 81 | } 82 | -------------------------------------------------------------------------------- /examples/ttsexample/ttswsexample.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "github.com/google/uuid" 7 | "github.com/tencentcloud/tencentcloud-speech-sdk-go/common" 8 | "github.com/tencentcloud/tencentcloud-speech-sdk-go/tts" 9 | "path" 10 | "strconv" 11 | "sync" 12 | "time" 13 | ) 14 | 15 | type MySpeechWsSynthesisListener struct { 16 | SessionId string 17 | Data []byte 18 | Index int 19 | } 20 | 21 | func (l *MySpeechWsSynthesisListener) 
OnSynthesisStart(r *tts.SpeechWsSynthesisResponse) { 22 | fmt.Printf("%s|OnSynthesisStart,sessionId:%s response: %s\n", time.Now().Format("2006-01-02 15:04:05"), l.SessionId, r.ToString()) 23 | } 24 | 25 | func (l *MySpeechWsSynthesisListener) OnSynthesisEnd(r *tts.SpeechWsSynthesisResponse) { 26 | fileName := fmt.Sprintf("test.mp3") 27 | tts.WriteFile(path.Join("./", fileName), l.Data) 28 | fmt.Printf("%s|OnSynthesisEnd,sessionId:%s response: %s\n", time.Now().Format("2006-01-02 15:04:05"), l.SessionId, r.ToString()) 29 | } 30 | func (l *MySpeechWsSynthesisListener) OnAudioResult(data []byte) { 31 | fmt.Printf("%s|OnAudioResult,sessionId:%s index:%d\n", time.Now().Format("2006-01-02 15:04:05"), l.SessionId, l.Index) 32 | l.Index = l.Index + 1 33 | l.Data = append(l.Data, data...) 34 | } 35 | func (l *MySpeechWsSynthesisListener) OnTextResult(r *tts.SpeechWsSynthesisResponse) { 36 | fmt.Printf("%s|OnTextResult,sessionId:%s response: %s\n", time.Now().Format("2006-01-02 15:04:05"), l.SessionId, r.ToString()) 37 | } 38 | func (l *MySpeechWsSynthesisListener) OnSynthesisFail(r *tts.SpeechWsSynthesisResponse, err error) { 39 | fmt.Printf("%s|OnSynthesisFail,sessionId:%s response: %s err:%s\n", time.Now().Format("2006-01-02 15:04:05"), l.SessionId, r.ToString(), err.Error()) 40 | } 41 | 42 | func main() { 43 | var c = flag.Int("c", 1, "concurrency") 44 | flag.Parse() 45 | var wg sync.WaitGroup 46 | for i := 0; i < *c; i++ { 47 | fmt.Println("Main: Starting worker", i) 48 | wg.Add(1) 49 | go processWs(i, &wg) 50 | } 51 | 52 | fmt.Println("Main: Waiting for workers to finish") 53 | wg.Wait() 54 | fmt.Println("Main: Completed") 55 | 56 | } 57 | 58 | func processWs(id int, wg *sync.WaitGroup) { 59 | defer wg.Done() 60 | //在腾讯云控制台账号信息页面查看账号APPID,访问管理页面获取 SecretID 和 SecretKey 。 61 | secretId := "替换为自己的secretId" 62 | secretKey := "替换为自己的secretKey" 63 | AppId := 0 //替换为自己的appid 64 | 65 | sessionId := fmt.Sprintf("%s_%s", strconv.Itoa(id), uuid.New().String()) 66 | listener := 
&MySpeechWsSynthesisListener{Data: make([]byte, 0), SessionId: sessionId} 67 | credential := common.NewCredential(secretId, secretKey) 68 | synthesizer := tts.NewSpeechWsSynthesizer(int64(AppId), credential, listener) 69 | synthesizer.SessionId = sessionId 70 | synthesizer.VoiceType = 1001 71 | synthesizer.Codec = "mp3" 72 | synthesizer.Text = "\n现状是各地的经济水平是参差不齐的。需要缩小较弱地域和较强地域的差距。要做好这个差事可不容易啊。\n\n" 73 | synthesizer.EnableSubtitle = true 74 | //synthesizer.EmotionCategory = "happy" 75 | //synthesizer.EmotionIntensity = 200 76 | //synthesizer.Debug = true 77 | //synthesizer.DebugFunc = func(message string) { fmt.Println(message) } 78 | err := synthesizer.Synthesis() 79 | if err != nil { 80 | fmt.Println(err.Error()) 81 | return 82 | } 83 | synthesizer.Wait() 84 | } 85 | -------------------------------------------------------------------------------- /examples/virtual_number_example/virtual_number_example.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "github.com/tencentcloud/tencentcloud-speech-sdk-go/asr" 7 | "github.com/tencentcloud/tencentcloud-speech-sdk-go/common" 8 | "os" 9 | "sync" 10 | "time" 11 | ) 12 | 13 | var ( 14 | //TODO 补充信息 15 | AppID = "AppID" 16 | // SecretID SecretID 17 | SecretID = "" 18 | // SecretKey SecretKey 19 | SecretKey = "" 20 | // SliceSize SliceSize 21 | SliceSize = 3200 22 | ) 23 | 24 | // MyVNRecognitionListener implementation of SpeechRecognitionListener 25 | type MyVNRecognitionListener struct { 26 | ID int 27 | } 28 | 29 | // OnVNRecognitionStart implementation of SpeechRecognitionListener 30 | func (listener *MyVNRecognitionListener) OnVNRecognitionStart(response *asr.VNRecognitionResponse) { 31 | fmt.Printf("%s|%s|OnRecognitionStart\n", time.Now().Format("2006-01-02 15:04:05"), response.VoiceID) 32 | } 33 | 34 | // OnVNRecognitionComplete implementation of SpeechRecognitionListener 35 | func (listener *MyVNRecognitionListener) 
OnVNRecognitionComplete(response *asr.VNRecognitionResponse) { 36 | fmt.Printf("%s|%s|OnRecognitionComplete|result:%d\n", time.Now().Format("2006-01-02 15:04:05"), response.VoiceID, response.Result) 37 | } 38 | 39 | // OnVNFail implementation of SpeechRecognitionListener 40 | func (listener *MyVNRecognitionListener) OnVNFail(response *asr.VNRecognitionResponse, err error) { 41 | fmt.Printf("%s|%s|OnFail: %v\n", time.Now().Format("2006-01-02 15:04:05"), response.VoiceID, err) 42 | } 43 | 44 | var proxyURL string 45 | 46 | func main() { 47 | var c = flag.Int("c", 1, "concurrency") 48 | var l = flag.Bool("l", false, "loop or not") 49 | var f = flag.String("f", "test.pcm", "audio file") 50 | var p = flag.String("p", "", "proxy url") 51 | flag.Parse() 52 | 53 | proxyURL = *p 54 | var wg sync.WaitGroup 55 | for i := 0; i < *c; i++ { 56 | fmt.Println("Main: Starting worker", i) 57 | wg.Add(1) 58 | if *l { 59 | go processLoop(i, &wg, *f) 60 | } else { 61 | go processOnce(i, &wg, *f) 62 | } 63 | } 64 | 65 | fmt.Println("Main: Waiting for workers to finish") 66 | wg.Wait() 67 | fmt.Println("Main: Completed") 68 | 69 | } 70 | 71 | func processLoop(id int, wg *sync.WaitGroup, file string) { 72 | defer wg.Done() 73 | for { 74 | err := process(id, file) 75 | if err != nil { 76 | return 77 | } 78 | } 79 | } 80 | 81 | func processOnce(id int, wg *sync.WaitGroup, file string) { 82 | defer wg.Done() 83 | process(id, file) 84 | } 85 | 86 | func process(id int, file string) error { 87 | audio, err := os.Open(file) 88 | defer audio.Close() 89 | if err != nil { 90 | fmt.Printf("open file error: %v\n", err) 91 | return err 92 | } 93 | 94 | listener := &MyVNRecognitionListener{ 95 | ID: id, 96 | } 97 | credential := common.NewCredential(SecretID, SecretKey) 98 | recognizer := asr.NewVNRecognizer(AppID, credential, listener) 99 | recognizer.ProxyURL = proxyURL 100 | recognizer.VoiceFormat = asr.AudioFormatPCM 101 | recognizer.WaitTime = 30000 102 | //握手阶段 103 | err = recognizer.Start() 104 
| if err != nil { 105 | fmt.Printf("%s|recognizer start failed, error: %v\n", time.Now().Format("2006-01-02 15:04:05"), err) 106 | return err 107 | } 108 | for { 109 | data := make([]byte, SliceSize) 110 | n, err := audio.Read(data) 111 | if err != nil { 112 | if err.Error() == "EOF" { 113 | break 114 | } 115 | fmt.Printf("read file error: %v\n", err) 116 | break 117 | } 118 | if n <= 0 { 119 | break 120 | } 121 | err, end := recognizer.Write(data) 122 | if err != nil || end { 123 | break 124 | } 125 | //模拟真实场景,200ms产生200ms数据 126 | time.Sleep(200 * time.Millisecond) 127 | } 128 | recognizer.Stop() 129 | return nil 130 | } 131 | -------------------------------------------------------------------------------- /examples/asrexample/asrexample.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | "sync" 8 | "time" 9 | 10 | "github.com/tencentcloud/tencentcloud-speech-sdk-go/asr" 11 | "github.com/tencentcloud/tencentcloud-speech-sdk-go/common" 12 | ) 13 | 14 | var ( 15 | // AppID AppID 16 | AppID = "" 17 | // SecretID SecretID 18 | SecretID = "" 19 | // SecretKey SecretKey 20 | SecretKey = "" 21 | // EngineModelType EngineModelType 22 | EngineModelType = "16k_zh" 23 | // SliceSize SliceSize 24 | SliceSize = 6400 25 | ) 26 | 27 | // MySpeechRecognitionListener implementation of SpeechRecognitionListener 28 | type MySpeechRecognitionListener struct { 29 | ID int 30 | } 31 | 32 | // OnRecognitionStart implementation of SpeechRecognitionListener 33 | func (listener *MySpeechRecognitionListener) OnRecognitionStart(response *asr.SpeechRecognitionResponse) { 34 | fmt.Printf("%s|%s|OnRecognitionStart\n", time.Now().Format("2006-01-02 15:04:05"), response.VoiceID) 35 | } 36 | 37 | // OnSentenceBegin implementation of SpeechRecognitionListener 38 | func (listener *MySpeechRecognitionListener) OnSentenceBegin(response *asr.SpeechRecognitionResponse) { 39 | 
fmt.Printf("%s|%s|OnSentenceBegin: %v\n", time.Now().Format("2006-01-02 15:04:05"), response.VoiceID, response) 40 | } 41 | 42 | // OnRecognitionResultChange implementation of SpeechRecognitionListener 43 | func (listener *MySpeechRecognitionListener) OnRecognitionResultChange(response *asr.SpeechRecognitionResponse) { 44 | fmt.Printf("%s|%s|OnRecognitionResultChange: %v\n", time.Now().Format("2006-01-02 15:04:05"), response.VoiceID, response) 45 | } 46 | 47 | // OnSentenceEnd implementation of SpeechRecognitionListener 48 | func (listener *MySpeechRecognitionListener) OnSentenceEnd(response *asr.SpeechRecognitionResponse) { 49 | fmt.Printf("%s|%s|OnSentenceEnd: %v\n", time.Now().Format("2006-01-02 15:04:05"), response.VoiceID, response) 50 | } 51 | 52 | // OnRecognitionComplete implementation of SpeechRecognitionListener 53 | func (listener *MySpeechRecognitionListener) OnRecognitionComplete(response *asr.SpeechRecognitionResponse) { 54 | fmt.Printf("%s|%s|OnRecognitionComplete\n", time.Now().Format("2006-01-02 15:04:05"), response.VoiceID) 55 | } 56 | 57 | // OnFail implementation of SpeechRecognitionListener 58 | func (listener *MySpeechRecognitionListener) OnFail(response *asr.SpeechRecognitionResponse, err error) { 59 | fmt.Printf("%s|%s|OnFail: %v\n", time.Now().Format("2006-01-02 15:04:05"), response.VoiceID, err) 60 | } 61 | 62 | var proxyURL string 63 | 64 | func main() { 65 | var c = flag.Int("c", 1, "concurrency") 66 | var l = flag.Bool("l", false, "loop or not") 67 | var f = flag.String("f", "test.pcm", "audio file") 68 | var p = flag.String("p", "", "proxy url") 69 | flag.Parse() 70 | 71 | proxyURL = *p 72 | var wg sync.WaitGroup 73 | for i := 0; i < *c; i++ { 74 | fmt.Println("Main: Starting worker", i) 75 | wg.Add(1) 76 | if *l { 77 | go processLoop(i, &wg, *f) 78 | } else { 79 | go processOnce(i, &wg, *f) 80 | } 81 | } 82 | 83 | fmt.Println("Main: Waiting for workers to finish") 84 | wg.Wait() 85 | fmt.Println("Main: Completed") 86 | 87 | } 88 | 89 
| func processLoop(id int, wg *sync.WaitGroup, file string) { 90 | defer wg.Done() 91 | for { 92 | err := process(id, file) 93 | if err != nil { 94 | return 95 | } 96 | } 97 | } 98 | 99 | func processOnce(id int, wg *sync.WaitGroup, file string) { 100 | defer wg.Done() 101 | process(id, file) 102 | } 103 | 104 | func process(id int, file string) error { 105 | audio, err := os.Open(file) 106 | defer audio.Close() 107 | if err != nil { 108 | fmt.Printf("open file error: %v\n", err) 109 | return err 110 | } 111 | 112 | listener := &MySpeechRecognitionListener{ 113 | ID: id, 114 | } 115 | credential := common.NewCredential(SecretID, SecretKey) 116 | recognizer := asr.NewSpeechRecognizer(AppID, credential, EngineModelType, listener) 117 | recognizer.ProxyURL = proxyURL 118 | recognizer.VoiceFormat = asr.AudioFormatPCM 119 | err = recognizer.Start() 120 | if err != nil { 121 | fmt.Printf("%s|recognizer start failed, error: %v\n", time.Now().Format("2006-01-02 15:04:05"), err) 122 | return err 123 | } 124 | for { 125 | data := make([]byte, SliceSize) 126 | n, err := audio.Read(data) 127 | if err != nil { 128 | if err.Error() == "EOF" { 129 | break 130 | } 131 | fmt.Printf("read file error: %v\n", err) 132 | break 133 | } 134 | if n <= 0 { 135 | break 136 | } 137 | err = recognizer.Write(data[:n]) 138 | if err != nil { 139 | break 140 | } 141 | //模拟真实场景,200ms产生200ms数据 142 | //注意:该行sleep代码用于模拟实时音频流1:1产生音频数据(每200ms产生200ms音频),实际音频流场景建议删除该行代码,或业务根据自己的需求情况自行调整 143 | time.Sleep(200 * time.Millisecond) 144 | } 145 | recognizer.Stop() 146 | return nil 147 | } 148 | -------------------------------------------------------------------------------- /examples/soeexample/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | "sync" 8 | "time" 9 | 10 | "github.com/tencentcloud/tencentcloud-speech-sdk-go/common" 11 | "github.com/tencentcloud/tencentcloud-speech-sdk-go/soe" 12 | ) 13 | 14 
| var ( 15 | //TODO 补充信息 16 | // AppID AppID 17 | AppID = "" 18 | //SecretID SecretID 19 | SecretID = "" 20 | //SecretKey SecretKey 21 | SecretKey = "" 22 | // Token 只有临时秘钥鉴权需要 23 | Token = "" 24 | 25 | // SliceSize SliceSize 26 | SliceSize = 1600 27 | ) 28 | 29 | // MySpeakingAssessmentListener implementation of SpeakingAssessmentListener 30 | type MySpeakingAssessmentListener struct { 31 | ID int 32 | } 33 | 34 | // OnRecognitionStart implementation of SpeakingAssessmentListener 35 | func (listener *MySpeakingAssessmentListener) OnRecognitionStart(response *soe.SpeakingAssessmentResponse) { 36 | fmt.Printf("%s|%s|OnRecognitionStart\n", time.Now().Format("2006-01-02 15:04:05"), response.VoiceID) 37 | } 38 | 39 | // OnIntermediateResults implementation of SpeakingAssessmentListener 40 | func (listener *MySpeakingAssessmentListener) OnIntermediateResults(response *soe.SpeakingAssessmentResponse) { 41 | fmt.Printf("%s|%s|OnIntermediateResults|result:%+v\n", time.Now().Format("2006-01-02 15:04:05"), response.VoiceID, response.Result) 42 | } 43 | 44 | // OnRecognitionComplete implementation of SpeakingAssessmentListener 45 | func (listener *MySpeakingAssessmentListener) OnRecognitionComplete(response *soe.SpeakingAssessmentResponse) { 46 | fmt.Printf("%s|%s|OnRecognitionComplete|result:%+v\n", time.Now().Format("2006-01-02 15:04:05"), response.VoiceID, response.Result) 47 | } 48 | 49 | // OnFail implementation of SpeakingAssessmentListener 50 | func (listener *MySpeakingAssessmentListener) OnFail(response *soe.SpeakingAssessmentResponse, err error) { 51 | fmt.Printf("%s|%s|OnFail: %v\n", time.Now().Format("2006-01-02 15:04:05"), response.VoiceID, err) 52 | } 53 | 54 | var proxyURL string 55 | var recFlag = flag.Bool("rec", false, "enable rec mode") 56 | 57 | func main() { 58 | var c = flag.Int("c", 1, "concurrency") 59 | var l = flag.Bool("l", false, "loop or not") 60 | var f = flag.String("f", "english.wav", "audio file") 61 | var p = flag.String("p", "", "proxy url") 
62 | flag.Parse() 63 | 64 | proxyURL = *p 65 | var wg sync.WaitGroup 66 | for i := 0; i < *c; i++ { 67 | fmt.Println("Main: Starting worker", i) 68 | wg.Add(1) 69 | if *l { 70 | go processLoop(i, &wg, *f) 71 | } else { 72 | go processOnce(i, &wg, *f) 73 | } 74 | } 75 | 76 | fmt.Println("Main: Waiting for workers to finish") 77 | wg.Wait() 78 | fmt.Println("Main: Completed") 79 | 80 | } 81 | 82 | func processLoop(id int, wg *sync.WaitGroup, file string) { 83 | defer wg.Done() 84 | for { 85 | err := process(id, file) 86 | if err != nil { 87 | return 88 | } 89 | } 90 | } 91 | 92 | func processOnce(id int, wg *sync.WaitGroup, file string) { 93 | defer wg.Done() 94 | process(id, file) 95 | } 96 | 97 | func process(id int, file string) error { 98 | audio, err := os.Open(file) 99 | if err != nil { 100 | fmt.Printf("open file error: %v\n", err) 101 | return err 102 | } 103 | defer audio.Close() 104 | 105 | listener := &MySpeakingAssessmentListener{ 106 | ID: id, 107 | } 108 | // 临时秘钥鉴权需要使用带token的方式 credential := common.NewTokenCredential(SecretID, SecretKey, Token) 109 | credential := common.NewCredential(SecretID, SecretKey) 110 | recognizer := soe.NewSpeechRecognizer(AppID, credential, listener) 111 | recognizer.ProxyURL = proxyURL 112 | recognizer.VoiceFormat = soe.AudioFormatWav 113 | recognizer.RefText = "beautiful" 114 | recognizer.ServerEngineType = "16k_en" 115 | recognizer.ScoreCoeff = 1.1 116 | recognizer.EvalMode = 0 117 | recognizer.Keyword = "" 118 | recognizer.SentenceInfoEnabled = 0 119 | recognizer.TextMode = 0 120 | if *recFlag { 121 | // 录音识别模式下可发送单个大长度分片(上限300s) 122 | // 单次连接只能发一个分片,得到识别结果后需要关闭此条websocket连接,再次识别需要重新建立连接 123 | // 录音识别模式适合已经存在完整录音文件数据需要一次性返回最终结果的场景 124 | // 更推荐使用流式识别模式,流式识别可以相对更快的得到识别结果,有更可靠的实时率保障 125 | recognizer.RecMode = 1 126 | } 127 | //握手阶段 128 | err = recognizer.Start() 129 | if err != nil { 130 | fmt.Printf("%s|recognizer start failed, error: %v\n", time.Now().Format("2006-01-02 15:04:05"), err) 131 | return err 132 | } 133 | seq 
:= 0 134 | if *recFlag { 135 | // 录音识别模式可以一次性发送全部数据 136 | fileDataAll, err := os.ReadFile(file) 137 | if err != nil { 138 | fmt.Printf("read file error: %v\n", err) 139 | return err 140 | } 141 | if err = recognizer.Write(fileDataAll); err != nil { 142 | fmt.Printf("write data error: %v\n", err) 143 | return err 144 | } 145 | } else { 146 | // 流式识别模式,需要分片发送音频数据 147 | for { 148 | data := make([]byte, SliceSize) 149 | n, err := audio.Read(data) 150 | if err != nil { 151 | if err.Error() == "EOF" { 152 | break 153 | } 154 | fmt.Printf("read file error: %v\n", err) 155 | break 156 | } 157 | if n <= 0 { 158 | break 159 | } 160 | err = recognizer.Write(data) 161 | if err != nil { 162 | break 163 | } 164 | //模拟真实场景,200ms产生200ms数据 165 | time.Sleep(200 * time.Millisecond) 166 | seq++ 167 | } 168 | } 169 | 170 | recognizer.Stop() 171 | return nil 172 | } 173 | -------------------------------------------------------------------------------- /asr/flashrecognizer.go: -------------------------------------------------------------------------------- 1 | package asr 2 | 3 | import ( 4 | "bytes" 5 | "crypto/hmac" 6 | "crypto/sha1" 7 | "encoding/base64" 8 | "encoding/json" 9 | "fmt" 10 | "io/ioutil" 11 | "net" 12 | "net/http" 13 | "net/url" 14 | "sort" 15 | "strconv" 16 | "sync" 17 | "time" 18 | 19 | "github.com/tencentcloud/tencentcloud-speech-sdk-go/common" 20 | ) 21 | 22 | // FlashRecognitionRequest FlashRecognitionRequest 23 | type FlashRecognitionRequest struct { 24 | EngineType string `json:"engine_type"` 25 | VoiceFormat string `json:"voice_format"` 26 | SpeakerDiarization uint32 `json:"speaker_diarization"` 27 | HotwordId string `json:"hotword_id"` 28 | HotwordList string `json:"hotword_list"` 29 | CustomizationId string `json:"customization_id"` 30 | FilterDirty int32 `json:"filter_dirty"` 31 | FilterModal int32 `json:"filter_modal"` 32 | FilterPunc int32 `json:"filter_punc"` 33 | ConvertNumMode int32 `json:"convert_num_mode"` 34 | WordInfo int32 `json:"word_info"` 35 | 
FirstChannelOnly int32 `json:"first_channel_only"` 36 | ReinforceHotword int32 `json:"reinforce_hotword"` 37 | SentenceMaxLength int32 `json:"sentence_max_length"` 38 | } 39 | 40 | // FlashRecognitionResponse FlashRecognitionResponse 41 | type FlashRecognitionResponse struct { 42 | RequestId string `json:"request_id"` 43 | Code int `json:"code"` 44 | Message string `json:"message"` 45 | AudioDuration int64 `json:"audio_duration"` 46 | FlashResult []*FlashRecognitionResult `json:"flash_result,omitempty"` 47 | } 48 | 49 | // FlashRecognitionResult FlashRecognitionResult 50 | type FlashRecognitionResult struct { 51 | Text string `json:"text"` 52 | ChannelId int32 `json:"channel_id"` 53 | SentenceList []*FlashRecognitionSentence `json:"sentence_list,omitempty"` 54 | } 55 | 56 | // FlashRecognitionSentence FlashRecognitionSentence 57 | type FlashRecognitionSentence struct { 58 | Text string `json:"text"` 59 | StartTime uint32 `json:"start_time"` 60 | EndTime uint32 `json:"end_time"` 61 | SpeakerId int32 `json:"speaker_id"` 62 | WordList []*FlashWordData `json:"word_list,omitempty"` 63 | } 64 | 65 | // FlashWordData FlashWordData 66 | type FlashWordData struct { 67 | Word string `json:"word"` 68 | StartTime uint32 `json:"start_time"` 69 | EndTime uint32 `json:"end_time"` 70 | StableFlag uint32 `json:"stable_flag"` 71 | } 72 | 73 | // newFlashRecognitionResponse newFlashRecognitionResponse 74 | func newFlashRecognitionResponse(code int, message string) *FlashRecognitionResponse { 75 | return &FlashRecognitionResponse{ 76 | Code: code, 77 | Message: message, 78 | } 79 | } 80 | 81 | var ( 82 | flashHost = "asr.cloud.tencent.com" 83 | httpClient *http.Client 84 | 85 | connTimeout = 1 86 | rwTimeout = 600 87 | maxIdleConns = 100 88 | maxIdleConnsPerHost = 2 89 | idleConnTimeout = time.Duration(180) * time.Second 90 | 91 | //once : for once init 92 | once sync.Once 93 | ) 94 | 95 | // initHttpClient init http client 96 | func initHttpClient() { 97 | once.Do(func() { 98 | 
transport := &http.Transport{ 99 | Proxy: http.ProxyFromEnvironment, 100 | DialContext: (&net.Dialer{ 101 | Timeout: time.Duration(connTimeout) * time.Second, 102 | KeepAlive: time.Duration(rwTimeout*10) * time.Second, 103 | DualStack: true, 104 | }).DialContext, 105 | MaxIdleConns: maxIdleConns, 106 | IdleConnTimeout: idleConnTimeout, 107 | TLSHandshakeTimeout: time.Duration(connTimeout) * time.Second, 108 | ExpectContinueTimeout: 1 * time.Second, 109 | } 110 | httpClient = new(http.Client) 111 | httpClient.Transport = transport 112 | httpClient.Timeout = time.Duration(rwTimeout) * time.Second 113 | }) 114 | } 115 | 116 | // FlashRecognizer is the entry for ASR flash recognizer 117 | type FlashRecognizer struct { 118 | AppID string 119 | 120 | //for proxy 121 | ProxyURL string 122 | 123 | Credential *common.Credential 124 | } 125 | 126 | // NewFlashRecognizer creates instance of FlashRecognizer 127 | func NewFlashRecognizer(appID string, credential *common.Credential) *FlashRecognizer { 128 | initHttpClient() 129 | return &FlashRecognizer{ 130 | AppID: appID, 131 | Credential: credential, 132 | } 133 | } 134 | 135 | // Recognize Recognize 136 | func (recognizer *FlashRecognizer) Recognize(req *FlashRecognitionRequest, 137 | videoData []byte) (*FlashRecognitionResponse, error) { 138 | 139 | signStr, reqUrl := recognizer.buildURL(req) 140 | signature := recognizer.genSignature(signStr) 141 | 142 | headers := make(map[string]string) 143 | headers["Host"] = flashHost 144 | headers["Authorization"] = signature 145 | 146 | if len(recognizer.ProxyURL) > 0 { 147 | proxyURL, _ := url.Parse(recognizer.ProxyURL) 148 | httpClient.Transport.(*http.Transport).Proxy = http.ProxyURL(proxyURL) 149 | } 150 | 151 | httpReq, err := http.NewRequest("POST", reqUrl, bytes.NewReader(videoData)) 152 | if err != nil { 153 | return nil, fmt.Errorf("failed create http request, error: %s", err.Error()) 154 | } 155 | for k, v := range headers { 156 | httpReq.Header.Set(k, v) 157 | } 158 | 
httpResp, err := httpClient.Do(httpReq) 159 | if err != nil { 160 | return nil, fmt.Errorf("failed do request, error: %s", err.Error()) 161 | } 162 | defer httpResp.Body.Close() 163 | respData, err := ioutil.ReadAll(httpResp.Body) 164 | if err != nil { 165 | return nil, fmt.Errorf("failed read body, error: %s", err.Error()) 166 | } 167 | if httpResp.StatusCode != 200 { 168 | return nil, fmt.Errorf("http code not 200, respData: %s", string(respData)) 169 | } 170 | resp := &FlashRecognitionResponse{} 171 | err = json.Unmarshal(respData, &resp) 172 | if err != nil { 173 | return nil, fmt.Errorf("failed unmarshal, respData: %s, error: %s", respData, err.Error()) 174 | } 175 | if resp.Code != 0 { 176 | return resp, fmt.Errorf("request_id: %s, code: %d, message: %s", resp.RequestId, resp.Code, resp.Message) 177 | } 178 | return resp, nil 179 | } 180 | 181 | // buildURL buildURL 182 | func (recognizer *FlashRecognizer) buildURL(req *FlashRecognitionRequest) (string, string) { 183 | var queryMap = make(map[string]string) 184 | queryMap["secretid"] = recognizer.Credential.SecretId 185 | queryMap["engine_type"] = req.EngineType 186 | queryMap["voice_format"] = req.VoiceFormat 187 | queryMap["speaker_diarization"] = strconv.FormatInt(int64(req.SpeakerDiarization), 10) 188 | queryMap["hotword_id"] = req.HotwordId 189 | queryMap["hotword_list"] = req.HotwordList 190 | queryMap["customization_id"] = req.CustomizationId 191 | queryMap["filter_dirty"] = strconv.FormatInt(int64(req.FilterDirty), 10) 192 | queryMap["filter_modal"] = strconv.FormatInt(int64(req.FilterModal), 10) 193 | queryMap["filter_punc"] = strconv.FormatInt(int64(req.FilterPunc), 10) 194 | queryMap["convert_num_mode"] = strconv.FormatInt(int64(req.ConvertNumMode), 10) 195 | queryMap["word_info"] = strconv.FormatInt(int64(req.WordInfo), 10) 196 | queryMap["first_channel_only"] = strconv.FormatInt(int64(req.FirstChannelOnly), 10) 197 | queryMap["reinforce_hotword"] = strconv.FormatInt(int64(req.ReinforceHotword), 
	10)
	queryMap["sentence_max_length"] = strconv.FormatInt(int64(req.SentenceMaxLength), 10)
	var timestamp = time.Now().Unix()
	var timestampStr = strconv.FormatInt(timestamp, 10)
	queryMap["timestamp"] = timestampStr

	// Sort the keys so the query string — and therefore the HMAC signature
	// computed over it — is deterministic.
	var keys []string
	for k := range queryMap {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	var queryStrBuffer bytes.Buffer
	for _, k := range keys {
		queryStrBuffer.WriteString(k)
		queryStrBuffer.WriteString("=")
		queryStrBuffer.WriteString(queryMap[k])
		queryStrBuffer.WriteString("&")
	}

	// Drop the trailing "&".
	// NOTE(review): values are not URL-escaped here; a parameter containing
	// '&', '=' or '%' (e.g. hotword_list) would corrupt the query string —
	// confirm inputs are restricted by the caller or the service.
	rs := []rune(queryStrBuffer.String())
	rsLen := len(rs)
	queryStr := string(rs[0 : rsLen-1])

	// signStr is what gets signed ("POST" + host + path + query, no scheme);
	// reqUrl is the actual https endpoint.
	url := fmt.Sprintf("%s/asr/flash/v1/%s?%s", flashHost, recognizer.AppID, queryStr)
	signStr := fmt.Sprintf("POST%s", url)
	reqUrl := fmt.Sprintf("https://%s", url)
	return signStr, reqUrl
}

// genSignature signs url with HMAC-SHA1 keyed by the credential's secret key
// and returns the base64-encoded digest for the Authorization header.
func (recognizer *FlashRecognizer) genSignature(url string) string {
	hmac := hmac.New(sha1.New, []byte(recognizer.Credential.SecretKey))
	signURL := url
	hmac.Write([]byte(signURL))
	encryptedStr := hmac.Sum([]byte(nil))
	var signature = base64.StdEncoding.EncodeToString(encryptedStr)
	return signature
}

package tts

import (
	"bytes"
	"crypto/hmac"
	"crypto/sha1"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net"
	"net/http"
	"net/url"
	"sort"
	"strconv"
	"sync"
	"time"

	"github.com/google/uuid"

	"github.com/tencentcloud/tencentcloud-speech-sdk-go/common"
)

// SpeechSynthesisResponse carries one chunk of synthesized audio (or a
// completion/cancel/failure notification) for a session.
type SpeechSynthesisResponse struct {
	SessionID
string
	Data []byte
}

// SpeechSynthesisListener receives the callbacks of a synthesis session.
// All callbacks are invoked from the synthesizer's internal event goroutine.
type SpeechSynthesisListener interface {
	OnMessage(*SpeechSynthesisResponse)
	OnComplete(*SpeechSynthesisResponse)
	OnCancel(*SpeechSynthesisResponse)
	OnFail(*SpeechSynthesisResponse, error)
}

// SpeechSynthesizer is the entry for TTS service
type SpeechSynthesizer struct {
	AppID      int64
	Credential *common.Credential
	VoiceType  int64
	SampleRate int64
	Codec      string

	ProxyURL string

	// mutex serializes the public Synthesis/Cancel/Wait calls.
	mutex sync.Mutex

	eventChan chan speechSynthesisEvent
	eventEnd  chan int
	sessionID string
	listener  SpeechSynthesisListener

	// 0 - idle, 1 - running, 2 - cancelled
	status      int
	statusMutex sync.Mutex
}

// ttsRequest is the JSON body posted to the TextToStreamAudio endpoint.
type ttsRequest struct {
	Action     string `json:"Action"`
	AppID      int64  `json:"AppId"`
	SecretID   string `json:"SecretId"`
	Timestamp  int64  `json:"Timestamp"`
	Expired    int64  `json:"Expired"`
	Text       string `json:"Text"`
	SessionID  string `json:"SessionId"`
	ModelType  int64  `json:"ModelType"`
	VoiceType  int64  `json:"VoiceType"`
	SampleRate int64  `json:"SampleRate"`
	Codec      string `json:"Codec"`
}

// ttsErrorJSONResponseError is the Error object inside an error response.
type ttsErrorJSONResponseError struct {
	Code    string `json:"Code"`
	Message string `json:"Message"`
}

type ttsErrorJSONResponse struct {
	RequestID string                    `json:"RequestId"`
	Error     ttsErrorJSONResponseError `json:"Error"`
}

type ttsErrorJSON struct {
	Response ttsErrorJSONResponse `json:"Response"`
}

const (
	defaultVoiceType  = 0
	defaultSampleRate = 16000
	defaultCodec      = "pcm"
	defaultAction     = "TextToStreamAudio"

	// timeouts are in milliseconds
	httpConnectTimeout    = 2000
	httpReadHeaderTimeout = 2000

	// size of one audio chunk read from the streaming response body
	maxMessageSize = 10240

	protocol = "https"
	host     = "tts.cloud.tencent.com"
	path     = "/stream"
)

// event types dispatched to the listener
const (
	eventTypeMessage  = 0
	eventTypeComplete = 1
	eventTypeCancel   = 2
	eventTypeFail     = 3
)

type eventType int

// speechSynthesisEvent is the unit passed from the request goroutine to the
// dispatch goroutine.
type speechSynthesisEvent struct {
	t   eventType
	r   *SpeechSynthesisResponse
	err error
}

// NewSpeechSynthesizer creates instance of SpeechSynthesizer
func NewSpeechSynthesizer(appID int64, credential *common.Credential, listener SpeechSynthesisListener) *SpeechSynthesizer {
	return &SpeechSynthesizer{
		AppID:      appID,
		Credential: credential,
		VoiceType:  defaultVoiceType,
		SampleRate: defaultSampleRate,
		Codec:      defaultCodec,

		listener: listener,

		status: 0,
	}
}

// Synthesis starts an asynchronous synthesis of text; results arrive through
// the listener. Returns an error if a session is already running.
func (synthesizer *SpeechSynthesizer) Synthesis(text string) error {
	synthesizer.mutex.Lock()
	defer synthesizer.mutex.Unlock()
	if synthesizer.getStatus() != 0 {
		return fmt.Errorf("synthesizer already started")
	}

	synthesizer.eventChan = make(chan speechSynthesisEvent, 10)
	synthesizer.eventEnd = make(chan int)
	go synthesizer.sendRequest(text)
	go synthesizer.eventDispatch()
	synthesizer.setStatus(1)
	return nil
}

// Cancel asks the running session to stop and blocks until it has fully
// wound down.
// NOTE(review): calling Cancel (or Wait) before Synthesis blocks forever on
// the nil eventEnd channel — confirm callers always start a session first.
func (synthesizer *SpeechSynthesizer) Cancel() error {
	synthesizer.mutex.Lock()
	defer synthesizer.mutex.Unlock()
	synthesizer.setStatus(2)
	<-synthesizer.eventEnd
	return nil
}

// Wait blocks until the current session completes and all events have been
// dispatched to the listener.
func (synthesizer *SpeechSynthesizer) Wait() error {
	synthesizer.mutex.Lock()
	defer synthesizer.mutex.Unlock()
	<-synthesizer.eventEnd
	return nil
}

// getStatus returns the session state under statusMutex.
func (synthesizer *SpeechSynthesizer) getStatus() int {
	synthesizer.statusMutex.Lock()
	defer synthesizer.statusMutex.Unlock()
	status := synthesizer.status
	return status
}

// setStatus stores the session state under statusMutex.
func (synthesizer *SpeechSynthesizer)
setStatus(status int) {
	synthesizer.statusMutex.Lock()
	defer synthesizer.statusMutex.Unlock()
	synthesizer.status = status
}

// eventDispatch forwards queued events to the listener until eventChan is
// closed, then marks the synthesizer idle and releases Wait/Cancel.
func (synthesizer *SpeechSynthesizer) eventDispatch() {
	for e := range synthesizer.eventChan {
		switch e.t {
		case eventTypeMessage:
			synthesizer.listener.OnMessage(e.r)
		case eventTypeComplete:
			synthesizer.listener.OnComplete(e.r)
		case eventTypeCancel:
			synthesizer.listener.OnCancel(e.r)
		case eventTypeFail:
			synthesizer.listener.OnFail(e.r, e.err)
		}
	}
	synthesizer.setStatus(0)
	close(synthesizer.eventEnd)
}

// sendRequest performs the streaming TTS HTTP request for text and feeds the
// audio chunks (or a failure/cancel notification) into eventChan.
func (synthesizer *SpeechSynthesizer) sendRequest(text string) {
	defer func() {
		// Closing eventChan ends eventDispatch, which in turn releases Wait.
		close(synthesizer.eventChan)
	}()

	url := fmt.Sprintf("%s%s", host, path)
	var timestamp = time.Now().Unix()
	sessionID := uuid.New().String()
	req := ttsRequest{
		Action:     defaultAction,
		AppID:      synthesizer.AppID,
		SecretID:   synthesizer.Credential.SecretId,
		Timestamp:  timestamp,
		Expired:    timestamp + 24*60*60,
		Text:       text,
		SessionID:  sessionID,
		ModelType:  1,
		VoiceType:  synthesizer.VoiceType,
		SampleRate: synthesizer.SampleRate,
		Codec:      synthesizer.Codec,
	}
	signature := genSignature(url, &req, synthesizer.Credential.SecretKey)
	url = fmt.Sprintf("https://%s", url)
	postBody, err := json.Marshal(req)
	if err != nil {
		synthesizer.onError(err)
		return
	}
	httpReq, err := http.NewRequest("POST", url, bytes.NewReader(postBody))
	if err != nil {
		synthesizer.onError(err)
		return
	}
	synthesizer.sessionID = sessionID
	httpReq.Header.Add("Content-Type", "application/json; charset=UTF-8")
	httpReq.Header.Add("Authorization", signature)
	httpClient := synthesizer.createHTTPClient()
	rsp, err := httpClient.Do(httpReq)
	if err != nil {
		synthesizer.onError(err)
		return
	}
	defer rsp.Body.Close()
	if rsp.StatusCode != 200 {
		// Bug fix: this branch previously called onError(err) with a
		// guaranteed-nil err (Do succeeded), so listeners got OnFail with no
		// error at all. Build a real error from the status and body instead.
		rspBody, _ := ioutil.ReadAll(rsp.Body)
		synthesizer.onError(fmt.Errorf("http status code %d, body: %s", rsp.StatusCode, string(rspBody)))
		return
	}
	if len(rsp.Header["Content-Type"]) < 1 || rsp.Header["Content-Type"][0] != "application/octet-stream" {
		// A non-stream content type means the service returned an error
		// payload; surface its raw text. Uses the "%s" verb rather than
		// passing the body as the format string (go vet violation, and any
		// '%' in the payload would be misinterpreted).
		rspBody, _ := ioutil.ReadAll(rsp.Body)
		synthesizer.onError(fmt.Errorf("%s", string(rspBody)))
		return
	}
	buffer := make([]byte, maxMessageSize)
	for {
		// A cancelled session stops reading and reports OnCancel.
		if synthesizer.getStatus() == 2 {
			synthesizer.onCancel()
			return
		}
		n, err := rsp.Body.Read(buffer)
		if err != nil {
			// io.EOF stringifies to "EOF"; compared by text to avoid adding
			// a new import to this file.
			if err.Error() == "EOF" {
				break
			}
			synthesizer.onError(err)
			return
		}
		if n == 0 {
			continue
		}
		// Copy the chunk out: buffer is reused by the next Read.
		copyBuf := make([]byte, n)
		copy(copyBuf, buffer[:n])
		synthesizer.onMessage(copyBuf)
	}
	synthesizer.onComplete()
}

// onMessage queues one audio chunk for the listener.
func (synthesizer *SpeechSynthesizer) onMessage(data []byte) {
	r := &SpeechSynthesisResponse{
		SessionID: synthesizer.sessionID,
		Data:      data,
	}
	event := speechSynthesisEvent{
		t:   eventTypeMessage,
		r:   r,
		err: nil,
	}
	synthesizer.eventChan <- event
}

// onComplete queues the completion notification.
func (synthesizer *SpeechSynthesizer) onComplete() {
	r := &SpeechSynthesisResponse{
		SessionID: synthesizer.sessionID,
	}
	event := speechSynthesisEvent{
		t:   eventTypeComplete,
		r:   r,
		err: nil,
	}
	synthesizer.eventChan <- event
}

// onCancel queues the cancellation notification.
func (synthesizer *SpeechSynthesizer) onCancel() {
	r := &SpeechSynthesisResponse{
		SessionID: synthesizer.sessionID,
	}
	synthesizer.eventChan <- speechSynthesisEvent{
		t:   eventTypeCancel,
		r:   r,
		err: nil,
	}
}

// onError queues a failure notification carrying err.
func (synthesizer *SpeechSynthesizer) onError(err error) {
	r := &SpeechSynthesisResponse{
		SessionID: synthesizer.sessionID,
	}
	synthesizer.eventChan <- speechSynthesisEvent{
		t:   eventTypeFail,
		r:   r,
		err: err,
	}
}

// createHTTPClient builds a client with connect/response-header timeouts and
// the optional per-synthesizer proxy.
func (synthesizer *SpeechSynthesizer) createHTTPClient() *http.Client {
	httpTransport := &http.Transport{
		Dial: (&net.Dialer{
			Timeout: httpConnectTimeout * time.Millisecond,
		}).Dial,
		MaxIdleConns:          1,
		ResponseHeaderTimeout: httpReadHeaderTimeout * time.Millisecond,
	}
	if synthesizer.ProxyURL != "" {
		proxyURL, _ := url.Parse(synthesizer.ProxyURL)
		httpTransport.Proxy = http.ProxyURL(proxyURL)
	}
	return &http.Client{Transport: httpTransport}
}

// genSignature computes the base64-encoded HMAC-SHA1 signature over
// "POST" + url + sorted query parameters for the Authorization header.
func genSignature(url string, request *ttsRequest, secretKey string) string {
	var queryMap = make(map[string]string)
	queryMap["Action"] = request.Action
	queryMap["AppId"] = strconv.FormatInt(int64(request.AppID), 10)
	queryMap["SecretId"] = request.SecretID
	queryMap["Timestamp"] = strconv.FormatInt(int64(request.Timestamp), 10)
	queryMap["Expired"] = strconv.FormatInt(request.Expired, 10)
	queryMap["Text"] = request.Text
	queryMap["SessionId"] = request.SessionID
	queryMap["ModelType"] = strconv.FormatInt(int64(request.ModelType), 10)
	queryMap["VoiceType"] = strconv.FormatInt(int64(request.VoiceType), 10)
	queryMap["SampleRate"] = strconv.FormatInt(int64(request.SampleRate), 10)
	queryMap["Codec"] = request.Codec

	// Sorted keys keep the signed string deterministic.
	var keys []string
	for k := range queryMap {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	var queryStrBuffer bytes.Buffer
	for _, k := range keys {
		queryStrBuffer.WriteString(k)
		queryStrBuffer.WriteString("=")
		queryStrBuffer.WriteString(queryMap[k])
		queryStrBuffer.WriteString("&")
	}

	// Drop the trailing "&".
	rs := []rune(queryStrBuffer.String())
	rsLen := len(rs)
	queryStr := string(rs[0 : rsLen-1])

	signURL := fmt.Sprintf("%s?%s", url, queryStr)

	hmac := hmac.New(sha1.New,
[]byte(secretKey)) 372 | signURL = "POST" + signURL 373 | hmac.Write([]byte(signURL)) 374 | encryptedStr := hmac.Sum([]byte(nil)) 375 | return base64.StdEncoding.EncodeToString(encryptedStr) 376 | } 377 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright (c) 2017-2018 Tencent Ltd. 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /asr/virtual_number_recogizer.go: -------------------------------------------------------------------------------- 1 | package asr 2 | 3 | import ( 4 | "bytes" 5 | "crypto/hmac" 6 | "crypto/sha1" 7 | "encoding/base64" 8 | "encoding/json" 9 | "fmt" 10 | "github.com/tencentcloud/tencentcloud-speech-sdk-go/common" 11 | "net/http" 12 | "net/url" 13 | "runtime/debug" 14 | "sort" 15 | "strconv" 16 | "sync" 17 | "time" 18 | 19 | "github.com/google/uuid" 20 | "github.com/gorilla/websocket" 21 | ) 22 | 23 | // VNRecognitionListener User must impletement it. 
Get recognition result
type VNRecognitionListener interface {
	OnVNRecognitionStart(*VNRecognitionResponse)
	OnVNRecognitionComplete(*VNRecognitionResponse)
	OnVNFail(*VNRecognitionResponse, error)
}

// VNRecognitionResponse is the response of asr service
type VNRecognitionResponse struct {
	Code      int    `json:"code"`
	Message   string `json:"message"`
	VoiceID   string `json:"voice_id,omitempty"`
	MessageID string `json:"message_id,omitempty"`
	Final     uint32 `json:"final,omitempty"`
	Result    uint32 `json:"result"`
}

// VNRecognizer is the entry for ASR service
type VNRecognizer struct {
	//request params
	AppID       string
	VoiceFormat int
	// WaitTime is in milliseconds; 0 selects the backend default of 30
	// seconds, and the maximum is 60 seconds.
	WaitTime uint32

	Credential *common.Credential
	//listener
	listener VNRecognitionListener
	//uuid for voice
	VoiceID string

	//for proxy
	ProxyURL string

	//for websocket connection
	conn *websocket.Conn

	//send data channel
	dataChan chan []byte
	//for listener get response message
	eventChan chan VNRecognitionEvent

	//used in stop function, waiting for stop all goroutines
	sendEnd    chan int
	receiveEnd chan int
	eventEnd   chan int

	mutex   sync.Mutex
	started bool
	hasEnd  bool
}

const (
	gDefaultVoiceFormat = 1

	gProtocol = "wss"
	gHost     = "asr.cloud.tencent.com"
	gPath     = ""
)

// event types dispatched to the listener
const (
	eventTypeVNRecognitionStart    = 1
	eventTypeVNRecognitionComplete = 2
	eventTypeVNFail                = 3
)

type eventTypeVN int

// VNRecognitionEvent pairs an event type with its response and error.
type VNRecognitionEvent struct {
	t   eventTypeVN
	r   *VNRecognitionResponse
	err error
}

// NewVNRecognizer creates instance of VNRecognizer
func NewVNRecognizer(appID string, credential *common.Credential,
	listener VNRecognitionListener) *VNRecognizer {

	reco := &VNRecognizer{
		AppID:       appID,
		Credential:  credential,
		VoiceFormat: gDefaultVoiceFormat,

		dataChan:  make(chan []byte, 6400),
		eventChan: make(chan VNRecognitionEvent, 10),

		sendEnd:    make(chan int),
		receiveEnd: make(chan int),
		eventEnd:   make(chan int),

		listener: listener,
		started:  false,
	}
	return reco
}

// Start connects to server and start a recognition session
func (recognizer *VNRecognizer) Start() error {
	recognizer.mutex.Lock()
	defer recognizer.mutex.Unlock()

	if recognizer.started {
		return fmt.Errorf("recognizer is already started")
	}
	// Generate a fresh voice id unless the caller supplied one.
	if recognizer.VoiceID == "" {
		voiceID := uuid.New().String()
		recognizer.VoiceID = voiceID
	}
	serverURL := recognizer.buildSignatureURL(recognizer.VoiceID)
	signature := recognizer.genSignature(recognizer.VoiceID)
	dialer := websocket.Dialer{}
	if len(recognizer.ProxyURL) > 0 {
		// NOTE(review): url.Parse error is silently ignored here — a bad
		// proxy URL falls back to a direct connection; confirm intended.
		proxyURL, _ := url.Parse(recognizer.ProxyURL)
		dialer.Proxy = http.ProxyURL(proxyURL)
	}

	header := http.Header(make(map[string][]string))
	urlStr := fmt.Sprintf("%s://%s&signature=%s", gProtocol, serverURL, url.QueryEscape(signature))
	conn, _, err := dialer.Dial(urlStr, header)
	if err != nil {
		return fmt.Errorf("voice_id: %s, error: %s", recognizer.VoiceID, err.Error())
	}
	// The first frame from the server acknowledges (or rejects) the session.
	_, data, err := conn.ReadMessage()
	if err != nil {
		conn.Close()
		return fmt.Errorf("voice_id: %s, error: %s", recognizer.VoiceID, err.Error())
	}
	msg := VNRecognitionResponse{}
	err = json.Unmarshal(data, &msg)
	if err != nil {
		conn.Close()
		return fmt.Errorf("voice_id: %s, error: %s", recognizer.VoiceID, err.Error())
	}
	if msg.Code != 0 {
		conn.Close()
		return fmt.Errorf("voice_id: %s, code: %d, message: %s",
			recognizer.VoiceID, msg.Code, msg.Message)
	}

	recognizer.conn = conn
162 | go recognizer.send() 163 | go recognizer.receive() 164 | go recognizer.eventDispatch() 165 | recognizer.started = true 166 | 167 | recognizer.eventChan <- VNRecognitionEvent{ 168 | t: eventTypeVNRecognitionStart, 169 | r: newVNRecognitionResponse(0, "sucess", recognizer.VoiceID, 170 | fmt.Sprintf("%s-RecognitionStart", recognizer.VoiceID), 0), 171 | err: nil, 172 | } 173 | return nil 174 | } 175 | 176 | // Write : write data in channel 177 | func (recognizer *VNRecognizer) Write(data []byte) (error, bool) { 178 | recognizer.mutex.Lock() 179 | defer recognizer.mutex.Unlock() 180 | if !recognizer.started { 181 | return fmt.Errorf("recognizer not running"), false 182 | } 183 | 184 | if recognizer.hasEnd { 185 | return nil, true 186 | } 187 | recognizer.dataChan <- data 188 | return nil, false 189 | } 190 | 191 | // Stop wait for the recognition process to complete 192 | func (recognizer *VNRecognizer) Stop() error { 193 | err := recognizer.stopInternal() 194 | if err != nil { 195 | return err 196 | } 197 | return nil 198 | } 199 | 200 | func (recognizer *VNRecognizer) stopInternal() error { 201 | recognizer.mutex.Lock() 202 | defer recognizer.mutex.Unlock() 203 | if !recognizer.started { 204 | return fmt.Errorf("recognizer is not running") 205 | } 206 | close(recognizer.dataChan) 207 | <-recognizer.receiveEnd 208 | <-recognizer.sendEnd 209 | <-recognizer.eventEnd 210 | recognizer.started = false 211 | err := recognizer.conn.Close() 212 | if err != nil { 213 | return err 214 | } 215 | return nil 216 | } 217 | 218 | func (recognizer *VNRecognizer) onError(code int, message string, err error) { 219 | //recognizer.mutex.Lock() 220 | if !recognizer.started { 221 | return 222 | } 223 | 224 | recognizer.listener.OnVNFail(newVNRecognitionResponse(code, message, recognizer.VoiceID, 225 | fmt.Sprintf("%s-Error", recognizer.VoiceID), 0), err) 226 | /* 227 | recognizer.eventChan <- VNRecognitionEvent{ 228 | t: eventTypeVNFail, 229 | r: newVNRecognitionResponse(code, 
message, recognizer.VoiceID,
				fmt.Sprintf("%s-Error", recognizer.VoiceID), 0),
			err: err,
		}
		recognizer.mutex.Unlock()
	*/
	go recognizer.stopInternal()
}

// send streams queued audio chunks to the server and, once the data channel
// is drained and closed, tells the server the audio is finished with an
// {"type":"end"} text frame.
func (recognizer *VNRecognizer) send() {
	defer func() {
		recognizer.genRecoverFunc()() // handle panic
		close(recognizer.sendEnd)
	}()
	for chunk := range recognizer.dataChan {
		if err := recognizer.conn.WriteMessage(websocket.BinaryMessage, chunk); err != nil {
			recognizer.onError(-1, "send error", fmt.Errorf("voice_id: %s, error: %s",
				recognizer.VoiceID, err.Error()))
			return
		}
	}
	if err := recognizer.conn.WriteMessage(websocket.TextMessage, []byte("{\"type\":\"end\"}")); err != nil {
		recognizer.onError(-1, "send error", fmt.Errorf("voice_id: %s, error: %s",
			recognizer.VoiceID, err.Error()))
	}
}

// eventDispatch forwards queued events to the user-supplied listener until
// the event channel is closed by the receive goroutine.
func (recognizer *VNRecognizer) eventDispatch() {
	defer func() {
		recognizer.genRecoverFunc()() // handle panic
		close(recognizer.eventEnd)
	}()
	for event := range recognizer.eventChan {
		switch event.t {
		case eventTypeVNRecognitionStart:
			recognizer.listener.OnVNRecognitionStart(event.r)
		case eventTypeVNRecognitionComplete:
			recognizer.listener.OnVNRecognitionComplete(event.r)
		case eventTypeVNFail:
			recognizer.listener.OnVNFail(event.r, event.err)
		}
	}
}

// receive reads server messages until an error occurs or the final result
// (final == 1) arrives, emitting a completion event in the latter case.
func (recognizer *VNRecognizer) receive() {
	defer func() {
		recognizer.genRecoverFunc()() // handle panic
		close(recognizer.eventChan)
		close(recognizer.receiveEnd)
	}()
	for {
		_, data, err := recognizer.conn.ReadMessage()
		if err != nil {
			recognizer.onError(-1, "receive error", fmt.Errorf("voice_id: %s, error: %s", recognizer.VoiceID, err.Error()))
			break
		}

		msg := VNRecognitionResponse{}
		if err = json.Unmarshal(data, &msg); err != nil {
			recognizer.onError(-1, "receive error",
				fmt.Errorf("voice_id: %s, error: %s",
					recognizer.VoiceID, err.Error()))
			break
		}
		if msg.Code != 0 {
			recognizer.onError(msg.Code, msg.Message,
				fmt.Errorf("VoiceID: %s, error code %d, message: %s",
					recognizer.VoiceID, msg.Code, msg.Message))
			break
		}
		// NOTE(review): unconditional stdout logging in library code; consider
		// removing or putting it behind a debug flag.
		fmt.Println("receive data:", msg)
		if msg.Final == 1 {
			recognizer.hasEnd = true
			recognizer.eventChan <- VNRecognitionEvent{
				t:   eventTypeVNRecognitionComplete,
				r:   &msg,
				err: nil,
			}
			break
		}
	}
}

// buildURL assembles the (unsigned) request URL of the virtual number
// transfer endpoint, with query keys in lexicographic order.
func (recognizer *VNRecognizer) buildURL(voiceID string) string {
	timestamp := time.Now().Unix()
	timestampStr := strconv.FormatInt(timestamp, 10)
	queryMap := map[string]string{
		"secretid":  recognizer.Credential.SecretId,
		"timestamp": timestampStr,
		"expired":   strconv.FormatInt(timestamp+24*60*60, 10),
		"nonce":     timestampStr,
		"appid":     recognizer.AppID,
		// request params
		"voice_id":     voiceID,
		"voice_format": strconv.FormatInt(int64(recognizer.VoiceFormat), 10),
		"wait_time":    strconv.FormatUint(uint64(recognizer.WaitTime), 10),
	}
	keys := make([]string, 0, len(queryMap))
	for k := range queryMap {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	var queryStrBuffer bytes.Buffer
	for _, k := range keys {
		queryStrBuffer.WriteString(k)
		queryStrBuffer.WriteString("=")
		queryStrBuffer.WriteString(queryMap[k])
		queryStrBuffer.WriteString("&")
	}
	queryStr := queryStrBuffer.String()
	queryStr = queryStr[:len(queryStr)-1] // drop trailing '&'

	url := fmt.Sprintf("%s/VirtualNumberTransfer?%s", gHost, queryStr)
	//url := fmt.Sprintf("%s/VirtualNumberTransfer/%s?%s", gHost, recognizer.AppID, queryStr)
	return url
}

// buildSignatureURL assembles the URL string that is both signed and dialed
// (unlike buildURL it carries the appid in the path, not the query).
func (recognizer *VNRecognizer) buildSignatureURL(voiceID string) string {
	timestamp := time.Now().Unix()
	timestampStr := strconv.FormatInt(timestamp, 10)
	queryMap := map[string]string{
		"secretid":  recognizer.Credential.SecretId,
		"timestamp": timestampStr,
		"expired":   strconv.FormatInt(timestamp+24*60*60, 10),
		"nonce":     timestampStr,
		// request params
		"voice_id":     voiceID,
		"voice_format": strconv.FormatInt(int64(recognizer.VoiceFormat), 10),
	}
	keys := make([]string, 0, len(queryMap))
	for k := range queryMap {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	var queryStrBuffer bytes.Buffer
	for _, k := range keys {
		queryStrBuffer.WriteString(k)
		queryStrBuffer.WriteString("=")
		queryStrBuffer.WriteString(queryMap[k])
		queryStrBuffer.WriteString("&")
	}
	queryStr := queryStrBuffer.String()
	queryStr = queryStr[:len(queryStr)-1] // drop trailing '&'

	return fmt.Sprintf("%s/asr/virtual_number/v1/%s?%s", gHost, recognizer.AppID, queryStr)
}

// genSignature computes the base64-encoded HMAC-SHA1 signature over the
// signature URL for the given voice id.
func (recognizer *VNRecognizer) genSignature(voiceID string) string {
	signURL := recognizer.buildSignatureURL(voiceID)
	mac := hmac.New(sha1.New, []byte(recognizer.Credential.SecretKey))
	mac.Write([]byte(signURL))
	return base64.StdEncoding.EncodeToString(mac.Sum(nil))
}

// newVNRecognitionResponse builds a response value for locally generated events.
func newVNRecognitionResponse(code int, message string, voiceID string,
	messageID string, final uint32) *VNRecognitionResponse {
	return &VNRecognitionResponse{
		Code:      code,
		Message:   message,
		VoiceID:   voiceID,
		MessageID: messageID,
		Final:     final,
	}
}

func (recognizer
*VNRecognizer) genRecoverFunc() func() { 412 | return func() { 413 | if r := recover(); r != nil { 414 | var err error 415 | switch r := r.(type) { 416 | case error: 417 | err = r 418 | default: 419 | err = fmt.Errorf("%v", r) 420 | } 421 | retErr := fmt.Errorf("panic error ocurred! [err: %s] [stack: %s]", 422 | err.Error(), string(debug.Stack())) 423 | recognizer.eventChan <- VNRecognitionEvent{ 424 | t: eventTypeVNFail, 425 | r: newVNRecognitionResponse(-1, "panic error", recognizer.VoiceID, 426 | fmt.Sprintf("%s-Error", recognizer.VoiceID), 0), 427 | err: retErr, 428 | } 429 | } 430 | } 431 | } 432 | -------------------------------------------------------------------------------- /tts/speechwssynthesizer.go: -------------------------------------------------------------------------------- 1 | package tts 2 | 3 | import ( 4 | "bytes" 5 | "crypto/hmac" 6 | "crypto/sha1" 7 | "encoding/base64" 8 | "encoding/json" 9 | "fmt" 10 | "github.com/gorilla/websocket" 11 | "net/http" 12 | "net/url" 13 | "runtime/debug" 14 | "sort" 15 | "strconv" 16 | "sync" 17 | "time" 18 | 19 | "github.com/google/uuid" 20 | "github.com/tencentcloud/tencentcloud-speech-sdk-go/common" 21 | ) 22 | 23 | // SpeechWsSynthesisResponse response 24 | type SpeechWsSynthesisResponse struct { 25 | SessionId string `json:"session_id"` //音频流唯一 id,由客户端在握手阶段生成并赋值在调用参数中 26 | RequestId string `json:"request_id"` //音频流唯一 id,由服务端在握手阶段自动生成 27 | MessageId string `json:"message_id"` //本 message 唯一 id 28 | Code int `json:"code"` //状态码,0代表正常,非0值表示发生错误 29 | Message string `json:"message"` //错误说明,发生错误时显示这个错误发生的具体原因,随着业务发展或体验优化,此文本可能会经常保持变更或更新 30 | Result SynthesisSubtitles `json:"result"` //最新语音合成文本结果 31 | Final int `json:"final"` //该字段返回1时表示文本全部合成结束,客户端收到后需主动关闭 websocket 连接 32 | } 33 | 34 | func (s *SpeechWsSynthesisResponse) ToString() string { 35 | d, _ := json.Marshal(s) 36 | return string(d) 37 | } 38 | 39 | // SynthesisSubtitles subtitles 40 | type SynthesisSubtitles struct { 41 | Subtitles []SynthesisSubtitle 
`json:"subtitles"`
}

// SynthesisSubtitle is one subtitle item of the synthesis result.
type SynthesisSubtitle struct {
	Text       string
	Phoneme    string
	BeginTime  int64
	EndTime    int64
	BeginIndex int
	EndIndex   int
}

// SpeechWsSynthesizer is the entry for the TTS websocket service.
// NOTE(review): json tags on unexported fields (action) are ignored by
// encoding/json.
type SpeechWsSynthesizer struct {
	Credential       *common.Credential
	action           string  `json:"Action"`
	AppID            int64   `json:"AppId"`
	Timestamp        int64   `json:"Timestamp"`
	Expired          int64   `json:"Expired"`
	SessionId        string  `json:"SessionId"`
	Text             string  `json:"Text"`
	ModelType        int64   `json:"ModelType"`
	VoiceType        int64   `json:"VoiceType"`
	SampleRate       int64   `json:"SampleRate"`
	Codec            string  `json:"Codec"`
	Speed            float64 `json:"Speed"`
	Volume           float64 `json:"Volume"`
	EnableSubtitle   bool    `json:"EnableSubtitle"`
	EmotionCategory  string  `json:"EmotionCategory"`
	EmotionIntensity int64   `json:"EmotionIntensity"`
	SegmentRate      int64   `json:"SegmentRate"`
	FastVoiceType    string  `json:"FastVoiceType"`
	ExtParam         map[string]string

	ProxyURL    string
	mutex       sync.Mutex
	receiveEnd  chan int
	eventChan   chan speechWsSynthesisEvent
	eventEnd    chan int
	listener    SpeechWsSynthesisListener
	status      int
	statusMutex sync.Mutex
	conn        *websocket.Conn // websocket connection
	started     bool

	Debug     bool                 // whether to emit debug logs
	DebugFunc func(message string) // sink for debug log messages
}

// SpeechWsSynthesisListener receives synthesis lifecycle callbacks.
type SpeechWsSynthesisListener interface {
	OnSynthesisStart(*SpeechWsSynthesisResponse)
	OnSynthesisEnd(*SpeechWsSynthesisResponse)
	OnAudioResult(data []byte)
	OnTextResult(*SpeechWsSynthesisResponse)
	OnSynthesisFail(*SpeechWsSynthesisResponse, error)
}

const (
	defaultWsVoiceType  = 0
	defaultWsSampleRate = 16000
	defaultWsCodec      = "pcm"
	defaultWsAction     = "TextToStreamAudioWS"
	wsConnectTimeout    = 2000
	wsReadHeaderTimeout = 2000
	maxWsMessageSize    = 10240
	wsProtocol          = "wss"
	wsHost              = "tts.cloud.tencent.com"
	wsPath              = "/stream_ws"
)

const (
	eventTypeWsStart = iota
	eventTypeWsEnd
	eventTypeWsAudioResult
	eventTypeWsTextResult
	eventTypeWsFail
)

type eventWsType int

// speechWsSynthesisEvent is an internal event handed to the dispatcher.
type speechWsSynthesisEvent struct {
	t   eventWsType
	r   *SpeechWsSynthesisResponse
	d   []byte
	err error
}

// NewSpeechWsSynthesizer creates an instance of SpeechWsSynthesizer with
// sensible defaults (pcm, 16 kHz, default voice).
func NewSpeechWsSynthesizer(appID int64, credential *common.Credential, listener SpeechWsSynthesisListener) *SpeechWsSynthesizer {
	return &SpeechWsSynthesizer{
		AppID:      appID,
		Credential: credential,
		action:     defaultWsAction,
		VoiceType:  defaultWsVoiceType,
		SampleRate: defaultWsSampleRate,
		Codec:      defaultWsCodec,
		listener:   listener,
		status:     0,
		receiveEnd: make(chan int),
		eventChan:  make(chan speechWsSynthesisEvent, 10),
		eventEnd:   make(chan int),
	}
}

// Synthesis connects to the server and starts a synthesizer session.
func (synthesizer *SpeechWsSynthesizer) Synthesis() error {
	synthesizer.mutex.Lock()
	defer synthesizer.mutex.Unlock()

	if synthesizer.started {
		return fmt.Errorf("synthesizer is already started")
	}
	if synthesizer.SessionId == "" {
		synthesizer.SessionId = uuid.New().String()
	}
	now := time.Now().Unix()
	synthesizer.Timestamp = now
	synthesizer.Expired = now + 24*60*60
	// The signature is computed over the unescaped URL ...
	serverURL := synthesizer.buildURL(false)
	signature := synthesizer.genWsSignature(serverURL, synthesizer.Credential.SecretKey)
	if synthesizer.Debug && synthesizer.DebugFunc != nil {
		synthesizer.DebugFunc(fmt.Sprintf("serverURL:%s , signature:%s", serverURL, signature))
	}
	dialer := websocket.Dialer{}
	if len(synthesizer.ProxyURL) > 0 {
		proxyURL, _ := url.Parse(synthesizer.ProxyURL)
		dialer.Proxy = http.ProxyURL(proxyURL)
	}
	// ... while the dialed URL carries the escaped text parameter.
	serverURL = synthesizer.buildURL(true)
	header := http.Header(make(map[string][]string))
	urlStr := fmt.Sprintf("%s://%s&Signature=%s", wsProtocol, serverURL, url.QueryEscape(signature))
	if synthesizer.Debug && synthesizer.DebugFunc != nil {
		synthesizer.DebugFunc(fmt.Sprintf("urlStr:%s ", urlStr))
	}
	conn, _, err := dialer.Dial(urlStr, header)
	if err != nil {
		return fmt.Errorf("session_id: %s, error: %s", synthesizer.SessionId, err.Error())
	}
	// The server acknowledges the handshake with a JSON message.
	_, data, err := conn.ReadMessage()
	if err != nil {
		conn.Close()
		return fmt.Errorf("session_id: %s, error: %s", synthesizer.SessionId, err.Error())
	}
	msg := SpeechWsSynthesisResponse{}
	if err = json.Unmarshal(data, &msg); err != nil {
		conn.Close()
		return fmt.Errorf("session_id: %s, error: %s", synthesizer.SessionId, err.Error())
	}
	if msg.Code != 0 {
		conn.Close()
		return fmt.Errorf("session_id: %s, code: %d, message: %s",
			synthesizer.SessionId, msg.Code, msg.Message)
	}
	msg.SessionId = synthesizer.SessionId
	synthesizer.conn = conn
	go synthesizer.receive()
	go synthesizer.eventDispatch()
	synthesizer.started = true
	synthesizer.setStatus(eventTypeWsStart)
	synthesizer.eventChan <- speechWsSynthesisEvent{
		t:   eventTypeWsStart,
		r:   &msg,
		err: nil,
	}
	return nil
}

// receive reads binary audio frames and JSON text messages until the final
// message or an error, translating them into dispatcher events.
func (synthesizer *SpeechWsSynthesizer) receive() {
	defer func() {
		synthesizer.genRecoverFunc()() // handle panic
		close(synthesizer.eventChan)
		close(synthesizer.receiveEnd)
	}()
	for {
		optCode, data, err := synthesizer.conn.ReadMessage()
		if err != nil {
224 | synthesizer.onError(fmt.Errorf("SessionId: %s, error: %s", synthesizer.SessionId, err.Error())) 225 | break 226 | } 227 | if optCode == websocket.BinaryMessage { 228 | if synthesizer.Debug && synthesizer.DebugFunc != nil { 229 | synthesizer.DebugFunc(fmt.Sprintf("[%s] receive binary message size: %d", synthesizer.SessionId, len(data))) 230 | } 231 | msg := SpeechWsSynthesisResponse{SessionId: synthesizer.SessionId} 232 | synthesizer.eventChan <- speechWsSynthesisEvent{ 233 | t: eventTypeWsAudioResult, 234 | r: &msg, 235 | d: data, 236 | err: nil, 237 | } 238 | } 239 | if optCode == websocket.TextMessage { 240 | if synthesizer.Debug && synthesizer.DebugFunc != nil { 241 | synthesizer.DebugFunc(fmt.Sprintf("[%s] receive text message: %s", synthesizer.SessionId, string(data))) 242 | } 243 | msg := SpeechWsSynthesisResponse{} 244 | err = json.Unmarshal(data, &msg) 245 | if err != nil { 246 | synthesizer.onError(fmt.Errorf("SessionId: %s, error: %s", 247 | synthesizer.SessionId, err.Error())) 248 | break 249 | } 250 | msg.SessionId = synthesizer.SessionId 251 | if msg.Code != 0 { 252 | synthesizer.onErrorResp(msg, fmt.Errorf("VoiceID: %s, error code %d, message: %s", 253 | synthesizer.SessionId, msg.Code, msg.Message)) 254 | break 255 | } 256 | if msg.Final == 1 { 257 | synthesizer.setStatus(eventTypeWsEnd) 258 | synthesizer.closeConn() 259 | synthesizer.eventChan <- speechWsSynthesisEvent{ 260 | t: eventTypeWsEnd, 261 | r: &msg, 262 | err: nil, 263 | } 264 | break 265 | } 266 | synthesizer.eventChan <- speechWsSynthesisEvent{ 267 | t: eventTypeWsTextResult, 268 | r: &msg, 269 | err: nil, 270 | } 271 | } 272 | } 273 | } 274 | 275 | func (synthesizer *SpeechWsSynthesizer) eventDispatch() { 276 | defer func() { 277 | // handle panic 278 | synthesizer.genRecoverFunc()() 279 | close(synthesizer.eventEnd) 280 | }() 281 | for e := range synthesizer.eventChan { 282 | switch e.t { 283 | case eventTypeWsStart: 284 | synthesizer.listener.OnSynthesisStart(e.r) 285 | case 
eventTypeWsEnd: 286 | synthesizer.listener.OnSynthesisEnd(e.r) 287 | case eventTypeWsAudioResult: 288 | synthesizer.listener.OnAudioResult(e.d) 289 | case eventTypeWsTextResult: 290 | synthesizer.listener.OnTextResult(e.r) 291 | case eventTypeWsFail: 292 | synthesizer.listener.OnSynthesisFail(e.r, e.err) 293 | } 294 | } 295 | } 296 | 297 | // Wait Wait 298 | func (synthesizer *SpeechWsSynthesizer) Wait() error { 299 | synthesizer.mutex.Lock() 300 | defer synthesizer.mutex.Unlock() 301 | <-synthesizer.eventEnd 302 | <-synthesizer.receiveEnd 303 | return nil 304 | } 305 | 306 | func (synthesizer *SpeechWsSynthesizer) getStatus() int { 307 | synthesizer.statusMutex.Lock() 308 | defer synthesizer.statusMutex.Unlock() 309 | status := synthesizer.status 310 | return status 311 | } 312 | 313 | func (synthesizer *SpeechWsSynthesizer) setStatus(status int) { 314 | synthesizer.statusMutex.Lock() 315 | defer synthesizer.statusMutex.Unlock() 316 | synthesizer.status = status 317 | } 318 | 319 | func (synthesizer *SpeechWsSynthesizer) onError(err error) { 320 | r := &SpeechWsSynthesisResponse{ 321 | SessionId: synthesizer.SessionId, 322 | } 323 | synthesizer.closeConn() 324 | synthesizer.eventChan <- speechWsSynthesisEvent{ 325 | t: eventTypeWsFail, 326 | r: r, 327 | err: err, 328 | } 329 | } 330 | 331 | func (synthesizer *SpeechWsSynthesizer) onErrorResp(resp SpeechWsSynthesisResponse, err error) { 332 | synthesizer.closeConn() 333 | synthesizer.eventChan <- speechWsSynthesisEvent{ 334 | t: eventTypeWsFail, 335 | r: &resp, 336 | err: err, 337 | } 338 | } 339 | 340 | func (synthesizer *SpeechWsSynthesizer) buildURL(escape bool) string { 341 | var queryMap = make(map[string]string) 342 | queryMap["Action"] = synthesizer.action 343 | queryMap["AppId"] = strconv.FormatInt(synthesizer.AppID, 10) 344 | queryMap["SecretId"] = synthesizer.Credential.SecretId 345 | queryMap["Timestamp"] = strconv.FormatInt(synthesizer.Timestamp, 10) 346 | queryMap["Expired"] = 
strconv.FormatInt(synthesizer.Expired, 10) 347 | if escape { 348 | //url escapes the string so it can be safely placed 349 | queryMap["Text"] = url.QueryEscape(synthesizer.Text) 350 | } else { 351 | queryMap["Text"] = synthesizer.Text 352 | } 353 | queryMap["FastVoiceType"] = synthesizer.FastVoiceType 354 | queryMap["SessionId"] = synthesizer.SessionId 355 | queryMap["ModelType"] = strconv.FormatInt(synthesizer.ModelType, 10) 356 | queryMap["VoiceType"] = strconv.FormatInt(synthesizer.VoiceType, 10) 357 | queryMap["SampleRate"] = strconv.FormatInt(synthesizer.SampleRate, 10) 358 | queryMap["Speed"] = strconv.FormatFloat(synthesizer.Speed, 'g', -1, 64) 359 | queryMap["Volume"] = strconv.FormatFloat(synthesizer.Volume, 'g', -1, 64) 360 | queryMap["Codec"] = synthesizer.Codec 361 | queryMap["EnableSubtitle"] = strconv.FormatBool(synthesizer.EnableSubtitle) 362 | queryMap["EmotionCategory"] = synthesizer.EmotionCategory 363 | queryMap["EmotionIntensity"] = strconv.FormatInt(synthesizer.EmotionIntensity, 10) 364 | queryMap["SegmentRate"] = strconv.FormatInt(synthesizer.SegmentRate, 10) 365 | for k, v := range synthesizer.ExtParam { 366 | queryMap[k] = v 367 | } 368 | var keys []string 369 | for k := range queryMap { 370 | keys = append(keys, k) 371 | } 372 | sort.Strings(keys) 373 | 374 | var queryStrBuffer bytes.Buffer 375 | for _, k := range keys { 376 | queryStrBuffer.WriteString(k) 377 | queryStrBuffer.WriteString("=") 378 | queryStrBuffer.WriteString(queryMap[k]) 379 | queryStrBuffer.WriteString("&") 380 | } 381 | rs := []rune(queryStrBuffer.String()) 382 | rsLen := len(rs) 383 | queryStr := string(rs[0 : rsLen-1]) 384 | serverURL := fmt.Sprintf("%s%s", wsHost, wsPath) 385 | signURL := fmt.Sprintf("%s?%s", serverURL, queryStr) 386 | return signURL 387 | } 388 | 389 | func (synthesizer *SpeechWsSynthesizer) genWsSignature(signURL string, secretKey string) string { 390 | hmac := hmac.New(sha1.New, []byte(secretKey)) 391 | signURL = "GET" + signURL 392 | 
hmac.Write([]byte(signURL)) 393 | encryptedStr := hmac.Sum([]byte(nil)) 394 | return base64.StdEncoding.EncodeToString(encryptedStr) 395 | } 396 | 397 | func (synthesizer *SpeechWsSynthesizer) genRecoverFunc() func() { 398 | return func() { 399 | if r := recover(); r != nil { 400 | var err error 401 | switch r := r.(type) { 402 | case error: 403 | err = r 404 | default: 405 | err = fmt.Errorf("%v", r) 406 | } 407 | retErr := fmt.Errorf("panic error ocurred! [err: %s] [stack: %s]", 408 | err.Error(), string(debug.Stack())) 409 | msg := SpeechWsSynthesisResponse{ 410 | SessionId: synthesizer.SessionId, 411 | } 412 | synthesizer.eventChan <- speechWsSynthesisEvent{ 413 | t: eventTypeWsFail, 414 | r: &msg, 415 | err: retErr, 416 | } 417 | } 418 | } 419 | } 420 | 421 | // CloseConn close connection 422 | func (synthesizer *SpeechWsSynthesizer) CloseConn() { 423 | synthesizer.closeConn() 424 | } 425 | 426 | func (synthesizer *SpeechWsSynthesizer) closeConn() { 427 | err := synthesizer.conn.Close() 428 | if err != nil && synthesizer.Debug && synthesizer.DebugFunc != nil { 429 | synthesizer.DebugFunc(fmt.Sprintf("%s %s", time.Now().String(), err.Error())) 430 | } 431 | } 432 | -------------------------------------------------------------------------------- /soe/speaking_assessment.go: -------------------------------------------------------------------------------- 1 | package soe 2 | 3 | import ( 4 | "bytes" 5 | "crypto/hmac" 6 | "crypto/sha1" 7 | "encoding/base64" 8 | "encoding/json" 9 | "fmt" 10 | "net/http" 11 | "net/url" 12 | "runtime/debug" 13 | "sort" 14 | "strconv" 15 | "strings" 16 | "sync" 17 | "time" 18 | 19 | "github.com/google/uuid" 20 | "github.com/gorilla/websocket" 21 | 22 | "github.com/tencentcloud/tencentcloud-speech-sdk-go/common" 23 | ) 24 | 25 | // SpeakingAssessmentListener User must impletement it. 
Get recognition result 26 | type SpeakingAssessmentListener interface { 27 | OnRecognitionStart(*SpeakingAssessmentResponse) 28 | OnRecognitionComplete(*SpeakingAssessmentResponse) 29 | OnIntermediateResults(*SpeakingAssessmentResponse) 30 | OnFail(*SpeakingAssessmentResponse, error) 31 | } 32 | 33 | // SpeakingAssessmentResponse is the reponse of asr service 34 | type SpeakingAssessmentResponse struct { 35 | Code int `json:"code"` 36 | Message string `json:"message"` 37 | VoiceID string `json:"voice_id,omitempty"` 38 | MessageID string `json:"message_id,omitempty"` 39 | Final uint32 `json:"final,omitempty"` 40 | Result SentenceInfo `json:"result"` 41 | } 42 | 43 | // SentenceInfo ... 44 | type SentenceInfo struct { 45 | SuggestedScore float64 `json:"SuggestedScore"` 46 | PronAccuracy float64 `json:"PronAccuracy"` 47 | PronFluency float64 `json:"PronFluency"` 48 | PronCompletion float64 `json:"PronCompletion"` 49 | Words []WordRsp `json:"Words"` 50 | SentenceId int64 `json:"SentenceId"` 51 | RefTextId int64 `json:"RefTextId"` 52 | KeyWordHits []float32 `json:"KeyWordHits"` 53 | UnKeyWordHits []float32 `json:"UnKeyWordHits"` 54 | } 55 | 56 | // PhoneInfoTypeRsp is a struct/interface 57 | type PhoneInfoTypeRsp struct { 58 | Mbtm int64 `json:"MemBeginTime"` 59 | Metm int64 `json:"MemEndTime"` 60 | PronAccuracy float64 `json:"PronAccuracy"` 61 | DetectedStress bool `json:"DetectedStress"` 62 | Phone string `json:"Phone"` 63 | ReferencePhone string `json:"ReferencePhone"` 64 | ReferenceLetter string `json:"ReferenceLetter"` 65 | Stress bool `json:"Stress"` 66 | Tag int64 `json:"MatchTag"` 67 | } 68 | 69 | // Tone 中文声调检测结果 70 | type Tone struct { 71 | Valid bool `json:"Valid"` 72 | RefTone int `json:"RefTone"` 73 | HypTone int `json:"HypothesisTone"` 74 | // Confidence float32 `json:"Confidence"` 75 | } 76 | 77 | // WordRsp is a struct/interface 78 | type WordRsp struct { 79 | Mbtm int64 `json:"MemBeginTime"` 80 | Metm int64 `json:"MemEndTime"` 81 | PronAccuracy float64 
`json:"PronAccuracy"` 82 | PronFluency float64 `json:"PronFluency"` 83 | ReferenceWord string `json:"ReferenceWord"` 84 | Word string `json:"Word"` 85 | Tag int64 `json:"MatchTag"` 86 | KeywordTag int64 `json:"KeywordTag"` 87 | PhoneInfo []PhoneInfoTypeRsp `json:"PhoneInfos"` 88 | Tone Tone `json:"Tone"` 89 | } 90 | 91 | // AudioFormat type 92 | const ( 93 | AudioFormatPCM = 0 94 | AudioFormatWav = 1 95 | AudioFormatMp3 = 2 96 | AudioFormatSilk = 3 97 | AudioFormatSpeex = 4 98 | ) 99 | 100 | // SpeechRecognizer is the entry for ASR service 101 | type SpeechRecognizer struct { 102 | //request params 103 | AppID string 104 | VoiceFormat int 105 | End int 106 | Timestamp int 107 | Nonce int 108 | Signature string 109 | VoiceData []byte 110 | Expired int 111 | TextMode int64 112 | RefText string 113 | Keyword string 114 | EvalMode int64 115 | ScoreCoeff float64 116 | ServerEngineType string 117 | SentenceInfoEnabled int64 118 | // 录音识别模式,0:实时识别 1:录音识别 119 | // 录音识别下可发送单个大长度分片,但是单次连接只能发一个分片,对音频的大小有限制,得到识别结果后需要重新建立连接 120 | // 推荐使用实时识别模式 121 | RecMode int 122 | 123 | Credential *common.Credential 124 | //listener 125 | listener SpeakingAssessmentListener 126 | //uuid for voice 127 | VoiceID string 128 | //for proxy 129 | ProxyURL string 130 | //for websocet connection 131 | conn *websocket.Conn 132 | //send data channel 133 | dataChan chan []byte 134 | //for listener get response message 135 | eventChan chan speechRecognitionEvent 136 | 137 | //used in stop function, waiting for stop all goroutines 138 | sendEnd chan int 139 | receiveEnd chan int 140 | eventEnd chan int 141 | 142 | mutex sync.Mutex 143 | started bool 144 | hasEnd bool 145 | } 146 | 147 | const ( 148 | defaultVoiceFormat = 1 149 | 150 | protocol = "wss" 151 | host = "soe.cloud.tencent.com" 152 | path = "soe/api" 153 | ) 154 | 155 | const ( 156 | eventTypeRecognitionStart = 1 157 | eventTypeIntermediateResults = 2 158 | eventTypeRecognitionComplete = 3 159 | eventTypeFail = 4 160 | ) 161 | 162 | type 
eventType int 163 | 164 | type speechRecognitionEvent struct { 165 | t eventType 166 | r *SpeakingAssessmentResponse 167 | err error 168 | } 169 | 170 | // NewSpeechRecognizer creates instance of SpeechRecognizer 171 | func NewSpeechRecognizer(appID string, credential *common.Credential, 172 | listener SpeakingAssessmentListener) *SpeechRecognizer { 173 | 174 | reco := &SpeechRecognizer{ 175 | AppID: appID, 176 | VoiceFormat: defaultVoiceFormat, 177 | End: 0, 178 | Timestamp: 0, 179 | Nonce: 0, 180 | Signature: "", 181 | VoiceData: nil, 182 | Expired: 0, 183 | TextMode: 0, 184 | RefText: "", 185 | Keyword: "", 186 | EvalMode: 0, 187 | ScoreCoeff: 1.0, 188 | RecMode: 0, 189 | ServerEngineType: "16k_en", 190 | SentenceInfoEnabled: 0, 191 | Credential: credential, 192 | listener: listener, 193 | VoiceID: "", 194 | ProxyURL: "", 195 | conn: nil, 196 | dataChan: make(chan []byte, 6400), 197 | eventChan: make(chan speechRecognitionEvent, 10), 198 | sendEnd: make(chan int), 199 | receiveEnd: make(chan int), 200 | eventEnd: make(chan int), 201 | mutex: sync.Mutex{}, 202 | started: false, 203 | hasEnd: false, 204 | } 205 | return reco 206 | } 207 | 208 | // Start connects to server and start a recognition session 209 | func (recognizer *SpeechRecognizer) Start() error { 210 | recognizer.mutex.Lock() 211 | defer recognizer.mutex.Unlock() 212 | 213 | if recognizer.started { 214 | return fmt.Errorf("recognizer is already started") 215 | } 216 | if recognizer.VoiceID == "" { 217 | voiceID := uuid.New().String() 218 | recognizer.VoiceID = voiceID 219 | } 220 | serverURL := recognizer.buildURL(recognizer.VoiceID) 221 | signature := recognizer.genSignature(serverURL) 222 | serverURL = serverURL[strings.Index(serverURL, "?")+1:] 223 | //请求参数进行转义 224 | serverURL = fmt.Sprintf("%s/%s/%s?%s", host, path, recognizer.AppID, url.PathEscape(serverURL)) 225 | dialer := websocket.Dialer{} 226 | if len(recognizer.ProxyURL) > 0 { 227 | proxyURL, _ := url.Parse(recognizer.ProxyURL) 228 | 
dialer.Proxy = http.ProxyURL(proxyURL) 229 | } 230 | 231 | header := http.Header(make(map[string][]string)) 232 | urlStr := fmt.Sprintf("%s://%s&signature=%s", protocol, serverURL, url.QueryEscape(signature)) 233 | conn, _, err := dialer.Dial(urlStr, header) 234 | if err != nil { 235 | return fmt.Errorf("voice_id: %s, error: %s", recognizer.VoiceID, err.Error()) 236 | } 237 | _, data, err := conn.ReadMessage() 238 | if err != nil { 239 | conn.Close() 240 | return fmt.Errorf("voice_id: %s, error: %s", recognizer.VoiceID, err.Error()) 241 | } 242 | msg := SpeakingAssessmentResponse{} 243 | err = json.Unmarshal(data, &msg) 244 | if err != nil { 245 | conn.Close() 246 | return fmt.Errorf("voice_id: %s, error: %s", recognizer.VoiceID, err.Error()) 247 | } 248 | if msg.Code != 0 { 249 | conn.Close() 250 | return fmt.Errorf("voice_id: %s, code: %d, message: %s", 251 | recognizer.VoiceID, msg.Code, msg.Message) 252 | } 253 | 254 | recognizer.conn = conn 255 | go recognizer.send() 256 | go recognizer.receive() 257 | go recognizer.eventDispatch() 258 | recognizer.started = true 259 | 260 | recognizer.eventChan <- speechRecognitionEvent{ 261 | t: eventTypeRecognitionStart, 262 | r: newSpeechRecognitionResponse(0, "success", recognizer.VoiceID, 263 | fmt.Sprintf("%s-RecognitionStart", recognizer.VoiceID), 0), 264 | err: nil, 265 | } 266 | return nil 267 | } 268 | 269 | // Write : write data in channel 270 | func (recognizer *SpeechRecognizer) Write(data []byte) error { 271 | recognizer.mutex.Lock() 272 | defer recognizer.mutex.Unlock() 273 | if !recognizer.started { 274 | return fmt.Errorf("recognizer not running") 275 | } 276 | recognizer.dataChan <- data 277 | return nil 278 | } 279 | 280 | // Stop wait for the recognition process to complete 281 | func (recognizer *SpeechRecognizer) Stop() error { 282 | err := recognizer.stopInternal() 283 | if err != nil { 284 | return err 285 | } 286 | return nil 287 | } 288 | 289 | func (recognizer *SpeechRecognizer) stopInternal() error 
{ 290 | recognizer.mutex.Lock() 291 | defer recognizer.mutex.Unlock() 292 | if !recognizer.started { 293 | return fmt.Errorf("recognizer is not running") 294 | } 295 | close(recognizer.dataChan) 296 | <-recognizer.receiveEnd 297 | <-recognizer.sendEnd 298 | <-recognizer.eventEnd 299 | recognizer.started = false 300 | err := recognizer.conn.Close() 301 | if err != nil { 302 | return err 303 | } 304 | return nil 305 | } 306 | 307 | func (recognizer *SpeechRecognizer) onError(code int, message string, err error) { 308 | if !recognizer.started { 309 | return 310 | } 311 | recognizer.listener.OnFail(newSpeechRecognitionResponse(code, message, recognizer.VoiceID, 312 | fmt.Sprintf("%s-Error", recognizer.VoiceID), 0), err) 313 | go recognizer.stopInternal() 314 | } 315 | 316 | func (recognizer *SpeechRecognizer) send() { 317 | defer func() { 318 | // handle panic 319 | recognizer.genRecoverFunc()() 320 | close(recognizer.sendEnd) 321 | }() 322 | //send data 323 | for data := range recognizer.dataChan { 324 | if err := recognizer.conn.WriteMessage(websocket.BinaryMessage, data); err != nil { 325 | recognizer.onError(-1, "send error", fmt.Errorf("voice_id: %s, error: %s", 326 | recognizer.VoiceID, err.Error())) 327 | return 328 | } 329 | } 330 | //send stop msg 331 | if err := recognizer.conn.WriteMessage(websocket.TextMessage, []byte("{\"type\":\"end\"}")); err != nil { 332 | recognizer.onError(-1, "send error", fmt.Errorf("voice_id: %s, error: %s", 333 | recognizer.VoiceID, err.Error())) 334 | } 335 | } 336 | 337 | func (recognizer *SpeechRecognizer) eventDispatch() { 338 | defer func() { 339 | // handle panic 340 | recognizer.genRecoverFunc()() 341 | close(recognizer.eventEnd) 342 | }() 343 | for e := range recognizer.eventChan { 344 | switch e.t { 345 | case eventTypeRecognitionStart: 346 | recognizer.listener.OnRecognitionStart(e.r) 347 | case eventTypeIntermediateResults: 348 | recognizer.listener.OnIntermediateResults(e.r) 349 | case eventTypeRecognitionComplete: 350 
| recognizer.listener.OnRecognitionComplete(e.r) 351 | case eventTypeFail: 352 | recognizer.listener.OnFail(e.r, e.err) 353 | } 354 | } 355 | } 356 | 357 | func (recognizer *SpeechRecognizer) receive() { 358 | defer func() { 359 | // handle panic 360 | recognizer.genRecoverFunc()() 361 | close(recognizer.eventChan) 362 | close(recognizer.receiveEnd) 363 | }() 364 | for { 365 | _, data, err := recognizer.conn.ReadMessage() 366 | if err != nil { 367 | recognizer.onError(-1, "receive error", fmt.Errorf("voice_id: %s, error: %s", recognizer.VoiceID, err.Error())) 368 | break 369 | } 370 | 371 | //fmt.Printf("%s", data) 372 | msg := SpeakingAssessmentResponse{} 373 | err = json.Unmarshal(data, &msg) 374 | if err != nil { 375 | recognizer.onError(-1, "receive error", 376 | fmt.Errorf("voice_id: %s, error: %s", 377 | recognizer.VoiceID, err.Error())) 378 | break 379 | } 380 | if msg.Code != 0 { 381 | recognizer.onError(msg.Code, msg.Message, 382 | fmt.Errorf("VoiceID: %s, error code %d, message: %s", 383 | recognizer.VoiceID, msg.Code, msg.Message)) 384 | break 385 | } 386 | if msg.Final == 1 { 387 | recognizer.hasEnd = true 388 | recognizer.eventChan <- speechRecognitionEvent{ 389 | t: eventTypeRecognitionComplete, 390 | r: &msg, 391 | err: nil, 392 | } 393 | break 394 | } else { 395 | recognizer.eventChan <- speechRecognitionEvent{ 396 | t: eventTypeIntermediateResults, 397 | r: &msg, 398 | err: nil, 399 | } 400 | } 401 | } 402 | } 403 | 404 | func (recognizer *SpeechRecognizer) buildURL(voiceID string) string { 405 | var queryMap = make(map[string]string) 406 | queryMap["secretid"] = recognizer.Credential.SecretId 407 | // token参数用于临时秘钥鉴权 408 | if recognizer.Credential.Token != "" { 409 | queryMap["token"] = recognizer.Credential.Token 410 | } 411 | var timestamp = time.Now().Unix() 412 | var timestampStr = strconv.FormatInt(timestamp, 10) 413 | queryMap["timestamp"] = timestampStr 414 | queryMap["expired"] = strconv.FormatInt(timestamp+24*60*60, 10) 415 | 
queryMap["nonce"] = timestampStr 416 | //params 417 | queryMap["voice_id"] = voiceID 418 | queryMap["voice_format"] = strconv.FormatInt(int64(recognizer.VoiceFormat), 10) 419 | queryMap["text_mode"] = strconv.FormatInt(recognizer.TextMode, 10) 420 | queryMap["ref_text"] = recognizer.RefText 421 | queryMap["keyword"] = recognizer.Keyword 422 | queryMap["eval_mode"] = strconv.FormatInt(recognizer.EvalMode, 10) 423 | queryMap["score_coeff"] = fmt.Sprintf("%1f", recognizer.ScoreCoeff) 424 | queryMap["server_engine_type"] = recognizer.ServerEngineType 425 | queryMap["sentence_info_enabled"] = strconv.FormatInt(int64(recognizer.SentenceInfoEnabled), 10) 426 | queryMap["rec_mode"] = strconv.FormatInt(int64(recognizer.RecMode), 10) 427 | 428 | var keys []string 429 | for k := range queryMap { 430 | keys = append(keys, k) 431 | } 432 | sort.Strings(keys) 433 | 434 | var queryStrBuffer bytes.Buffer 435 | for _, k := range keys { 436 | queryStrBuffer.WriteString(k) 437 | queryStrBuffer.WriteString("=") 438 | queryStrBuffer.WriteString(queryMap[k]) 439 | queryStrBuffer.WriteString("&") 440 | } 441 | 442 | rs := []rune(queryStrBuffer.String()) 443 | rsLen := len(rs) 444 | queryStr := string(rs[0 : rsLen-1]) 445 | 446 | //gen url 447 | url := fmt.Sprintf("%s/%s/%s?%s", host, path, recognizer.AppID, queryStr) 448 | return url 449 | } 450 | 451 | func (recognizer *SpeechRecognizer) genSignature(url string) string { 452 | hmac := hmac.New(sha1.New, []byte(recognizer.Credential.SecretKey)) 453 | signURL := url 454 | hmac.Write([]byte(signURL)) 455 | encryptedStr := hmac.Sum([]byte(nil)) 456 | var signature = base64.StdEncoding.EncodeToString(encryptedStr) 457 | 458 | return signature 459 | } 460 | 461 | func newSpeechRecognitionResponse(code int, message string, voiceID string, 462 | messageID string, final uint32) *SpeakingAssessmentResponse { 463 | return &SpeakingAssessmentResponse{ 464 | Code: code, 465 | Message: message, 466 | VoiceID: voiceID, 467 | MessageID: messageID, 468 
| Final: final, 469 | } 470 | } 471 | 472 | func (recognizer *SpeechRecognizer) genRecoverFunc() func() { 473 | return func() { 474 | if r := recover(); r != nil { 475 | var err error 476 | switch r := r.(type) { 477 | case error: 478 | err = r 479 | default: 480 | err = fmt.Errorf("%v", r) 481 | } 482 | retErr := fmt.Errorf("panic error ocurred! [err: %s] [stack: %s]", 483 | err.Error(), string(debug.Stack())) 484 | recognizer.eventChan <- speechRecognitionEvent{ 485 | t: eventTypeFail, 486 | r: newSpeechRecognitionResponse(-1, "panic error", recognizer.VoiceID, 487 | fmt.Sprintf("%s-Error", recognizer.VoiceID), 0), 488 | err: retErr, 489 | } 490 | } 491 | } 492 | } 493 | -------------------------------------------------------------------------------- /asr/speechrecognizer.go: -------------------------------------------------------------------------------- 1 | package asr 2 | 3 | import ( 4 | "bytes" 5 | "crypto/hmac" 6 | "crypto/sha1" 7 | "encoding/base64" 8 | "encoding/json" 9 | "fmt" 10 | "net/http" 11 | "net/url" 12 | "runtime/debug" 13 | "sort" 14 | "strconv" 15 | "sync" 16 | "time" 17 | 18 | "github.com/google/uuid" 19 | "github.com/gorilla/websocket" 20 | 21 | "github.com/tencentcloud/tencentcloud-speech-sdk-go/common" 22 | ) 23 | 24 | // SpeechRecognitionListener User must impletement it. 
Get recognition result 25 | type SpeechRecognitionListener interface { 26 | OnRecognitionStart(*SpeechRecognitionResponse) 27 | OnSentenceBegin(*SpeechRecognitionResponse) 28 | OnRecognitionResultChange(*SpeechRecognitionResponse) 29 | OnSentenceEnd(*SpeechRecognitionResponse) 30 | OnRecognitionComplete(*SpeechRecognitionResponse) 31 | OnFail(*SpeechRecognitionResponse, error) 32 | } 33 | 34 | // SpeechRecognitionResponse is the reponse of asr service 35 | type SpeechRecognitionResponse struct { 36 | Code int `json:"code"` 37 | Message string `json:"message"` 38 | VoiceID string `json:"voice_id,omitempty"` 39 | MessageID string `json:"message_id,omitempty"` 40 | Final uint32 `json:"final,omitempty"` 41 | Result SpeechRecognitionResponseResult `json:"result,omitempty"` 42 | } 43 | 44 | // SpeechRecognitionResponseResult SpeechRecognitionResponseResult 45 | type SpeechRecognitionResponseResult struct { 46 | SliceType uint32 `json:"slice_type"` 47 | Index int `json:"index"` 48 | StartTime uint32 `json:"start_time"` 49 | EndTime uint32 `json:"end_time"` 50 | VoiceTextStr string `json:"voice_text_str"` 51 | WordSize uint32 `json:"word_size"` 52 | WordList []SpeechRecognitionResponseResultWord `json:"word_list"` 53 | } 54 | 55 | // SpeechRecognitionResponseResultWord SpeechRecognitionResponseResultWord 56 | type SpeechRecognitionResponseResultWord struct { 57 | Word string `json:"word"` 58 | StartTime uint32 `json:"start_time"` 59 | EndTime uint32 `json:"end_time"` 60 | StableFlag uint32 `json:"stable_flag"` 61 | } 62 | 63 | // AudioFormat type 64 | const ( 65 | AudioFormatPCM = 1 66 | AudioFormatSpeex = 4 67 | AudioFormatSilk = 6 68 | AudioFormatMp3 = 8 69 | AudioFormatOpus = 10 70 | AudioFormatWav = 12 71 | AudioFormatM4A = 14 72 | AudioFormatAAC = 16 73 | ) 74 | 75 | // SpeechRecognizer is the entry for ASR service 76 | type SpeechRecognizer struct { 77 | //request params 78 | AppID string 79 | EngineModelType string 80 | VoiceFormat int 81 | NeedVad int 82 | 
HotwordId string 83 | HotwordList string 84 | CustomizationId string 85 | FilterDirty int 86 | FilterModal int 87 | FilterPunc int 88 | ConvertNumMode int 89 | WordInfo int 90 | VadSilenceTime int 91 | ReinforceHotword int 92 | NoiseThreshold float64 93 | FilterEmptyResult int 94 | MaxSpeakTime int 95 | ReplaceTextId string 96 | ChatVadEnable int 97 | 98 | Credential *common.Credential 99 | //listener 100 | listener SpeechRecognitionListener 101 | //uuid for voice 102 | VoiceID string 103 | 104 | //for proxy 105 | ProxyURL string 106 | 107 | //for websocet connection 108 | conn *websocket.Conn 109 | 110 | //send data channel 111 | dataChan chan []byte 112 | //for listener get response message 113 | eventChan chan speechRecognitionEvent 114 | 115 | //used in stop function, waiting for stop all goroutines 116 | sendEnd chan int 117 | receiveEnd chan int 118 | eventEnd chan int 119 | 120 | mutex sync.Mutex 121 | started bool 122 | } 123 | 124 | const ( 125 | defaultVoiceFormat = 1 126 | defaultNeedVad = 1 127 | defaultWordInfo = 0 128 | defaultFilterDirty = 0 129 | defaultFilterModal = 0 130 | defaultFilterPunc = 0 131 | defaultConvertNumMode = 1 132 | defaultReinforceHotword = 0 133 | defaultFilterEmptyResult = 1 134 | defaultMaxSpeakTime = 0 135 | 136 | protocol = "wss" 137 | host = "asr.cloud.tencent.com" 138 | path = "" 139 | ) 140 | 141 | const ( 142 | eventTypeRecognitionStart = 0 143 | eventTypeSentenceBegin = 1 144 | eventTypeRecognitionResultChange = 2 145 | eventTypeSentenceEnd = 3 146 | eventTypeRecognitionComplete = 4 147 | eventTypeFail = 5 148 | ) 149 | 150 | type eventType int 151 | 152 | type speechRecognitionEvent struct { 153 | t eventType 154 | r *SpeechRecognitionResponse 155 | err error 156 | } 157 | 158 | // NewSpeechRecognizer creates instance of SpeechRecognizer 159 | func NewSpeechRecognizer(appID string, credential *common.Credential, engineModelType string, 160 | listener SpeechRecognitionListener) *SpeechRecognizer { 161 | 162 | reco := 
&SpeechRecognizer{ 163 | AppID: appID, 164 | Credential: credential, 165 | EngineModelType: engineModelType, 166 | VoiceFormat: defaultVoiceFormat, 167 | NeedVad: defaultNeedVad, 168 | FilterDirty: defaultFilterDirty, 169 | FilterModal: defaultFilterModal, 170 | FilterPunc: defaultFilterPunc, 171 | ConvertNumMode: defaultConvertNumMode, 172 | WordInfo: defaultWordInfo, 173 | ReinforceHotword: defaultReinforceHotword, 174 | FilterEmptyResult: defaultFilterEmptyResult, 175 | MaxSpeakTime: defaultMaxSpeakTime, 176 | 177 | dataChan: make(chan []byte, 6400), 178 | eventChan: make(chan speechRecognitionEvent, 10), 179 | 180 | sendEnd: make(chan int), 181 | receiveEnd: make(chan int), 182 | eventEnd: make(chan int), 183 | 184 | listener: listener, 185 | started: false, 186 | } 187 | return reco 188 | } 189 | 190 | // Start connects to server and start a recognition session 191 | func (recognizer *SpeechRecognizer) Start() error { 192 | recognizer.mutex.Lock() 193 | defer recognizer.mutex.Unlock() 194 | 195 | if recognizer.started { 196 | return fmt.Errorf("recognizer is already started") 197 | } 198 | if recognizer.VoiceID == "" { 199 | voiceID := uuid.New().String() 200 | recognizer.VoiceID = voiceID 201 | } 202 | serverURL := recognizer.buildURL(recognizer.VoiceID) 203 | signature := recognizer.genSignature(serverURL) 204 | 205 | dialer := websocket.Dialer{} 206 | if len(recognizer.ProxyURL) > 0 { 207 | proxyURL, _ := url.Parse(recognizer.ProxyURL) 208 | dialer.Proxy = http.ProxyURL(proxyURL) 209 | } 210 | 211 | header := http.Header(make(map[string][]string)) 212 | urlStr := fmt.Sprintf("%s://%s&signature=%s", protocol, serverURL, url.QueryEscape(signature)) 213 | conn, _, err := dialer.Dial(urlStr, header) 214 | if err != nil { 215 | return fmt.Errorf("voice_id: %s, error: %s", recognizer.VoiceID, err.Error()) 216 | } 217 | _, data, err := conn.ReadMessage() 218 | if err != nil { 219 | conn.Close() 220 | return fmt.Errorf("voice_id: %s, error: %s", recognizer.VoiceID, 
err.Error()) 221 | } 222 | msg := SpeechRecognitionResponse{} 223 | err = json.Unmarshal(data, &msg) 224 | if err != nil { 225 | conn.Close() 226 | return fmt.Errorf("voice_id: %s, error: %s", recognizer.VoiceID, err.Error()) 227 | } 228 | if msg.Code != 0 { 229 | conn.Close() 230 | return fmt.Errorf("voice_id: %s, code: %d, message: %s", 231 | recognizer.VoiceID, msg.Code, msg.Message) 232 | } 233 | 234 | recognizer.conn = conn 235 | go recognizer.send() 236 | go recognizer.receive() 237 | go recognizer.eventDispatch() 238 | recognizer.started = true 239 | 240 | recognizer.eventChan <- speechRecognitionEvent{ 241 | t: eventTypeRecognitionStart, 242 | r: newSpeechRecognitionResponse(0, "sucess", recognizer.VoiceID, 243 | fmt.Sprintf("%s-RecognitionStart", recognizer.VoiceID), 0), 244 | err: nil, 245 | } 246 | return nil 247 | } 248 | 249 | // Write : write data in channel 250 | func (recognizer *SpeechRecognizer) Write(data []byte) error { 251 | recognizer.mutex.Lock() 252 | defer recognizer.mutex.Unlock() 253 | if !recognizer.started { 254 | return fmt.Errorf("recognizer not running") 255 | } 256 | 257 | recognizer.dataChan <- data 258 | return nil 259 | } 260 | 261 | // Stop wait for the recognition process to complete 262 | func (recognizer *SpeechRecognizer) Stop() error { 263 | err := recognizer.stopInternal() 264 | if err != nil { 265 | return err 266 | } 267 | return nil 268 | } 269 | 270 | func (recognizer *SpeechRecognizer) stopInternal() error { 271 | recognizer.mutex.Lock() 272 | defer recognizer.mutex.Unlock() 273 | if !recognizer.started { 274 | return fmt.Errorf("recognizer is not running") 275 | } 276 | close(recognizer.dataChan) 277 | <-recognizer.receiveEnd 278 | <-recognizer.sendEnd 279 | <-recognizer.eventEnd 280 | recognizer.started = false 281 | return nil 282 | } 283 | 284 | func (recognizer *SpeechRecognizer) onError(code int, message string, err error) { 285 | //recognizer.mutex.Lock() 286 | if !recognizer.started { 287 | return 288 | } 289 
| 290 | recognizer.listener.OnFail(newSpeechRecognitionResponse(code, message, recognizer.VoiceID, 291 | fmt.Sprintf("%s-Error", recognizer.VoiceID), 0), err) 292 | /* 293 | recognizer.eventChan <- speechRecognitionEvent{ 294 | t: eventTypeFail, 295 | r: newSpeechRecognitionResponse(code, message, recognizer.VoiceID, 296 | fmt.Sprintf("%s-Error", recognizer.VoiceID), 0), 297 | err: err, 298 | } 299 | recognizer.mutex.Unlock() 300 | */ 301 | go recognizer.stopInternal() 302 | } 303 | 304 | func (recognizer *SpeechRecognizer) send() { 305 | defer func() { 306 | // handle panic 307 | recognizer.genRecoverFunc()() 308 | close(recognizer.sendEnd) 309 | }() 310 | //send data 311 | for data := range recognizer.dataChan { 312 | if err := recognizer.conn.WriteMessage(websocket.BinaryMessage, data); err != nil { 313 | recognizer.onError(-1, "send error", fmt.Errorf("voice_id: %s, error: %s", 314 | recognizer.VoiceID, err.Error())) 315 | return 316 | } 317 | } 318 | //send stop msg 319 | if err := recognizer.conn.WriteMessage(websocket.TextMessage, []byte("{\"type\":\"end\"}")); err != nil { 320 | recognizer.onError(-1, "send error", fmt.Errorf("voice_id: %s, error: %s", 321 | recognizer.VoiceID, err.Error())) 322 | } 323 | } 324 | 325 | func (recognizer *SpeechRecognizer) eventDispatch() { 326 | defer func() { 327 | // handle panic 328 | recognizer.genRecoverFunc()() 329 | close(recognizer.eventEnd) 330 | }() 331 | for e := range recognizer.eventChan { 332 | switch e.t { 333 | case eventTypeRecognitionStart: 334 | recognizer.listener.OnRecognitionStart(e.r) 335 | case eventTypeSentenceBegin: 336 | recognizer.listener.OnSentenceBegin(e.r) 337 | case eventTypeRecognitionResultChange: 338 | recognizer.listener.OnRecognitionResultChange(e.r) 339 | case eventTypeSentenceEnd: 340 | recognizer.listener.OnSentenceEnd(e.r) 341 | case eventTypeRecognitionComplete: 342 | recognizer.listener.OnRecognitionComplete(e.r) 343 | case eventTypeFail: 344 | recognizer.listener.OnFail(e.r, 
e.err) 345 | } 346 | } 347 | } 348 | 349 | func (recognizer *SpeechRecognizer) receive() { 350 | defer func() { 351 | // handle panic 352 | recognizer.genRecoverFunc()() 353 | close(recognizer.eventChan) 354 | close(recognizer.receiveEnd) 355 | }() 356 | index := -1 357 | for { 358 | _, data, err := recognizer.conn.ReadMessage() 359 | if err != nil { 360 | recognizer.onError(-1, "receive error", fmt.Errorf("voice_id: %s, error: %s", recognizer.VoiceID, err.Error())) 361 | break 362 | } 363 | 364 | //fmt.Printf("%s", data) 365 | msg := SpeechRecognitionResponse{} 366 | err = json.Unmarshal(data, &msg) 367 | if err != nil { 368 | recognizer.onError(-1, "receive error", 369 | fmt.Errorf("voice_id: %s, error: %s", 370 | recognizer.VoiceID, err.Error())) 371 | break 372 | } 373 | if msg.Code != 0 { 374 | recognizer.onError(msg.Code, msg.Message, 375 | fmt.Errorf("VoiceID: %s, error code %d, message: %s", 376 | recognizer.VoiceID, msg.Code, msg.Message)) 377 | break 378 | } 379 | 380 | if msg.Final == 1 { 381 | recognizer.eventChan <- speechRecognitionEvent{ 382 | t: eventTypeRecognitionComplete, 383 | r: &msg, 384 | err: nil, 385 | } 386 | break 387 | } 388 | 389 | beginOrEnd := false 390 | if msg.Result.Index != index || msg.Result.SliceType == 0 { 391 | index = msg.Result.Index 392 | recognizer.eventChan <- speechRecognitionEvent{ 393 | t: eventTypeSentenceBegin, 394 | r: &msg, 395 | err: nil, 396 | } 397 | beginOrEnd = true 398 | } 399 | if msg.Result.SliceType == 2 { 400 | recognizer.eventChan <- speechRecognitionEvent{ 401 | t: eventTypeSentenceEnd, 402 | r: &msg, 403 | err: nil, 404 | } 405 | beginOrEnd = true 406 | } 407 | if !beginOrEnd { 408 | recognizer.eventChan <- speechRecognitionEvent{ 409 | t: eventTypeRecognitionResultChange, 410 | r: &msg, 411 | err: nil, 412 | } 413 | } 414 | } 415 | } 416 | 417 | func (recognizer *SpeechRecognizer) buildURL(voiceID string) string { 418 | var queryMap = make(map[string]string) 419 | queryMap["secretid"] = 
recognizer.Credential.SecretId 420 | var timestamp = time.Now().Unix() 421 | var timestampStr = strconv.FormatInt(timestamp, 10) 422 | queryMap["timestamp"] = timestampStr 423 | queryMap["expired"] = strconv.FormatInt(timestamp+24*60*60, 10) 424 | queryMap["nonce"] = timestampStr 425 | 426 | //params 427 | queryMap["engine_model_type"] = recognizer.EngineModelType 428 | queryMap["voice_id"] = voiceID 429 | queryMap["voice_format"] = strconv.FormatInt(int64(recognizer.VoiceFormat), 10) 430 | queryMap["needvad"] = strconv.FormatInt(int64(recognizer.NeedVad), 10) 431 | if recognizer.HotwordId != "" { 432 | queryMap["hotword_id"] = recognizer.HotwordId 433 | } 434 | if recognizer.HotwordList != "" { 435 | queryMap["hotword_list"] = recognizer.HotwordList 436 | } 437 | if recognizer.CustomizationId != "" { 438 | queryMap["customization_id"] = recognizer.CustomizationId 439 | } 440 | 441 | if recognizer.ReplaceTextId != "" { 442 | queryMap["replace_text_id"] = recognizer.ReplaceTextId 443 | } 444 | 445 | queryMap["filter_dirty"] = strconv.FormatInt(int64(recognizer.FilterDirty), 10) 446 | queryMap["filter_modal"] = strconv.FormatInt(int64(recognizer.FilterModal), 10) 447 | queryMap["filter_punc"] = strconv.FormatInt(int64(recognizer.FilterPunc), 10) 448 | queryMap["filter_empty_result"] = strconv.FormatInt(int64(recognizer.FilterEmptyResult), 10) 449 | queryMap["convert_num_mode"] = strconv.FormatInt(int64(recognizer.ConvertNumMode), 10) 450 | queryMap["word_info"] = strconv.FormatInt(int64(recognizer.WordInfo), 10) 451 | queryMap["reinforce_hotword"] = strconv.FormatInt(int64(recognizer.ReinforceHotword), 10) 452 | queryMap["max_speak_time"] = strconv.FormatInt(int64(recognizer.MaxSpeakTime), 10) 453 | if recognizer.VadSilenceTime > 0 { 454 | queryMap["vad_silence_time"] = strconv.FormatInt(int64(recognizer.VadSilenceTime), 10) 455 | } 456 | if recognizer.NoiseThreshold != 0 { 457 | queryMap["noise_threshold"] = strconv.FormatFloat(recognizer.NoiseThreshold, 'f', 3, 64) 
458 | } 459 | if recognizer.ChatVadEnable > 0 { 460 | queryMap["chat_vad_enable"] = strconv.FormatInt(int64(recognizer.ChatVadEnable), 10) 461 | } 462 | 463 | var keys []string 464 | for k := range queryMap { 465 | keys = append(keys, k) 466 | } 467 | sort.Strings(keys) 468 | 469 | var queryStrBuffer bytes.Buffer 470 | for _, k := range keys { 471 | queryStrBuffer.WriteString(k) 472 | queryStrBuffer.WriteString("=") 473 | queryStrBuffer.WriteString(queryMap[k]) 474 | queryStrBuffer.WriteString("&") 475 | } 476 | 477 | rs := []rune(queryStrBuffer.String()) 478 | rsLen := len(rs) 479 | queryStr := string(rs[0 : rsLen-1]) 480 | 481 | //gen url 482 | url := fmt.Sprintf("%s/asr/v2/%s?%s", host, recognizer.AppID, queryStr) 483 | return url 484 | } 485 | 486 | func (recognizer *SpeechRecognizer) genSignature(url string) string { 487 | hmac := hmac.New(sha1.New, []byte(recognizer.Credential.SecretKey)) 488 | signURL := url 489 | hmac.Write([]byte(signURL)) 490 | encryptedStr := hmac.Sum([]byte(nil)) 491 | var signature = base64.StdEncoding.EncodeToString(encryptedStr) 492 | 493 | return signature 494 | } 495 | 496 | func newSpeechRecognitionResponse(code int, message string, voiceID string, 497 | messageID string, final uint32) *SpeechRecognitionResponse { 498 | return &SpeechRecognitionResponse{ 499 | Code: code, 500 | Message: message, 501 | VoiceID: voiceID, 502 | MessageID: messageID, 503 | Final: final, 504 | } 505 | } 506 | 507 | func (recognizer *SpeechRecognizer) genRecoverFunc() func() { 508 | return func() { 509 | if r := recover(); r != nil { 510 | var err error 511 | switch r := r.(type) { 512 | case error: 513 | err = r 514 | default: 515 | err = fmt.Errorf("%v", r) 516 | } 517 | retErr := fmt.Errorf("panic error ocurred! 
[err: %s] [stack: %s]", 518 | err.Error(), string(debug.Stack())) 519 | recognizer.eventChan <- speechRecognitionEvent{ 520 | t: eventTypeFail, 521 | r: newSpeechRecognitionResponse(-1, "panic error", recognizer.VoiceID, 522 | fmt.Sprintf("%s-Error", recognizer.VoiceID), 0), 523 | err: retErr, 524 | } 525 | } 526 | } 527 | } 528 | --------------------------------------------------------------------------------