├── tests ├── test1.pcm ├── tts │ └── test_tts.go ├── sr │ └── test_sr.go └── st │ └── test_st.go ├── go.mod ├── token.go ├── log.go ├── utils.go ├── go.sum ├── ws.go ├── core.go ├── tts.go ├── sr.go ├── st.go ├── LICENSE ├── docs ├── TTS.md ├── SR.md └── ST.md └── README.md /tests/test1.pcm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-nls-go-sdk/HEAD/tests/test1.pcm -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/aliyun/alibabacloud-nls-go-sdk 2 | 3 | go 1.16 4 | 5 | require ( 6 | github.com/aliyun/alibaba-cloud-sdk-go v1.61.1376 7 | github.com/gorilla/websocket v1.4.2 8 | github.com/jmespath/go-jmespath v0.4.0 // indirect 9 | github.com/json-iterator/go v1.1.12 // indirect 10 | github.com/satori/go.uuid v1.2.0 11 | gopkg.in/ini.v1 v1.66.2 // indirect 12 | ) 13 | -------------------------------------------------------------------------------- /token.go: -------------------------------------------------------------------------------- 1 | /* 2 | token.go 3 | 4 | Copyright 1999-present Alibaba Group Holding Ltd. 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | 20 | 21 | package nls 22 | 23 | import ( 24 | "encoding/json" 25 | "github.com/aliyun/alibaba-cloud-sdk-go/sdk" 26 | "github.com/aliyun/alibaba-cloud-sdk-go/sdk/requests" 27 | ) 28 | 29 | func GetToken(dist string, domain string, akid string, akkey string, version string) (*TokenResultMessage, error) { 30 | client, err := sdk.NewClientWithAccessKey(dist, akid, akkey) 31 | if err != nil { 32 | return nil, err 33 | } 34 | 35 | request := requests.NewCommonRequest() 36 | request.Method = "POST" 37 | request.Domain = domain 38 | request.ApiName = "CreateToken" 39 | request.Version = version 40 | response, err := client.ProcessCommonRequest(request) 41 | if err != nil { 42 | return nil, err 43 | } 44 | 45 | message := new(TokenResultMessage) 46 | err = json.Unmarshal(response.GetHttpContentBytes(), message) 47 | if err != nil { 48 | return nil, err 49 | } 50 | 51 | return message, nil 52 | } 53 | 54 | 55 | -------------------------------------------------------------------------------- /log.go: -------------------------------------------------------------------------------- 1 | /* 2 | log.go 3 | 4 | Copyright 1999-present Alibaba Group Holding Ltd. 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
// NlsLogger is a thin wrapper around a standard library *log.Logger with two
// independent switches: sil drops the Print family, debug enables the Debug
// family. The Fatal and Panic families always reach the underlying logger.
type NlsLogger struct {
	logger *log.Logger
	sil    bool // when true, Print/Printf/Println write nothing
	debug  bool // when true, Debugln/Debugf write through
}

// defaultLog is not assigned anywhere in this file; it is kept because other
// files of the package may refer to it.
var defaultLog *NlsLogger

// defaultStd is the shared logger returned by DefaultNlsLog.
var defaultStd = stdoutLogger(log.LstdFlags|log.Lmicroseconds, "NLS")

// DefaultNlsLog returns the package-wide default logger.
func DefaultNlsLog() *NlsLogger {
	return defaultStd
}

// stdoutLogger builds the default logger. Despite its name it writes to
// os.Stderr, which is what the original implementation did.
func stdoutLogger(flag int, tag string) *NlsLogger {
	return NewNlsLogger(os.Stderr, tag, flag)
}

// NewNlsLogger creates a logger that writes to w, using tag as the line prefix
// and flag as the log package flags. Silent mode and debug mode both start
// disabled.
func NewNlsLogger(w io.Writer, tag string, flag int) *NlsLogger {
	return &NlsLogger{logger: log.New(w, tag, flag)}
}

// SetLogSil enables or disables silent mode for the Print family.
func (l *NlsLogger) SetLogSil(sil bool) {
	l.sil = sil
}

// SetDebug enables or disables output of the Debug family.
func (l *NlsLogger) SetDebug(debug bool) {
	l.debug = debug
}

// SetOutput redirects the underlying logger to w.
func (l *NlsLogger) SetOutput(w io.Writer) {
	l.logger.SetOutput(w)
}

// Fatal forwards to log.Logger.Fatal, which terminates the process.
func (l *NlsLogger) Fatal(v ...interface{}) {
	l.logger.Fatal(v...)
}

// Fatalf forwards to log.Logger.Fatalf, which terminates the process.
func (l *NlsLogger) Fatalf(format string, v ...interface{}) {
	l.logger.Fatalf(format, v...)
}

// Fatalln forwards to log.Logger.Fatalln, which terminates the process.
func (l *NlsLogger) Fatalln(v ...interface{}) {
	l.logger.Fatalln(v...)
}

// Panic forwards to log.Logger.Panic, which logs and then panics.
func (l *NlsLogger) Panic(v ...interface{}) {
	l.logger.Panic(v...)
}

// Panicf forwards to log.Logger.Panicf, which logs and then panics.
func (l *NlsLogger) Panicf(format string, v ...interface{}) {
	l.logger.Panicf(format, v...)
}

// Panicln forwards to log.Logger.Panicln, which logs and then panics. It is
// the exported counterpart of Fatalln and Println that was previously missing.
func (l *NlsLogger) Panicln(v ...interface{}) {
	l.logger.Panicln(v...)
}

// panicln is the original unexported spelling, kept so that existing callers
// inside the package continue to compile.
func (l *NlsLogger) panicln(v ...interface{}) {
	l.Panicln(v...)
}

// Print writes v unless silent mode is on.
func (l *NlsLogger) Print(v ...interface{}) {
	if l.sil {
		return
	}
	l.logger.Print(v...)
}

// Printf writes the formatted message unless silent mode is on.
func (l *NlsLogger) Printf(format string, v ...interface{}) {
	if l.sil {
		return
	}
	l.logger.Printf(format, v...)
}

// Println writes v followed by a newline unless silent mode is on.
func (l *NlsLogger) Println(v ...interface{}) {
	if l.sil {
		return
	}
	l.logger.Println(v...)
}

// Debugln writes v only when debug mode is on. Silent mode is not consulted.
func (l *NlsLogger) Debugln(v ...interface{}) {
	if l.debug {
		l.logger.Println(v...)
	}
}

// Debugf writes the formatted message only when debug mode is on. Silent mode
// is not consulted.
func (l *NlsLogger) Debugf(format string, v ...interface{}) {
	if l.debug {
		l.logger.Printf(format, v...)
	}
}

// SetFlags sets the log package flags of the underlying logger.
func (l *NlsLogger) SetFlags(flags int) {
	l.logger.SetFlags(flags)
}

// SetPrefix sets the line prefix of the underlying logger.
func (l *NlsLogger) SetPrefix(prefix string) {
	l.logger.SetPrefix(prefix)
}
param.(*TtsUserParam) 63 | if !ok { 64 | log.Default().Fatal("invalid logger") 65 | return 66 | } 67 | 68 | p.Logger.Println("onClosed:") 69 | } 70 | 71 | func waitReady(ch chan bool, logger *nls.NlsLogger) error { 72 | select { 73 | case done := <-ch: 74 | { 75 | if !done { 76 | logger.Println("Wait failed") 77 | return errors.New("wait failed") 78 | } 79 | logger.Println("Wait done") 80 | } 81 | case <-time.After(60 * time.Second): 82 | { 83 | logger.Println("Wait timeout") 84 | return errors.New("wait timeout") 85 | } 86 | } 87 | return nil 88 | } 89 | 90 | var lk sync.Mutex 91 | var fail = 0 92 | var reqNum = 0 93 | 94 | const ( 95 | TEXT = "你好小德,今天天气怎么样。" 96 | ) 97 | 98 | func testMultiInstance(num int) { 99 | param := nls.DefaultSpeechSynthesisParam() 100 | config := nls.NewConnectionConfigWithToken(nls.DEFAULT_URL, 101 | APPKEY, TOKEN) 102 | var wg sync.WaitGroup 103 | for i := 0; i < num; i++ { 104 | wg.Add(1) 105 | go func(id int) { 106 | defer wg.Done() 107 | strId := fmt.Sprintf("ID%d ", id) 108 | fname := fmt.Sprintf("ttsdump%d.wav", id) 109 | ttsUserParam := new(TtsUserParam) 110 | fout, err := os.OpenFile(fname, os.O_RDWR|os.O_TRUNC|os.O_CREATE, 0666) 111 | logger := nls.NewNlsLogger(os.Stderr, strId, log.LstdFlags|log.Lmicroseconds) 112 | logger.SetLogSil(false) 113 | logger.SetDebug(true) 114 | logger.Printf("Test Normal Case for SpeechRecognition:%s", strId) 115 | ttsUserParam.F = fout 116 | ttsUserParam.Logger = logger 117 | //third param control using realtime long text tts 118 | tts, err := nls.NewSpeechSynthesis(config, logger, false, 119 | onTaskFailed, onSynthesisResult, nil, 120 | onCompleted, onClose, ttsUserParam) 121 | if err != nil { 122 | logger.Fatalln(err) 123 | return 124 | } 125 | 126 | for { 127 | lk.Lock() 128 | reqNum++ 129 | lk.Unlock() 130 | logger.Println("SR start") 131 | ch, err := tts.Start(TEXT, param, nil) 132 | if err != nil { 133 | lk.Lock() 134 | fail++ 135 | lk.Unlock() 136 | tts.Shutdown() 137 | continue 138 | } 139 
| 140 | err = waitReady(ch, logger) 141 | if err != nil { 142 | lk.Lock() 143 | fail++ 144 | lk.Unlock() 145 | tts.Shutdown() 146 | continue 147 | } 148 | logger.Println("Synthesis done") 149 | tts.Shutdown() 150 | } 151 | }(i) 152 | } 153 | 154 | wg.Wait() 155 | } 156 | 157 | func main() { 158 | go func() { 159 | log.Default().Println(http.ListenAndServe(":6060", nil)) 160 | }() 161 | coroutineId := flag.Int("num", 1, "coroutine number") 162 | flag.Parse() 163 | log.Default().Printf("start %d coroutines", *coroutineId) 164 | 165 | c := make(chan os.Signal, 1) 166 | signal.Notify(c, os.Interrupt) 167 | go func() { 168 | for range c { 169 | lk.Lock() 170 | log.Printf(">>>>>>>>REQ NUM: %d>>>>>>>>>FAIL: %d", reqNum, fail) 171 | lk.Unlock() 172 | os.Exit(0) 173 | } 174 | }() 175 | testMultiInstance(*coroutineId) 176 | } 177 | -------------------------------------------------------------------------------- /utils.go: -------------------------------------------------------------------------------- 1 | /* 2 | utils.go 3 | 4 | Copyright 1999-present Alibaba Group Holding Ltd. 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
const (
	// Identification of this SDK; these three values populate DefaultContext.
	SDK_VERSION  = "0.0.1fix"
	SDK_NAME     = "nls-go-sdk"
	SDK_LANGUAGE = "go"

	// Audio format names.
	PCM  = "pcm"
	WAV  = "wav"
	OPUS = "opus"
	OPU  = "opu"

	// Defaults related to token retrieval (presumably the arguments expected
	// by GetToken -- confirm against callers).
	DEFAULT_DISTRIBUTE = "cn-shanghai"
	DEFAULT_DOMAIN     = "nls-meta.cn-shanghai.aliyuncs.com"
	DEFAULT_VERSION    = "2019-02-28"

	DEFAULT_SEC_WEBSOCKET_KEY = "x3JJHMbDL1EzLkh9GBhXDw=="
	DEFAULT_SEC_WEBSOCKET_VER = "13"

	// HTTP header name that carries the token during the websocket handshake.
	DEFAULT_X_NLS_TOKEN_KEY = "X-NLS-Token"

	// Default gateway URL.
	DEFAULT_URL = "wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1"

	TASK_FAILED_NAME    = "TaskFailed"
	CUSTOM_DEFINED_NAME = "CustomDefined"

	// Payload key names.
	AUDIO_FORMAT_KEY        = "format"
	SAMPLE_RATE_KEY         = "sample_rate"
	ENABLE_INTERMEDIATE_KEY = "enable_intermediate_result"
	ENABLE_PP_KEY           = "enable_punctuation_prediction"
	ENABLE_ITN_KEY          = "enable_inverse_text_normalization"
)

// Chunk is one slice of raw bytes produced by LoadPcmInChunk.
type Chunk struct {
	Data []byte
}

// ChunkBuffer is an ordered list of chunks.
type ChunkBuffer struct {
	Data []*Chunk
}

// TokenResult is the "Token" object inside a TokenResultMessage.
type TokenResult struct {
	UserId     string `json:"UserId"`
	Id         string `json:"Id"`
	ExpireTime int64  `json:"ExpireTime"`
}

// TokenResultMessage is the JSON document decoded by GetToken.
type TokenResultMessage struct {
	ErrMsg      string      `json:"ErrMsg"`
	TokenResult TokenResult `json:"Token"`
}

// Header is the "header" object shared by requests and responses.
type Header struct {
	MessageId string `json:"message_id"`
	TaskId    string `json:"task_id"`
	Namespace string `json:"namespace"`
	Name      string `json:"name"`
	Appkey    string `json:"appkey"`
}

// SDK describes the client library inside a request context.
type SDK struct {
	Name     string `json:"name"`
	Version  string `json:"version"`
	Language string `json:"language"`
}
`json:"device,omitempty"` 99 | Network map[string]interface{} `json:"network,omitempty"` 100 | Geography map[string]interface{} `json:"geography,omitempty"` 101 | Bridge map[string]interface{} `json:"bridge,omitempty"` 102 | Custom map[string]interface{} `json:"custom,omitempty"` 103 | } 104 | 105 | var DefaultContext = Context{ 106 | Sdk: SDK{ 107 | Name: SDK_NAME, 108 | Version: SDK_VERSION, 109 | Language: SDK_LANGUAGE, 110 | }, 111 | } 112 | 113 | type CommonResponse struct { 114 | Header Header `json:"header"` 115 | Payload map[string]interface{} `json:"payload,omitempty"` 116 | } 117 | 118 | type CommonRequest struct { 119 | Header Header `json:"header"` 120 | Payload map[string]interface{} `json:"payload,omitempty"` 121 | Context Context `json:"context"` 122 | } 123 | 124 | func LoadPcmInChunk(r io.Reader, chunkSize int) *ChunkBuffer { 125 | buffer := new(ChunkBuffer) 126 | buffer.Data = make([]*Chunk, 0) 127 | for { 128 | chunk := new(Chunk) 129 | chunk.Data = make([]byte, chunkSize) 130 | i, err := r.Read(chunk.Data) 131 | if err == io.EOF { 132 | break 133 | } else { 134 | if i != chunkSize { 135 | chunk2 := new(Chunk) 136 | chunk2.Data = make([]byte, i) 137 | copy(chunk2.Data, chunk.Data) 138 | chunk = chunk2 139 | } 140 | buffer.Data = append(buffer.Data, chunk) 141 | } 142 | } 143 | 144 | return buffer 145 | } 146 | 147 | func getUuid() string { 148 | return strings.ReplaceAll(uuid.NewV4().String(), "-", "") 149 | } 150 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/aliyun/alibaba-cloud-sdk-go v1.61.1376 h1:lExo7heZgdFn5AbaNJEllbA0KSJ/Z8T7MphvMREJOOo= 2 | github.com/aliyun/alibaba-cloud-sdk-go v1.61.1376/go.mod h1:9CMdKNL3ynIGPpfTcdwTvIm8SGuAZYYC4jFVSSvE1YQ= 3 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/davecgh/go-spew v1.1.1/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 5 | github.com/goji/httpauth v0.0.0-20160601135302-2da839ab0f4d/go.mod h1:nnjvkQ9ptGaCkuDUx6wNykzzlUixGxvkme+H/lnzb+A= 6 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 7 | github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= 8 | github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= 9 | github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= 10 | github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= 11 | github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= 12 | github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= 13 | github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= 14 | github.com/json-iterator/go v1.1.5/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= 15 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 16 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 17 | github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= 18 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 19 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 20 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 21 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 22 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 23 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod 
h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 24 | github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= 25 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= 26 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 27 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 28 | github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= 29 | github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= 30 | github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= 31 | github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= 32 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 33 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 34 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 35 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 36 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 37 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 38 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 39 | golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 40 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 41 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 42 | gopkg.in/ini.v1 v1.62.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= 43 | gopkg.in/ini.v1 v1.66.2 
h1:XfR1dOYubytKy4Shzc2LHrrGhU0lDCfDGG1yLPmpgsI= 44 | gopkg.in/ini.v1 v1.66.2/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= 45 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 46 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 47 | gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 48 | -------------------------------------------------------------------------------- /tests/sr/test_sr.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "errors" 5 | "flag" 6 | "fmt" 7 | "log" 8 | "net/http" 9 | _ "net/http/pprof" 10 | "os" 11 | "os/signal" 12 | "sync" 13 | "time" 14 | 15 | "github.com/aliyun/alibabacloud-nls-go-sdk" 16 | ) 17 | 18 | const ( 19 | AKID = "Your AKID" 20 | AKKEY = "Your AKKEY" 21 | //online key 22 | APPKEY = "Your APPKEY" 23 | TOKEN = "TEST TOKEN" 24 | ) 25 | 26 | func onTaskFailed(text string, param interface{}) { 27 | logger, ok := param.(*nls.NlsLogger) 28 | if !ok { 29 | log.Default().Fatal("invalid logger") 30 | return 31 | } 32 | 33 | logger.Println("TaskFailed:", text) 34 | } 35 | 36 | func onStarted(text string, param interface{}) { 37 | logger, ok := param.(*nls.NlsLogger) 38 | if !ok { 39 | log.Default().Fatal("invalid logger") 40 | return 41 | } 42 | 43 | logger.Println("onStarted:", text) 44 | } 45 | 46 | func onResultChanged(text string, param interface{}) { 47 | logger, ok := param.(*nls.NlsLogger) 48 | if !ok { 49 | log.Default().Fatal("invalid logger") 50 | return 51 | } 52 | 53 | logger.Println("onResultChanged:", text) 54 | } 55 | 56 | func onCompleted(text string, param interface{}) { 57 | logger, ok := param.(*nls.NlsLogger) 58 | if !ok { 59 | log.Default().Fatal("invalid logger") 60 | return 61 | } 62 | 63 | logger.Println("onCompleted:", text) 64 | } 65 | 66 | func onClose(param interface{}) { 67 | logger, ok 
:= param.(*nls.NlsLogger) 68 | if !ok { 69 | log.Default().Fatal("invalid logger") 70 | return 71 | } 72 | 73 | logger.Println("onClosed:") 74 | } 75 | 76 | func waitReady(ch chan bool, logger *nls.NlsLogger) error { 77 | select { 78 | case done := <-ch: 79 | { 80 | if !done { 81 | logger.Println("Wait failed") 82 | return errors.New("wait failed") 83 | } 84 | logger.Println("Wait done") 85 | } 86 | case <-time.After(20 * time.Second): 87 | { 88 | logger.Println("Wait timeout") 89 | return errors.New("wait timeout") 90 | } 91 | } 92 | return nil 93 | } 94 | 95 | var lk sync.Mutex 96 | var fail = 0 97 | var reqNum = 0 98 | 99 | func testMultiInstance(num int) { 100 | pcm, err := os.Open("tests/test1.pcm") 101 | if err != nil { 102 | log.Default().Fatalln(err) 103 | } 104 | 105 | buffers := nls.LoadPcmInChunk(pcm, 320) 106 | param := nls.DefaultSpeechRecognitionParam() 107 | config := nls.NewConnectionConfigWithToken(nls.DEFAULT_URL, 108 | APPKEY, TOKEN) 109 | var wg sync.WaitGroup 110 | for i := 0; i < num; i++ { 111 | wg.Add(1) 112 | go func(id int) { 113 | defer wg.Done() 114 | strId := fmt.Sprintf("ID%d ", id) 115 | logger := nls.NewNlsLogger(os.Stderr, strId, log.LstdFlags|log.Lmicroseconds) 116 | logger.SetLogSil(false) 117 | logger.SetDebug(true) 118 | logger.Printf("Test Normal Case for SpeechRecognition:%s", strId) 119 | sr, err := nls.NewSpeechRecognition(config, logger, 120 | onTaskFailed, onStarted, onResultChanged, 121 | onCompleted, onClose, logger) 122 | if err != nil { 123 | logger.Fatalln(err) 124 | return 125 | } 126 | 127 | test_ex := make(map[string]interface{}) 128 | test_ex["test"] = "hello" 129 | 130 | for { 131 | lk.Lock() 132 | reqNum++ 133 | lk.Unlock() 134 | logger.Println("SR start") 135 | ready, err := sr.Start(param, test_ex) 136 | if err != nil { 137 | lk.Lock() 138 | fail++ 139 | lk.Unlock() 140 | sr.Shutdown() 141 | continue 142 | } 143 | 144 | err = waitReady(ready, logger) 145 | if err != nil { 146 | lk.Lock() 147 | fail++ 148 | 
lk.Unlock() 149 | sr.Shutdown() 150 | continue 151 | } 152 | 153 | for _, data := range buffers.Data { 154 | if data != nil { 155 | sr.SendAudioData(data.Data) 156 | time.Sleep(10 * time.Millisecond) 157 | } 158 | } 159 | 160 | logger.Println("send audio done") 161 | ready, err = sr.Stop() 162 | if err != nil { 163 | lk.Lock() 164 | fail++ 165 | lk.Unlock() 166 | sr.Shutdown() 167 | continue 168 | } 169 | 170 | err = waitReady(ready, logger) 171 | if err != nil { 172 | lk.Lock() 173 | fail++ 174 | lk.Unlock() 175 | sr.Shutdown() 176 | continue 177 | } 178 | 179 | logger.Println("Sr done") 180 | sr.Shutdown() 181 | } 182 | }(i) 183 | } 184 | 185 | wg.Wait() 186 | } 187 | 188 | func main() { 189 | go func() { 190 | log.Default().Println(http.ListenAndServe(":6060", nil)) 191 | }() 192 | coroutineId := flag.Int("num", 1, "coroutine number") 193 | flag.Parse() 194 | log.Default().Printf("start %d coroutines", *coroutineId) 195 | 196 | c := make(chan os.Signal, 1) 197 | signal.Notify(c, os.Interrupt) 198 | go func() { 199 | for range c { 200 | lk.Lock() 201 | log.Printf(">>>>>>>>REQ NUM: %d>>>>>>>>>FAIL: %d", reqNum, fail) 202 | lk.Unlock() 203 | os.Exit(0) 204 | } 205 | }() 206 | testMultiInstance(*coroutineId) 207 | } 208 | -------------------------------------------------------------------------------- /ws.go: -------------------------------------------------------------------------------- 1 | /* 2 | ws.go 3 | 4 | Copyright 1999-present Alibaba Group Holding Ltd. 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package nls 20 | 21 | import ( 22 | "errors" 23 | "time" 24 | 25 | "github.com/gorilla/websocket" 26 | "net/http" 27 | ) 28 | 29 | type wsConnection struct { 30 | connection *websocket.Conn 31 | 32 | recvf func(bool, []byte) 33 | closef func(int, string, error) 34 | 35 | logger *NlsLogger 36 | } 37 | 38 | func newWsConnection(url string, token string, handshakeTimeout time.Duration, 39 | readBufferSize int, writeBufferSize int, logger *NlsLogger, 40 | recvHandler func(rawData bool, data []byte), 41 | closeHandler func(code int, text string, err error)) (*wsConnection, error) { 42 | if recvHandler == nil { 43 | return nil, errors.New("empty recvHandler") 44 | } 45 | 46 | connection := new(wsConnection) 47 | if logger == nil { 48 | connection.logger = DefaultNlsLog() 49 | } else { 50 | connection.logger = logger 51 | } 52 | 53 | retry := 0 54 | for { 55 | err := connection.issueWsConnect(url, token, handshakeTimeout, readBufferSize, writeBufferSize) 56 | if err != nil { 57 | if err.Error() == "EOF" { 58 | connection.logger.Debugf("connection(%p) connect failed: %s retry: %d", connection, err, retry) 59 | retry++ 60 | if retry >= 5 { 61 | return nil, err 62 | } 63 | time.Sleep(10 * time.Millisecond) 64 | } else { 65 | connection.logger.Debugf("connection(%p) connect failed: %s", connection, err) 66 | return nil, err 67 | } 68 | } else { 69 | break 70 | } 71 | } 72 | connection.logger.Debugln("underlying network info:", 73 | connection.connection.UnderlyingConn().LocalAddr().String()) 74 | 75 | connection.recvf = recvHandler 76 | connection.startResultHandler() 77 | 78 | if closeHandler != nil { 79 | connection.closef = closeHandler 80 | connection.setCloseHandler() 81 | } 82 | 83 | return connection, nil 84 | } 85 | 86 | func (conn *wsConnection) issueWsConnect(url string, token string, handshakeTimeout time.Duration, readBufferSize int, 
writeBufferSize int) error { 87 | header := http.Header{ 88 | DEFAULT_X_NLS_TOKEN_KEY: []string{token}, 89 | } 90 | 91 | dialer := websocket.Dialer{ 92 | HandshakeTimeout: handshakeTimeout, 93 | ReadBufferSize: readBufferSize, 94 | WriteBufferSize: writeBufferSize, 95 | } 96 | 97 | c, _, err := dialer.Dial(url, header) 98 | if err != nil { 99 | return err 100 | } 101 | 102 | conn.connection = c 103 | return nil 104 | } 105 | 106 | func (conn *wsConnection) setPingInterval(timeout time.Duration) { 107 | if conn.connection == nil { 108 | return 109 | } 110 | 111 | conn.connection.SetPongHandler(func(data string) error { 112 | //do nothing 113 | return nil 114 | }) 115 | 116 | go func() { 117 | for { 118 | select { 119 | case <-time.After(timeout): 120 | if conn != nil { 121 | err := conn.connection.WriteMessage(websocket.PingMessage, []byte{}) 122 | if err != nil { 123 | conn.logger.Debugln("write ping msg failed:", err) 124 | return 125 | } 126 | } 127 | } 128 | } 129 | }() 130 | } 131 | 132 | func (conn *wsConnection) sendTextData(data string) error { 133 | if conn == nil { 134 | return errors.New("nil connection in sendTextData") 135 | } 136 | 137 | conn.logger.Debugln("ws write:", data) 138 | return conn.connection.WriteMessage(websocket.TextMessage, []byte(data)) 139 | } 140 | 141 | func (conn *wsConnection) sendRequest(req CommonRequest) error { 142 | if conn == nil { 143 | return errors.New("nil connection in sendTextData") 144 | } 145 | 146 | return conn.connection.WriteJSON(req) 147 | } 148 | 149 | func (conn *wsConnection) sendBinary(bin []byte) error { 150 | if conn == nil || bin == nil || len(bin) == 0 { 151 | return errors.New("invalid params: nil connection or empty binary") 152 | } 153 | 154 | return conn.connection.WriteMessage(websocket.BinaryMessage, bin) 155 | } 156 | 157 | func (conn *wsConnection) startResultHandler() { 158 | if conn == nil { 159 | return 160 | } 161 | 162 | go func() { 163 | for { 164 | mtype, resp, err := 
conn.connection.ReadMessage() 165 | if err != nil { 166 | return 167 | } 168 | 169 | raw := false 170 | if mtype == websocket.BinaryMessage { 171 | raw = true 172 | } 173 | 174 | if conn.recvf != nil { 175 | conn.recvf(raw, resp) 176 | } 177 | } 178 | }() 179 | } 180 | 181 | func (conn *wsConnection) setCloseHandler() { 182 | if conn == nil { 183 | return 184 | } 185 | 186 | conn.connection.SetCloseHandler(func(code int, text string) error { 187 | conn.logger.Debugf("connection %p closed", conn) 188 | err := conn.connection.Close() 189 | if conn.closef != nil { 190 | conn.closef(code, text, err) 191 | } 192 | return err 193 | }) 194 | } 195 | 196 | func (conn *wsConnection) shutdown() error { 197 | if conn == nil { 198 | return nil 199 | } 200 | 201 | return conn.connection.Close() 202 | } 203 | -------------------------------------------------------------------------------- /tests/st/test_st.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "errors" 5 | "flag" 6 | "fmt" 7 | "log" 8 | "net/http" 9 | _ "net/http/pprof" 10 | "os" 11 | "os/signal" 12 | "sync" 13 | "time" 14 | 15 | nls "github.com/aliyun/alibabacloud-nls-go-sdk" 16 | ) 17 | 18 | const ( 19 | AKID = "Your AKID" 20 | AKKEY = "Your AKKEY" 21 | //online key 22 | APPKEY = "Your APPKEY" 23 | TOKEN = "TEST TOKEN" 24 | ) 25 | 26 | func onTaskFailed(text string, param interface{}) { 27 | logger, ok := param.(*nls.NlsLogger) 28 | if !ok { 29 | log.Default().Fatal("invalid logger") 30 | return 31 | } 32 | 33 | logger.Println("TaskFailed:", text) 34 | } 35 | 36 | func onStarted(text string, param interface{}) { 37 | logger, ok := param.(*nls.NlsLogger) 38 | if !ok { 39 | log.Default().Fatal("invalid logger") 40 | return 41 | } 42 | 43 | logger.Println("onStarted:", text) 44 | } 45 | 46 | func onSentenceBegin(text string, param interface{}) { 47 | logger, ok := param.(*nls.NlsLogger) 48 | if !ok { 49 | log.Default().Fatal("invalid logger") 50 
| return 51 | } 52 | 53 | logger.Println("onSentenceBegin:", text) 54 | } 55 | 56 | func onSentenceEnd(text string, param interface{}) { 57 | logger, ok := param.(*nls.NlsLogger) 58 | if !ok { 59 | log.Default().Fatal("invalid logger") 60 | return 61 | } 62 | 63 | logger.Println("onSentenceEnd:", text) 64 | } 65 | 66 | func onResultChanged(text string, param interface{}) { 67 | logger, ok := param.(*nls.NlsLogger) 68 | if !ok { 69 | log.Default().Fatal("invalid logger") 70 | return 71 | } 72 | 73 | logger.Println("onResultChanged:", text) 74 | } 75 | 76 | func onCompleted(text string, param interface{}) { 77 | logger, ok := param.(*nls.NlsLogger) 78 | if !ok { 79 | log.Default().Fatal("invalid logger") 80 | return 81 | } 82 | 83 | logger.Println("onCompleted:", text) 84 | } 85 | 86 | func onResultTranslated(text string, param interface{}) { 87 | logger, ok := param.(*nls.NlsLogger) 88 | if !ok { 89 | log.Default().Fatal("invalid logger") 90 | return 91 | } 92 | logger.Println("onCustomHandler:", text) 93 | } 94 | 95 | func onClose(param interface{}) { 96 | logger, ok := param.(*nls.NlsLogger) 97 | if !ok { 98 | log.Default().Fatal("invalid logger") 99 | return 100 | } 101 | 102 | logger.Println("onClosed:") 103 | } 104 | 105 | func waitReady(ch chan bool, logger *nls.NlsLogger) error { 106 | select { 107 | case done := <-ch: 108 | { 109 | if !done { 110 | logger.Println("Wait failed") 111 | return errors.New("wait failed") 112 | } 113 | logger.Println("Wait done") 114 | } 115 | case <-time.After(20 * time.Second): 116 | { 117 | logger.Println("Wait timeout") 118 | return errors.New("wait timeout") 119 | } 120 | } 121 | return nil 122 | } 123 | 124 | var lk sync.Mutex 125 | var fail = 0 126 | var reqNum = 0 127 | 128 | func testMultiInstance(num int) { 129 | pcm, err := os.Open("tests/test1.pcm") 130 | if err != nil { 131 | log.Default().Fatalln(err) 132 | } 133 | 134 | buffers := nls.LoadPcmInChunk(pcm, 320) 135 | param := nls.DefaultSpeechTranscriptionParam() 136 
| config := nls.NewConnectionConfigWithToken(nls.DEFAULT_URL, 137 | APPKEY, TOKEN) 138 | var wg sync.WaitGroup 139 | for i := 0; i < num; i++ { 140 | wg.Add(1) 141 | go func(id int) { 142 | defer wg.Done() 143 | strId := fmt.Sprintf("ID%d ", id) 144 | logger := nls.NewNlsLogger(os.Stderr, strId, log.LstdFlags|log.Lmicroseconds) 145 | logger.SetLogSil(false) 146 | logger.SetDebug(true) 147 | logger.Printf("Test Normal Case for SpeechRecognition:%s", strId) 148 | st, err := nls.NewSpeechTranscription(config, logger, 149 | onTaskFailed, onStarted, 150 | onSentenceBegin, onSentenceEnd, onResultChanged, 151 | onCompleted, onClose, logger) 152 | if err != nil { 153 | logger.Fatalln(err) 154 | return 155 | } 156 | st.SetCustomHandler("ResultTranslated", onResultTranslated) 157 | test_ex := make(map[string]interface{}) 158 | test_ex["test"] = "hello" 159 | 160 | for { 161 | lk.Lock() 162 | reqNum++ 163 | lk.Unlock() 164 | logger.Println("ST start") 165 | ready, err := st.Start(param, test_ex) 166 | if err != nil { 167 | lk.Lock() 168 | fail++ 169 | lk.Unlock() 170 | st.Shutdown() 171 | continue 172 | } 173 | 174 | err = waitReady(ready, logger) 175 | if err != nil { 176 | lk.Lock() 177 | fail++ 178 | lk.Unlock() 179 | st.Shutdown() 180 | continue 181 | } 182 | 183 | for _, data := range buffers.Data { 184 | if data != nil { 185 | st.SendAudioData(data.Data) 186 | time.Sleep(10 * time.Millisecond) 187 | } 188 | } 189 | 190 | logger.Println("send audio done") 191 | ready, err = st.Stop() 192 | if err != nil { 193 | lk.Lock() 194 | fail++ 195 | lk.Unlock() 196 | st.Shutdown() 197 | continue 198 | } 199 | 200 | err = waitReady(ready, logger) 201 | if err != nil { 202 | lk.Lock() 203 | fail++ 204 | lk.Unlock() 205 | st.Shutdown() 206 | continue 207 | } 208 | 209 | logger.Println("Sr done") 210 | st.Shutdown() 211 | } 212 | }(i) 213 | } 214 | 215 | wg.Wait() 216 | } 217 | 218 | func main() { 219 | go func() { 220 | log.Default().Println(http.ListenAndServe(":6060", nil)) 221 | 
}() 222 | coroutineId := flag.Int("num", 1, "coroutine number") 223 | flag.Parse() 224 | log.Default().Printf("start %d coroutines", *coroutineId) 225 | 226 | c := make(chan os.Signal, 1) 227 | signal.Notify(c, os.Interrupt) 228 | go func() { 229 | for range c { 230 | lk.Lock() 231 | log.Printf(">>>>>>>>REQ NUM: %d>>>>>>>>>FAIL: %d", reqNum, fail) 232 | lk.Unlock() 233 | os.Exit(0) 234 | } 235 | }() 236 | testMultiInstance(*coroutineId) 237 | } 238 | -------------------------------------------------------------------------------- /core.go: -------------------------------------------------------------------------------- 1 | /* 2 | core.go 3 | 4 | Copyright 1999-present Alibaba Group Holding Ltd. 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package nls 20 | 21 | import ( 22 | "encoding/json" 23 | "errors" 24 | "fmt" 25 | "time" 26 | ) 27 | 28 | const ( 29 | CONNECTED_HANDLER = "CONNECTED_HANDLER" 30 | CLOSE_HANDLER = "CLOSE_HANDLER" 31 | RAW_HANDLER = "RAW_HANDLER" 32 | ) 33 | 34 | type ConnectionConfig struct { 35 | Url string `json:"url"` 36 | Token string `json:"token"` 37 | Akid string `json:"akid"` 38 | Akkey string `json:"akkey"` 39 | Appkey string `json:"appkey"` 40 | Rbuffer int `json:"rbuffer"` 41 | Wbuffer int `json:"wbuffer"` 42 | } 43 | 44 | func NewConnectionConfigWithAKInfoDefault(url string, appkey string, 45 | akid string, akkey string) (*ConnectionConfig, error) { 46 | tokenMsg, err := GetToken(DEFAULT_DISTRIBUTE, DEFAULT_DOMAIN, akid, akkey, DEFAULT_VERSION) 47 | if err != nil { 48 | return nil, err 49 | } 50 | 51 | if tokenMsg.TokenResult.Id == "" { 52 | str := fmt.Sprintf("obtain empty token err:%s", tokenMsg.ErrMsg) 53 | return nil, errors.New(str) 54 | } 55 | 56 | return NewConnectionConfigWithToken(url, appkey, tokenMsg.TokenResult.Id), nil 57 | } 58 | 59 | func NewConnectionConfigWithToken(url string, appkey string, token string) *ConnectionConfig { 60 | config := new(ConnectionConfig) 61 | config.Url = url 62 | config.Appkey = appkey 63 | config.Token = token 64 | config.Rbuffer = 1024 65 | config.Wbuffer = 4096 66 | return config 67 | } 68 | 69 | func NewConnectionConfigFromJson(jsonStr string) (*ConnectionConfig, error) { 70 | config := ConnectionConfig{} 71 | err := json.Unmarshal([]byte(jsonStr), &config) 72 | if err != nil { 73 | return nil, err 74 | } 75 | 76 | if config.Url == "" || config.Appkey == "" { 77 | return nil, errors.New("invalid connection config: no url or appkey") 78 | } 79 | 80 | if config.Token == "" { 81 | if config.Akid == "" || config.Akkey == "" { 82 | return nil, errors.New("invalid connection config: if no token provided, must provide akid and akkey") 83 | } 84 | return NewConnectionConfigWithAKInfoDefault(config.Url, 
config.Appkey, config.Akid, config.Akkey) 85 | } else { 86 | return NewConnectionConfigWithToken(config.Url, config.Appkey, config.Token), nil 87 | } 88 | } 89 | 90 | type nlsProto struct { 91 | proto *commonProto 92 | conn *wsConnection 93 | connConfig *ConnectionConfig 94 | logger *NlsLogger 95 | taskId string 96 | param interface{} 97 | } 98 | 99 | type commonProto struct { 100 | namespace string 101 | handlers map[string]func(isErr bool, text []byte, proto *nlsProto) 102 | } 103 | 104 | func newNlsProto(connConfig *ConnectionConfig, 105 | proto *commonProto, logger *NlsLogger, param interface{}) (*nlsProto, error) { 106 | if connConfig == nil || proto == nil { 107 | return nil, errors.New("connConfig or proto is nil") 108 | } 109 | if proto.handlers == nil { 110 | return nil, errors.New("invalid proto: nil handler") 111 | } 112 | 113 | nls := new(nlsProto) 114 | nls.connConfig = connConfig 115 | nls.proto = proto 116 | if logger == nil { 117 | nls.logger = DefaultNlsLog() 118 | } else { 119 | nls.logger = logger 120 | } 121 | 122 | nls.param = param 123 | return nls, nil 124 | } 125 | 126 | func (nls *nlsProto) Connect() error { 127 | if nls.conn != nil { 128 | nls.conn.shutdown() 129 | time.Sleep(time.Millisecond * 100) 130 | } 131 | 132 | ws, err := newWsConnection(nls.connConfig.Url, 133 | nls.connConfig.Token, 10*time.Second, nls.connConfig.Rbuffer, 134 | nls.connConfig.Wbuffer, nls.logger, 135 | //recv frame 136 | func(rawData bool, data []byte) { 137 | if rawData { 138 | handler, ok := nls.proto.handlers[RAW_HANDLER] 139 | if !ok { 140 | nls.logger.Fatal("NO RAW_HANDLER BUT recv RAW FRAME") 141 | return 142 | } else { 143 | handler(false, data, nls) 144 | } 145 | } else { 146 | nls.logger.Debugf("recv raw frame:%s", string(data)) 147 | resp := CommonResponse{} 148 | err := json.Unmarshal(data, &resp) 149 | if err != nil { 150 | nls.logger.Println("OCCUR UNKNOWN PROTO:", err) 151 | return 152 | } 153 | 154 | if resp.Header.Namespace != "Default" && 
resp.Header.Namespace != nls.proto.namespace { 155 | nls.logger.Fatalf("WTF namespace mismatch expect %s but %s", nls.proto.namespace, resp.Header.Namespace) 156 | return 157 | } 158 | handler, ok := nls.proto.handlers[resp.Header.Name] 159 | if !ok { 160 | nls.logger.Printf("no handler for %s", resp.Header.Name) 161 | if cust_handler, ok := nls.proto.handlers[CUSTOM_DEFINED_NAME]; ok { 162 | nls.logger.Println("using custom handler for", resp.Header.Name) 163 | cust_handler(false, data, nls) 164 | } else { 165 | nls.logger.Println("no custom handler for", resp.Header.Name) 166 | } 167 | return 168 | } 169 | handler(false, data, nls) 170 | } 171 | }, 172 | //close 173 | func(code int, text string, err error) { 174 | handler, ok := nls.proto.handlers[CLOSE_HANDLER] 175 | if ok { 176 | handler(true, []byte(text), nls) 177 | } 178 | }) 179 | if err != nil { 180 | return err 181 | } 182 | 183 | nls.conn = ws 184 | nls.logger.Println("connect done") 185 | handler, ok := nls.proto.handlers[CONNECTED_HANDLER] 186 | if ok { 187 | handler(false, nil, nls) 188 | } else { 189 | nls.logger.Println("no onConnected handler") 190 | } 191 | 192 | return nil 193 | } 194 | 195 | func (nls *nlsProto) shutdown() error { 196 | if nls.conn == nil { 197 | return errors.New("nls proto is nil") 198 | } 199 | return nls.conn.shutdown() 200 | } 201 | 202 | func (nls *nlsProto) cmd(cmd string) error { 203 | if nls.conn == nil { 204 | return errors.New("nls proto is nil") 205 | } 206 | 207 | return nls.conn.sendTextData(cmd) 208 | } 209 | 210 | func (nls *nlsProto) sendRawData(data []byte) error { 211 | if nls.conn == nil { 212 | return errors.New("nls proto is nil") 213 | } 214 | 215 | return nls.conn.sendBinary(data) 216 | } 217 | -------------------------------------------------------------------------------- /tts.go: -------------------------------------------------------------------------------- 1 | /* 2 | tts.go 3 | 4 | Copyright 1999-present Alibaba Group Holding Ltd. 
5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package nls 20 | 21 | import ( 22 | "encoding/json" 23 | "errors" 24 | "log" 25 | "sync" 26 | ) 27 | 28 | const ( 29 | //namespace field 30 | TTS_NAMESPACE = "SpeechSynthesizer" 31 | TTS_LONG_NAMESPACE = "SpeechLongSynthesizer" 32 | //name field 33 | TTS_START_NAME = "StartSynthesis" 34 | TTS_COMPLETED_NAME = "SynthesisCompleted" 35 | TTS_METAINFO_NAME = "MetaInfo" 36 | ) 37 | 38 | type SpeechSynthesisStartParam struct { 39 | Voice string `json:"voice"` 40 | Format string `json:"format,omitempty"` 41 | SampleRate int `json:"sample_rate,omitempty"` 42 | Volume int `json:"volume"` 43 | SpeechRate int `json:"speech_rate"` 44 | PitchRate int `json:"pitch_rate"` 45 | EnableSubtitle bool `json:"enable_subtitle"` 46 | } 47 | 48 | func DefaultSpeechSynthesisParam() SpeechSynthesisStartParam { 49 | return SpeechSynthesisStartParam{ 50 | Voice: "xiaoyun", 51 | Format: "wav", 52 | SampleRate: 16000, 53 | Volume: 50, 54 | SpeechRate: 0, 55 | PitchRate: 0, 56 | EnableSubtitle: false, 57 | } 58 | } 59 | 60 | type SpeechSynthesis struct { 61 | nls *nlsProto 62 | taskId string 63 | 64 | completeChan chan bool 65 | lk sync.Mutex 66 | 67 | onTaskFailed func(text string, param interface{}) 68 | onSynthesisResult func(data []byte, param interface{}) 69 | onCompleted func(text string, param interface{}) 70 | onMetaInfo func(text string, param interface{}) 71 | onClose func(param interface{}) 72 | 73 | 
StartParam map[string]interface{} 74 | UserParam interface{} 75 | 76 | usingLong bool 77 | } 78 | 79 | func checkTtsNlsProto(proto *nlsProto) *SpeechSynthesis { 80 | if proto == nil { 81 | log.Default().Fatal("empty proto check failed") 82 | return nil 83 | } 84 | 85 | tts, ok := proto.param.(*SpeechSynthesis) 86 | if !ok { 87 | log.Default().Fatal("proto param not SpeechSynthesis instance") 88 | return nil 89 | } 90 | 91 | return tts 92 | } 93 | 94 | func onTtsTaskFailedHandler(isErr bool, text []byte, proto *nlsProto) { 95 | tts := checkTtsNlsProto(proto) 96 | if tts.onTaskFailed != nil { 97 | tts.onTaskFailed(string(text), tts.UserParam) 98 | } 99 | 100 | tts.lk.Lock() 101 | defer tts.lk.Unlock() 102 | if tts.completeChan != nil { 103 | tts.completeChan <- false 104 | close(tts.completeChan) 105 | tts.completeChan = nil 106 | } 107 | } 108 | 109 | func onTtsConnectedHandler(isErr bool, text []byte, proto *nlsProto) { 110 | tts := checkTtsNlsProto(proto) 111 | 112 | req := CommonRequest{} 113 | req.Context = DefaultContext 114 | req.Header.Appkey = tts.nls.connConfig.Appkey 115 | req.Header.MessageId = getUuid() 116 | req.Header.Name = TTS_START_NAME 117 | if tts.usingLong { 118 | req.Header.Namespace = TTS_LONG_NAMESPACE 119 | } else { 120 | req.Header.Namespace = TTS_NAMESPACE 121 | } 122 | req.Header.TaskId = tts.taskId 123 | req.Payload = tts.StartParam 124 | 125 | b, _ := json.Marshal(req) 126 | tts.nls.logger.Println("send:", string(b)) 127 | tts.nls.cmd(string(b)) 128 | } 129 | 130 | func onTtsCloseHandler(isErr bool, text []byte, proto *nlsProto) { 131 | tts := checkTtsNlsProto(proto) 132 | if tts.onClose != nil { 133 | tts.onClose(tts.UserParam) 134 | } 135 | 136 | tts.nls.shutdown() 137 | } 138 | 139 | func onTtsMetaInfoHandler(isErr bool, text []byte, proto *nlsProto) { 140 | tts := checkTtsNlsProto(proto) 141 | if tts.onMetaInfo != nil { 142 | tts.onMetaInfo(string(text), tts.UserParam) 143 | } 144 | } 145 | 146 | func onTtsRawResultHandler(isErr 
bool, text []byte, proto *nlsProto) { 147 | tts := checkTtsNlsProto(proto) 148 | if tts.onSynthesisResult != nil { 149 | tts.onSynthesisResult(text, tts.UserParam) 150 | } 151 | } 152 | 153 | func onTtsCompletedHandler(isErr bool, text []byte, proto *nlsProto) { 154 | tts := checkTtsNlsProto(proto) 155 | if tts.onCompleted != nil { 156 | tts.onCompleted(string(text), tts.UserParam) 157 | } 158 | 159 | tts.lk.Lock() 160 | defer tts.lk.Unlock() 161 | if tts.completeChan != nil { 162 | tts.completeChan <- true 163 | close(tts.completeChan) 164 | tts.completeChan = nil 165 | } 166 | } 167 | 168 | var ttsProto = commonProto{ 169 | namespace: TTS_NAMESPACE, 170 | handlers: map[string]func(bool, []byte, *nlsProto){ 171 | CLOSE_HANDLER: onTtsCloseHandler, 172 | CONNECTED_HANDLER: onTtsConnectedHandler, 173 | RAW_HANDLER: onTtsRawResultHandler, 174 | TTS_COMPLETED_NAME: onTtsCompletedHandler, 175 | TASK_FAILED_NAME: onTtsTaskFailedHandler, 176 | TTS_METAINFO_NAME: onTtsMetaInfoHandler, 177 | }, 178 | } 179 | 180 | func newSpeechSynthesisProto(isRealtime bool) *commonProto { 181 | if isRealtime { 182 | ttsProto.namespace = TTS_LONG_NAMESPACE 183 | } 184 | return &ttsProto 185 | } 186 | 187 | func NewSpeechSynthesis(config *ConnectionConfig, 188 | logger *NlsLogger, 189 | realtimeLongText bool, 190 | taskfailed func(string, interface{}), 191 | synthesisresult func([]byte, interface{}), 192 | metainfo func(string, interface{}), 193 | completed func(string, interface{}), 194 | closed func(interface{}), 195 | param interface{}) (*SpeechSynthesis, error) { 196 | tts := new(SpeechSynthesis) 197 | proto := newSpeechSynthesisProto(realtimeLongText) 198 | if logger == nil { 199 | logger = DefaultNlsLog() 200 | } 201 | 202 | nls, err := newNlsProto(config, proto, logger, tts) 203 | if err != nil { 204 | return nil, err 205 | } 206 | 207 | tts.nls = nls 208 | tts.UserParam = param 209 | tts.onTaskFailed = taskfailed 210 | tts.onSynthesisResult = synthesisresult 211 | tts.onMetaInfo = 
metainfo 212 | tts.onCompleted = completed 213 | tts.onClose = closed 214 | tts.usingLong = realtimeLongText 215 | return tts, nil 216 | } 217 | 218 | func (tts *SpeechSynthesis) Start(text string, 219 | param SpeechSynthesisStartParam, 220 | extra map[string]interface{}) (chan bool, error) { 221 | if tts.nls == nil { 222 | return nil, errors.New("empty nls: using NewSpeechSynthesis to create a valid instance") 223 | } 224 | 225 | b, err := json.Marshal(param) 226 | if err != nil { 227 | return nil, err 228 | } 229 | 230 | json.Unmarshal(b, &tts.StartParam) 231 | if extra != nil { 232 | if tts.StartParam == nil { 233 | tts.StartParam = extra 234 | } else { 235 | for k, v := range extra { 236 | tts.StartParam[k] = v 237 | } 238 | } 239 | } 240 | tts.StartParam["text"] = text 241 | tts.taskId = getUuid() 242 | err = tts.nls.Connect() 243 | if err != nil { 244 | return nil, err 245 | } 246 | 247 | tts.lk.Lock() 248 | defer tts.lk.Unlock() 249 | tts.completeChan = make(chan bool, 1) 250 | return tts.completeChan, nil 251 | } 252 | 253 | func (tts *SpeechSynthesis) Shutdown() { 254 | if tts.nls == nil { 255 | return 256 | } 257 | 258 | tts.lk.Lock() 259 | defer tts.lk.Unlock() 260 | tts.nls.shutdown() 261 | if tts.completeChan != nil { 262 | tts.completeChan <- false 263 | close(tts.completeChan) 264 | tts.completeChan = nil 265 | } 266 | } 267 | -------------------------------------------------------------------------------- /sr.go: -------------------------------------------------------------------------------- 1 | /* 2 | sr.go 3 | 4 | Copyright 1999-present Alibaba Group Holding Ltd. 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 
8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | 20 | 21 | package nls 22 | 23 | import ( 24 | "encoding/json" 25 | "errors" 26 | "log" 27 | "sync" 28 | ) 29 | 30 | const ( 31 | //namespace field 32 | SR_NAMESPACE = "SpeechRecognizer" 33 | 34 | //name field 35 | SR_START_NAME = "StartRecognition" 36 | SR_STOP_NAME = "StopRecognition" 37 | 38 | SR_STARTED_NAME = "RecognitionStarted" 39 | SR_RESULT_CHG_NAME = "RecognitionResultChanged" 40 | SR_COMPLETED_NAME = "RecognitionCompleted" 41 | ) 42 | 43 | type SpeechRecognitionStartParam struct { 44 | Format string `json:"format,omitempty"` 45 | SampleRate int `json:"sample_rate,omitempty"` 46 | EnableIntermediateResult bool `json:"enable_intermediate_result"` 47 | EnablePunctuationPrediction bool `json:"enable_punctuation_prediction"` 48 | EnableInverseTextNormalization bool `json:"enable_inverse_text_normalization"` 49 | } 50 | 51 | func DefaultSpeechRecognitionParam() SpeechRecognitionStartParam { 52 | return SpeechRecognitionStartParam{ 53 | Format: "pcm", 54 | SampleRate: 16000, 55 | EnableIntermediateResult: true, 56 | EnablePunctuationPrediction: true, 57 | EnableInverseTextNormalization: true, 58 | } 59 | } 60 | 61 | type SpeechRecognition struct { 62 | nls *nlsProto 63 | taskId string 64 | 65 | startCh chan bool 66 | stopCh chan bool 67 | lk sync.Mutex 68 | 69 | onTaskFailed func(text string, param interface{}) 70 | onStarted func(text string, param interface{}) 71 | onResultChanged func(text string, param interface{}) 72 | onCompleted func(text string, param interface{}) 73 | onClose func(param interface{}) 74 | 75 | 
StartParam map[string]interface{} 76 | UserParam interface{} 77 | } 78 | 79 | func checkSrNlsProto(proto *nlsProto) *SpeechRecognition { 80 | if proto == nil { 81 | log.Default().Fatal("empty proto check failed") 82 | return nil 83 | } 84 | 85 | sr, ok := proto.param.(*SpeechRecognition) 86 | if !ok { 87 | log.Default().Fatal("proto param not SpeechRecognition instance") 88 | return nil 89 | } 90 | 91 | return sr 92 | } 93 | 94 | func onSrTaskFailedHandler(isErr bool, text []byte, proto *nlsProto) { 95 | sr := checkSrNlsProto(proto) 96 | if sr.onTaskFailed != nil { 97 | sr.onTaskFailed(string(text), sr.UserParam) 98 | } 99 | 100 | sr.lk.Lock() 101 | defer sr.lk.Unlock() 102 | if sr.startCh != nil { 103 | sr.startCh <- false 104 | close(sr.startCh) 105 | sr.startCh = nil 106 | } 107 | 108 | if sr.stopCh != nil { 109 | sr.stopCh <- false 110 | close(sr.stopCh) 111 | sr.stopCh = nil 112 | } 113 | } 114 | 115 | func onSrConnectedHandler(isErr bool, text []byte, proto *nlsProto) { 116 | sr := checkSrNlsProto(proto) 117 | 118 | req := CommonRequest{} 119 | req.Context = DefaultContext 120 | req.Header.Appkey = sr.nls.connConfig.Appkey 121 | req.Header.MessageId = getUuid() 122 | req.Header.Name = SR_START_NAME 123 | req.Header.Namespace = SR_NAMESPACE 124 | req.Header.TaskId = sr.taskId 125 | req.Payload = sr.StartParam 126 | 127 | b, _ := json.Marshal(req) 128 | sr.nls.logger.Println("send:", string(b)) 129 | sr.nls.cmd(string(b)) 130 | } 131 | 132 | func onSrCloseHandler(isErr bool, text []byte, proto *nlsProto) { 133 | sr := checkSrNlsProto(proto) 134 | if sr.onClose != nil { 135 | sr.onClose(sr.UserParam) 136 | } 137 | 138 | sr.nls.shutdown() 139 | } 140 | 141 | func onSrStartedHandler(isErr bool, text []byte, proto *nlsProto) { 142 | sr := checkSrNlsProto(proto) 143 | if sr.onStarted != nil { 144 | sr.onStarted(string(text), sr.UserParam) 145 | } 146 | 147 | sr.lk.Lock() 148 | defer sr.lk.Unlock() 149 | if sr.startCh != nil { 150 | sr.startCh <- true 151 | 
close(sr.startCh) 152 | sr.startCh = nil 153 | } 154 | } 155 | 156 | func onSrResultChangedHandler(isErr bool, text []byte, proto *nlsProto) { 157 | sr := checkSrNlsProto(proto) 158 | if sr.onResultChanged != nil { 159 | sr.onResultChanged(string(text), sr.UserParam) 160 | } 161 | } 162 | 163 | func onSrCompletedHandler(isErr bool, text []byte, proto *nlsProto) { 164 | sr := checkSrNlsProto(proto) 165 | if sr.onCompleted != nil { 166 | sr.onCompleted(string(text), sr.UserParam) 167 | } 168 | 169 | sr.lk.Lock() 170 | defer sr.lk.Unlock() 171 | if sr.stopCh != nil { 172 | sr.stopCh <- true 173 | close(sr.stopCh) 174 | sr.stopCh = nil 175 | } 176 | } 177 | 178 | var srProto = commonProto{ 179 | namespace: SR_NAMESPACE, 180 | handlers: map[string]func(bool, []byte, *nlsProto){ 181 | CLOSE_HANDLER: onSrCloseHandler, 182 | CONNECTED_HANDLER: onSrConnectedHandler, 183 | SR_STARTED_NAME: onSrStartedHandler, 184 | SR_RESULT_CHG_NAME: onSrResultChangedHandler, 185 | SR_COMPLETED_NAME: onSrCompletedHandler, 186 | TASK_FAILED_NAME: onSrTaskFailedHandler, 187 | }, 188 | } 189 | 190 | func newSpeechRecognitionProto() *commonProto { 191 | return &srProto 192 | } 193 | 194 | func NewSpeechRecognition(config *ConnectionConfig, 195 | logger *NlsLogger, 196 | taskfailed func(string, interface{}), 197 | started func(string, interface{}), 198 | resultchanged func(string, interface{}), 199 | completed func(string, interface{}), 200 | closed func(interface{}), 201 | param interface{}) (*SpeechRecognition, error) { 202 | sr := new(SpeechRecognition) 203 | proto := newSpeechRecognitionProto() 204 | if logger == nil { 205 | logger = DefaultNlsLog() 206 | } 207 | 208 | nls, err := newNlsProto(config, proto, logger, sr) 209 | if err != nil { 210 | return nil, err 211 | } 212 | 213 | sr.nls = nls 214 | sr.UserParam = param 215 | sr.onTaskFailed = taskfailed 216 | sr.onStarted = started 217 | sr.onResultChanged = resultchanged 218 | sr.onCompleted = completed 219 | sr.onClose = closed 220 | 
return sr, nil 221 | } 222 | 223 | func (sr *SpeechRecognition) Start(param SpeechRecognitionStartParam, extra map[string]interface{}) (chan bool, error) { 224 | if sr.nls == nil { 225 | return nil, errors.New("empty nls: using NewSpeechRecognition to create a valid instance") 226 | } 227 | 228 | b, err := json.Marshal(param) 229 | if err != nil { 230 | return nil, err 231 | } 232 | 233 | json.Unmarshal(b, &sr.StartParam) 234 | if extra != nil { 235 | if sr.StartParam == nil { 236 | sr.StartParam = extra 237 | } else { 238 | for k, v := range extra { 239 | sr.StartParam[k] = v 240 | } 241 | } 242 | } 243 | sr.taskId = getUuid() 244 | err = sr.nls.Connect() 245 | if err != nil { 246 | return nil, err 247 | } 248 | 249 | sr.lk.Lock() 250 | defer sr.lk.Unlock() 251 | sr.startCh = make(chan bool, 1) 252 | return sr.startCh, nil 253 | } 254 | 255 | func (sr *SpeechRecognition) Stop() (chan bool, error) { 256 | if sr.nls == nil { 257 | return nil, errors.New("empty nls: using NewSpeechRecognition to create a valid instance") 258 | } 259 | 260 | 261 | req := CommonRequest{} 262 | req.Context = DefaultContext 263 | req.Header.Appkey = sr.nls.connConfig.Appkey 264 | req.Header.MessageId = getUuid() 265 | req.Header.Name = SR_STOP_NAME 266 | req.Header.Namespace = SR_NAMESPACE 267 | req.Header.TaskId = sr.taskId 268 | 269 | b, _ := json.Marshal(req) 270 | err := sr.nls.cmd(string(b)) 271 | if err != nil { 272 | return nil, err 273 | } 274 | 275 | sr.lk.Lock() 276 | defer sr.lk.Unlock() 277 | sr.stopCh = make(chan bool, 1) 278 | return sr.stopCh, nil 279 | } 280 | 281 | func (sr *SpeechRecognition) Shutdown() { 282 | if sr.nls == nil { 283 | return 284 | } 285 | 286 | sr.nls.shutdown() 287 | 288 | sr.lk.Lock() 289 | defer sr.lk.Unlock() 290 | if sr.startCh != nil { 291 | sr.startCh <- false 292 | close(sr.startCh) 293 | sr.startCh = nil 294 | } 295 | 296 | if sr.stopCh != nil { 297 | sr.stopCh <- false 298 | close(sr.stopCh) 299 | sr.stopCh = nil 300 | } 301 | } 302 | 303 | 
func (sr *SpeechRecognition) SendAudioData(data []byte) error { 304 | if sr.nls == nil { 305 | return errors.New("empty nls: using NewSpeechRecognition to create a valid instance") 306 | } 307 | 308 | return sr.nls.sendRawData(data) 309 | } 310 | -------------------------------------------------------------------------------- /st.go: -------------------------------------------------------------------------------- 1 | /* 2 | st.go 3 | 4 | Copyright 1999-present Alibaba Group Holding Ltd. 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package nls 20 | 21 | import ( 22 | "encoding/json" 23 | "errors" 24 | "log" 25 | "sync" 26 | ) 27 | 28 | const ( 29 | //namespace field 30 | ST_NAMESPACE = "SpeechTranscriber" 31 | 32 | //name field 33 | ST_START_NAME = "StartTranscription" 34 | ST_STOP_NAME = "StopTranscription" 35 | ST_CTRL_NAME = "ControlTranscriber" 36 | 37 | ST_STARTED_NAME = "TranscriptionStarted" 38 | ST_SENTENCE_BEGIN_NAME = "SentenceBegin" 39 | ST_SENTENCE_END_NAME = "SentenceEnd" 40 | ST_RESULT_CHG_NAME = "TranscriptionResultChanged" 41 | ST_COMPLETED_NAME = "TranscriptionCompleted" 42 | ) 43 | 44 | type SpeechTranscriptionStartParam struct { 45 | Format string `json:"format,omitempty"` 46 | SampleRate int `json:"sample_rate,omitempty"` 47 | EnableIntermediateResult bool `json:"enable_intermediate_result"` 48 | EnablePunctuationPrediction bool `json:"enable_punctuation_prediction"` 49 | EnableInverseTextNormalization bool `json:"enable_inverse_text_normalization"` 50 | MaxSentenceSilence int `json:"max_sentence_silence,omitempty"` 51 | EnableWords bool `json:"enable_words"` 52 | } 53 | 54 | func DefaultSpeechTranscriptionParam() SpeechTranscriptionStartParam { 55 | return SpeechTranscriptionStartParam{ 56 | Format: "pcm", 57 | SampleRate: 16000, 58 | EnableIntermediateResult: true, 59 | EnablePunctuationPrediction: true, 60 | EnableInverseTextNormalization: true, 61 | MaxSentenceSilence: 800, 62 | EnableWords: false, 63 | } 64 | } 65 | 66 | type SpeechTranscription struct { 67 | nls *nlsProto 68 | taskId string 69 | 70 | startCh chan bool 71 | stopCh chan bool 72 | 73 | lk sync.Mutex 74 | 75 | onTaskFailed func(text string, param interface{}) 76 | onStarted func(text string, param interface{}) 77 | onSentenceBegin func(text string, param interface{}) 78 | onSentenceEnd func(text string, param interface{}) 79 | onResultChanged func(text string, param interface{}) 80 | onCompleted func(text string, param interface{}) 81 | onClose func(param interface{}) 82 | 83 | 
CustomHandler map[string]func(text string, param interface{}) 84 | 85 | StartParam map[string]interface{} 86 | UserParam interface{} 87 | } 88 | 89 | func checkStNlsProto(proto *nlsProto) *SpeechTranscription { 90 | if proto == nil { 91 | log.Default().Fatal("empty proto check failed") 92 | return nil 93 | } 94 | 95 | st, ok := proto.param.(*SpeechTranscription) 96 | if !ok { 97 | log.Default().Fatal("proto param not SpeechTranscription instance") 98 | return nil 99 | } 100 | 101 | return st 102 | } 103 | 104 | func onStTaskFailedHandler(isErr bool, text []byte, proto *nlsProto) { 105 | st := checkStNlsProto(proto) 106 | if st.onTaskFailed != nil { 107 | st.onTaskFailed(string(text), st.UserParam) 108 | } 109 | 110 | st.lk.Lock() 111 | defer st.lk.Unlock() 112 | 113 | if st.startCh != nil { 114 | st.startCh <- false 115 | close(st.startCh) 116 | st.startCh = nil 117 | } 118 | 119 | if st.stopCh != nil { 120 | st.stopCh <- false 121 | close(st.stopCh) 122 | st.stopCh = nil 123 | } 124 | } 125 | 126 | func onStConnectedHandler(isErr bool, text []byte, proto *nlsProto) { 127 | st := checkStNlsProto(proto) 128 | 129 | req := CommonRequest{} 130 | req.Context = DefaultContext 131 | req.Header.Appkey = st.nls.connConfig.Appkey 132 | req.Header.MessageId = getUuid() 133 | req.Header.Name = ST_START_NAME 134 | req.Header.Namespace = ST_NAMESPACE 135 | req.Header.TaskId = st.taskId 136 | req.Payload = st.StartParam 137 | 138 | b, _ := json.Marshal(req) 139 | st.nls.logger.Println("send:", string(b)) 140 | st.nls.cmd(string(b)) 141 | } 142 | 143 | func onStCloseHandler(isErr bool, text []byte, proto *nlsProto) { 144 | st := checkStNlsProto(proto) 145 | if st.onClose != nil { 146 | st.onClose(st.UserParam) 147 | } 148 | 149 | st.nls.shutdown() 150 | } 151 | 152 | func onStStartedHandler(isErr bool, text []byte, proto *nlsProto) { 153 | st := checkStNlsProto(proto) 154 | if st.onStarted != nil { 155 | st.onStarted(string(text), st.UserParam) 156 | } 157 | st.lk.Lock() 158 | 
defer st.lk.Unlock() 159 | if st.startCh != nil { 160 | st.startCh <- true 161 | close(st.startCh) 162 | st.startCh = nil 163 | } 164 | } 165 | 166 | func onStSentenceBeginHandler(isErr bool, text []byte, proto *nlsProto) { 167 | st := checkStNlsProto(proto) 168 | if st.onSentenceBegin != nil { 169 | st.onSentenceBegin(string(text), st.UserParam) 170 | } 171 | } 172 | 173 | func onStSentenceEndHandler(isErr bool, text []byte, proto *nlsProto) { 174 | st := checkStNlsProto(proto) 175 | if st.onSentenceEnd != nil { 176 | st.onSentenceEnd(string(text), st.UserParam) 177 | } 178 | } 179 | 180 | func onStResultChangedHandler(isErr bool, text []byte, proto *nlsProto) { 181 | st := checkStNlsProto(proto) 182 | if st.onResultChanged != nil { 183 | st.onResultChanged(string(text), st.UserParam) 184 | } 185 | } 186 | 187 | func onStCompletedHandler(isErr bool, text []byte, proto *nlsProto) { 188 | st := checkStNlsProto(proto) 189 | if st.onCompleted != nil { 190 | st.onCompleted(string(text), st.UserParam) 191 | } 192 | 193 | st.lk.Lock() 194 | defer st.lk.Unlock() 195 | if st.stopCh != nil { 196 | st.stopCh <- true 197 | st.stopCh = nil 198 | } 199 | } 200 | 201 | func onCustomDefinedHandler(isErr bool, text []byte, proto *nlsProto) { 202 | st := checkStNlsProto(proto) 203 | st.nls.logger.Println("onCustomHandler:", string(text)) 204 | 205 | resp := CommonResponse{} 206 | err := json.Unmarshal(text, &resp) 207 | if err != nil { 208 | st.nls.logger.Println("OCCUR UNKNOWN PROTO:", err) 209 | return 210 | } 211 | 212 | if st.CustomHandler != nil { 213 | handler, ok := st.CustomHandler[resp.Header.Name] 214 | if ok { 215 | handler(string(text), st.UserParam) 216 | } else { 217 | st.nls.logger.Println("no custom handler for", resp.Header.Name) 218 | } 219 | } 220 | } 221 | 222 | var stProto = commonProto{ 223 | namespace: ST_NAMESPACE, 224 | handlers: map[string]func(bool, []byte, *nlsProto){ 225 | CLOSE_HANDLER: onStCloseHandler, 226 | CONNECTED_HANDLER: onStConnectedHandler, 
227 | ST_STARTED_NAME: onStStartedHandler, 228 | ST_SENTENCE_BEGIN_NAME: onStSentenceBeginHandler, 229 | ST_SENTENCE_END_NAME: onStSentenceEndHandler, 230 | ST_RESULT_CHG_NAME: onStResultChangedHandler, 231 | ST_COMPLETED_NAME: onStCompletedHandler, 232 | TASK_FAILED_NAME: onStTaskFailedHandler, 233 | CUSTOM_DEFINED_NAME: onCustomDefinedHandler, 234 | }, 235 | } 236 | 237 | func newSpeechTranscriptionProto() *commonProto { 238 | return &stProto 239 | } 240 | 241 | func NewSpeechTranscription(config *ConnectionConfig, 242 | logger *NlsLogger, 243 | taskfailed func(string, interface{}), 244 | started func(string, interface{}), 245 | sentencebegin func(string, interface{}), 246 | sentenceend func(string, interface{}), 247 | resultchanged func(string, interface{}), 248 | completed func(string, interface{}), 249 | closed func(interface{}), 250 | param interface{}) (*SpeechTranscription, error) { 251 | st := new(SpeechTranscription) 252 | proto := newSpeechTranscriptionProto() 253 | if logger == nil { 254 | logger = DefaultNlsLog() 255 | } 256 | 257 | nls, err := newNlsProto(config, proto, logger, st) 258 | if err != nil { 259 | return nil, err 260 | } 261 | 262 | st.nls = nls 263 | st.UserParam = param 264 | st.onTaskFailed = taskfailed 265 | st.onStarted = started 266 | st.onSentenceBegin = sentencebegin 267 | st.onSentenceEnd = sentenceend 268 | st.onResultChanged = resultchanged 269 | st.onCompleted = completed 270 | st.onClose = closed 271 | return st, nil 272 | } 273 | 274 | func (st *SpeechTranscription) SetCustomHandler(name string, handler func(string, interface{})) { 275 | if st.CustomHandler == nil { 276 | st.CustomHandler = make(map[string]func(string, interface{})) 277 | } 278 | 279 | st.CustomHandler[name] = handler 280 | } 281 | 282 | func (st *SpeechTranscription) Start(param SpeechTranscriptionStartParam, extra map[string]interface{}) (chan bool, error) { 283 | if st.nls == nil { 284 | return nil, errors.New("empty nls: using NewSpeechTranscription to 
create a valid instance") 285 | } 286 | 287 | b, err := json.Marshal(param) 288 | if err != nil { 289 | return nil, err 290 | } 291 | 292 | json.Unmarshal(b, &st.StartParam) 293 | if extra != nil { 294 | if st.StartParam == nil { 295 | st.StartParam = extra 296 | } else { 297 | for k, v := range extra { 298 | st.StartParam[k] = v 299 | } 300 | } 301 | } 302 | st.taskId = getUuid() 303 | err = st.nls.Connect() 304 | if err != nil { 305 | return nil, err 306 | } 307 | 308 | st.lk.Lock() 309 | defer st.lk.Unlock() 310 | 311 | st.startCh = make(chan bool, 1) 312 | return st.startCh, nil 313 | } 314 | 315 | func (st *SpeechTranscription) Ctrl(param map[string]interface{}) error { 316 | if st.nls == nil { 317 | return errors.New("empty nls: using NewSpeechTranscription to create a valid instance") 318 | } 319 | 320 | req := CommonRequest{} 321 | req.Context = DefaultContext 322 | req.Header.Appkey = st.nls.connConfig.Appkey 323 | req.Header.MessageId = getUuid() 324 | req.Header.Name = ST_CTRL_NAME 325 | req.Header.Namespace = ST_NAMESPACE 326 | req.Header.TaskId = st.taskId 327 | req.Payload = param 328 | 329 | b, _ := json.Marshal(req) 330 | err := st.nls.cmd(string(b)) 331 | if err != nil { 332 | return err 333 | } 334 | 335 | return nil 336 | } 337 | 338 | func (st *SpeechTranscription) Stop() (chan bool, error) { 339 | if st.nls == nil { 340 | return nil, errors.New("empty nls: using NewSpeechTranscription to create a valid instance") 341 | } 342 | 343 | req := CommonRequest{} 344 | req.Context = DefaultContext 345 | req.Header.Appkey = st.nls.connConfig.Appkey 346 | req.Header.MessageId = getUuid() 347 | req.Header.Name = ST_STOP_NAME 348 | req.Header.Namespace = ST_NAMESPACE 349 | req.Header.TaskId = st.taskId 350 | 351 | b, _ := json.Marshal(req) 352 | err := st.nls.cmd(string(b)) 353 | if err != nil { 354 | return nil, err 355 | } 356 | 357 | st.lk.Lock() 358 | defer st.lk.Unlock() 359 | st.stopCh = make(chan bool, 1) 360 | return st.stopCh, nil 361 | } 362 | 
363 | func (st *SpeechTranscription) Shutdown() { 364 | if st.nls == nil { 365 | return 366 | } 367 | 368 | st.nls.shutdown() 369 | st.lk.Lock() 370 | defer st.lk.Unlock() 371 | if st.startCh != nil { 372 | st.startCh <- false 373 | close(st.startCh) 374 | st.startCh = nil 375 | } 376 | 377 | if st.stopCh != nil { 378 | st.stopCh <- false 379 | close(st.stopCh) 380 | st.stopCh = nil 381 | } 382 | } 383 | 384 | func (st *SpeechTranscription) SendAudioData(data []byte) error { 385 | if st.nls == nil { 386 | return errors.New("empty nls: using NewSpeechTranscription to create a valid instance") 387 | } 388 | 389 | return st.nls.sendRawData(data) 390 | } 391 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /docs/TTS.md: -------------------------------------------------------------------------------- 1 | # NLS Go SDK说明 2 | 3 | > 本文介绍如何使用阿里云智能语音服务提供的Go SDK,包括SDK的安装方法及SDK代码示例。 4 | 5 | 6 | 7 | ## 前提条件 8 | 9 | 使用SDK前,请先阅读接口说明,详细请参见**接口说明**。 10 | 11 | ### 下载安装 12 | 13 | > 说明 14 | > 15 | > * SDK支持go1.16 16 | > * 请确认已经安装golang环境,并完成基本配置 17 | 18 | 1. 下载SDK 19 | 20 | 通过以下命令完成SDK下载和安装: 21 | 22 | > go get github.com/aliyun/alibabacloud-nls-go-sdk 23 | 24 | 2. 
导入SDK 25 | 26 | 在代码中通过将以下字段加入import来导入SDK: 27 | 28 | > import ("github.com/aliyun/alibabacloud-nls-go-sdk") 29 | 30 | 31 | 32 | ## SDK常量 33 | 34 | | 常量 | 常量含义 | 35 | | ------------------ | ------------------------------------------------------------ | 36 | | SDK_VERSION | SDK版本 | 37 | | PCM | pcm音频格式 | 38 | | WAV | wav音频格式 | 39 | | OPUS | opus音频格式 | 40 | | OPU | opu音频格式 | 41 | | DEFAULT_DISTRIBUTE | 获取token时使用的默认区域,"cn-shanghai" | 42 | | DEFAULT_DOMAIN | 获取token时使用的默认URL,"nls-meta.cn-shanghai.aliyuncs.com" | 43 | | DEFAULT_VERSION | 获取token时使用的协议版本,"2019-02-28" | 44 | | DEFAULT_URL | 默认公有云URL,"wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1" | 45 | 46 | 47 | 48 | ## SDK日志 49 | 50 | ### 1. func DefaultNlsLog() *NlsLogger 51 | 52 | > 用于创建全局唯一的默认日志对象,默认日志以NLS为前缀,输出到标准错误 53 | 54 | 参数说明: 55 | 56 | 无 57 | 58 | 返回值: 59 | 60 | NlsLogger对象指针 61 | 62 | 63 | 64 | ### 2. func NewNlsLogger(w io.Writer, tag string, flag int) *NlsLogger 65 | 66 | > 创建一个新的日志 67 | 68 | 参数说明: 69 | 70 | | 参数 | 类型 | 参数说明 | 71 | | ---- | --------- | ------------------------------- | 72 | | w | io.Writer | 任意实现io.Writer接口的对象 | 73 | | tag | string | 日志前缀,会打印到日志行首部 | 74 | | flag | int | 日志flag,具体参考go官方log文档 | 75 | 76 | 返回值: 77 | 78 | NlsLogger对象指针 79 | 80 | 81 | 82 | ### 3. func (logger *NlsLogger) SetLogSil(sil bool) 83 | 84 | > 设置日志是否输出到对应的io.Writer 85 | 86 | 参数说明: 87 | 88 | | 参数 | 类型 | 参数说明 | 89 | | ---- | ---- | ---------------------------- | 90 | | sil | bool | 是否禁止日志输出,true为禁止 | 91 | 92 | 返回值: 93 | 94 | 无 95 | 96 | 97 | 98 | ### 4. func (logger *NlsLogger) SetDebug(debug bool) 99 | 100 | > 设置是否打印debug日志,仅影响通过Debugf或Debugln进行输出的日志 101 | 102 | 参数说明: 103 | 104 | | 参数 | 类型 | 参数说明 | 105 | | ----- | ---- | --------------------------------- | 106 | | debug | bool | 是否允许debug日志输出,true为允许 | 107 | 108 | 返回值: 109 | 110 | 无 111 | 112 | 113 | 114 | ### 5. 
func (logger *NlsLogger) SetOutput(w io.Writer) 115 | 116 | > 设置日志输出方式 117 | 118 | 参数说明: 119 | 120 | | 参数 | 类型 | 参数说明 | 121 | | ---- | --------- | --------------------------- | 122 | | w | io.Writer | 任意实现io.Writer接口的对象 | 123 | 124 | 返回值: 125 | 126 | 无 127 | 128 | 129 | 130 | ### 6. func (logger *NlsLogger) SetPrefix(prefix string) 131 | 132 | > 设置日志行的标签 133 | 134 | 参数说明: 135 | 136 | | 参数 | 类型 | 参数说明 | 137 | | ------ | ------ | ------------------------------ | 138 | | prefix | string | 日志行标签,会输出在日志行行首 | 139 | 140 | 返回值: 141 | 142 | 无 143 | 144 | 145 | 146 | ### 7. func (logger *NlsLogger) SetFlags(flags int) 147 | 148 | > 设置日志属性 149 | 150 | 参数说明: 151 | 152 | | 参数 | 类型 | 参数说明 | 153 | | ----- | ---- | ------------------------------------------------ | 154 | | flags | int | 日志属性,见https://pkg.go.dev/log#pkg-constants | 155 | 156 | 返回值: 157 | 158 | 无 159 | 160 | 161 | 162 | ### 8. 日志打印 163 | 164 | 日志打印方法: 165 | 166 | | 方法名 | 方法说明 | 167 | | ----------------------------------------------------------- | ------------------------------------------------------------ | 168 | | func (l *NlsLogger) Print(v ...interface{}) | 标准日志输出 | 169 | | func (l *NlsLogger) Println(v ...interface{}) | 标准日志输出,行尾自动换行 | 170 | | func (l *NlsLogger) Printf(format string, v ...interface{}) | 带format的日志输出,format方式见go官方文档 | 171 | | func (l *NlsLogger) Debugln(v ...interface{}) | debug信息日志输出,行尾自动换行 | 172 | | func (l *NlsLogger) Debugf(format string, v ...interface{}) | 带format的debug信息日志输出 | 173 | | func (l *NlsLogger) Fatal(v ...interface{}) | 致命错误日志输出,输出后自动进程退出 | 174 | | func (l *NlsLogger) Fatalln(v ...interface{}) | 致命错误日志输出,行尾自动换行,输出后自动进程退出 | 175 | | func (l *NlsLogger) Fatalf(format string, v ...interface{}) | 带format的致命错误日志输出,输出后自动进程退出 | 176 | | func (l *NlsLogger) Panic(v ...interface{}) | 致命错误日志输出,输出后自动进程退出并打印崩溃信息 | 177 | | func (l *NlsLogger) Panicln(v ...interface{}) | 致命错误日志输出,行尾自动换行,输出后自动进程退出并打印崩溃信息 | 178 | | func (l *NlsLogger) Panicf(format string, v ...interface{}) | 
带format的致命错误日志输出,输出后自动进程退出并打印崩溃信息 | 179 | 180 | 181 | 182 | ## 获取token 183 | 184 | ### 1. func GetToken(dist string, domain string, akid string, akkey string, version string) (*TokenResultMessage, error) 185 | 186 | > 获取访问token 187 | 188 | 参数说明: 189 | 190 | | 参数 | 类型 | 参数说明 | 191 | | ------- | ------ | ------------------------------------------- | 192 | | dist | string | 区域,如果不确定,请使用DEFAULT_DISTRIBUTE | 193 | | domain | string | URL,如果不确定,请使用DEFAULT_DOMAIN | 194 | | akid | string | 阿里云accessid | 195 | | akkey | string | 阿里云accesskey | 196 | | version | string | 协议版本,如果不确定,请使用DEFAULT_VERSION | 197 | 198 | 返回值: 199 | 200 | TokenResultMessage对象指针和错误信息 201 | 202 | 203 | 204 | ## 建立连接 205 | 206 | ### 1. ConnectionConfig 207 | 208 | > 用于建立连接的基础参数 209 | 210 | 参数说明: 211 | 212 | | 参数 | 类型 | 参数说明 | 213 | | ------ | ------ | ------------------------------------------------ | 214 | | Url | string | 访问的公有云URL,如果不确定,可以使用DEFAULT_URL | 215 | | Token | string | 通过GetToken获取的token或者测试token | 216 | | Akid | string | 阿里云accessid | 217 | | Akkey | string | 阿里云accesskey | 218 | | Appkey | string | appkey,可以在控制台中对应项目上看到 | 219 | 220 | 221 | 222 | ### 2. func NewConnectionConfigWithAKInfoDefault(url string, appkey string, akid string, akkey string) (*ConnectionConfig, error) 223 | 224 | > 通过url,appkey,akid和akkey创建连接参数,等效于先调用GetToken然后再调用NewConnectionConfigWithToken 225 | 226 | 参数说明: 227 | 228 | | 参数 | 类型 | 参数说明 | 229 | | ------ | ------ | ------------------------------------------------ | 230 | | Url | string | 访问的公有云URL,如果不确定,可以使用DEFAULT_URL | 231 | | Appkey | string | appkey,可以在控制台中对应项目上看到 | 232 | | Akid | string | 阿里云accessid | 233 | | Akkey | string | 阿里云accesskey | 234 | 235 | 返回值: 236 | 237 | *ConnectionConfig:连接参数对象指针,用于后续创建语音交互实例 238 | 239 | error:异常对象,为nil则无异常 240 | 241 | 242 | 243 | ### 3. 
func NewConnectionConfigWithToken(url string, appkey string, token string) *ConnectionConfig 244 | 245 | > 通过url,appkey和token创建连接参数 246 | 247 | 参数说明: 248 | 249 | | 参数 | 类型 | 参数说明 | 250 | | ------ | ------ | ------------------------------------------------ | 251 | | Url | string | 访问的公有云URL,如果不确定,可以使用DEFAULT_URL | 252 | | Appkey | string | appkey,可以在控制台中对应项目上看到 | 253 | | Token | string | 已经通过GetToken或其他方式获取的token | 254 | 255 | 返回值: 256 | 257 | *ConnectionConfig:连接参数对象指针 258 | 259 | 260 | 261 | ### 4. func NewConnectionConfigFromJson(jsonStr string) (*ConnectionConfig, error) 262 | 263 | > 通过json字符串来创建连接参数 264 | 265 | 参数说明 266 | 267 | | 参数 | 类型 | 参数说明 | 268 | | ------- | ------ | ------------------------------------------------------------ | 269 | | jsonStr | string | 描述连接参数的json字符串,有效字段如下:url,token,akid,akkey,appkey。其中必须包含url和appkey,如果包含token则不需要包含akid和akkey | 270 | 271 | 返回值: 272 | 273 | *ConnectionConfig:连接对象指针 274 | 275 | 276 | 277 | ## 语音合成 278 | 279 | ### 1. SpeechSynthesisStartParam 280 | 281 | 参数说明: 282 | 283 | | 参数 | 类型 | 参数说明 | 284 | | -------------- | ------ | ----------------------------- | 285 | | Voice | string | 发音人,默认“xiaoyun” | 286 | | Format | string | 音频格式,默认使用wav | 287 | | SampleRate | int | 采样率,默认16000 | 288 | | Volume | int | 音量,范围为0-100,默认50 | 289 | | SpeechRate | int | 语速,范围为-500-500,默认为0 | 290 | | PitchRate | int | 音高,范围为-500-500,默认为0 | 291 | | EnableSubtitle | bool | 字幕功能,默认为false | 292 | 293 | ### 2. func DefaultSpeechSynthesisParam() SpeechSynthesisStartParam 294 | 295 | > 创建一个默认的语音合成参数 296 | 297 | 参数说明: 298 | 299 | 无 300 | 301 | 返回值: 302 | 303 | SpeechSynthesisStartParam:语音合成参数 304 | 305 | ### 3. func NewSpeechSynthesis(...) 
(*SpeechSynthesis, error) 306 | 307 | > 创建一个新的语音合成对象 308 | 309 | 参数说明: 310 | 311 | | 参数 | 类型 | 参数说明 | 312 | | --------------- | ------------------------- | ----------------------------------------------------- | 313 | | config | *ConnectionConfig | 见上文建立连接相关内容 | 314 | | logger | *NlsLogger | 见SDK日志相关内容 | 315 | | realtime | bool | 是否使用实时长文本,默认为短文本 | 316 | | taskfailed | func(string, interface{}) | 合成过程中的错误处理回调,interface{}为用户自定义参数 | 317 | | synthesisresult | func([]byte, interface{}) | 语音合成数据回调 | 318 | | metainfo | func(string, interface{}) | 字幕数据回调,需要参数中EnableSubtitle为true | 319 | | completed | func(string, interface{}) | 合成完毕结果回调 | 320 | | closed | func(interface{}) | 连接断开回调 | 321 | | param | interface{} | 用户自定义参数 | 322 | 323 | 返回值: 324 | 325 | *SpeechSynthesis:语音合成对象指针;error:异常对象,为nil则无异常 326 | 327 | ### 4. func (tts *SpeechSynthesis) Start(text string, param SpeechSynthesisStartParam, extra map[string]interface{}) (chan bool, error) 328 | 329 | > 给定文本和参数进行语音合成 330 | 331 | 参数说明: 332 | 333 | | 参数 | 类型 | 参数说明 | 334 | | ----- | ----------------------------- | ----------------- | 335 | | text | string | 待合成文本 | 336 | | param | SpeechSynthesisStartParam | 语音合成参数 | 337 | | extra | map[string]interface{} | 额外key value参数 | 338 | 339 | 返回值: 340 | 341 | chan bool:语音合成完成通知管道 342 | 343 | error:错误异常 344 | 345 | ### 5. 
func (tts *SpeechSynthesis) Shutdown() 346 | 347 | > 强制停止语音合成 348 | 349 | 参数说明: 350 | 351 | 无 352 | 353 | 返回值: 354 | 355 | 无 356 | 357 | 358 | 359 | ### 代码示例: 360 | 361 | ```go 362 | package main 363 | 364 | import ( 365 | "errors" 366 | "flag" 367 | "fmt" 368 | "io" 369 | "log" 370 | "os" 371 | "os/signal" 372 | "sync" 373 | "time" 374 | 375 | "github.com/aliyun/alibabacloud-nls-go-sdk" 376 | ) 377 | 378 | const ( 379 | AKID = "Your AKID" 380 | AKKEY = "Your AKKEY" 381 | //online key 382 | APPKEY = "Your APPKEY" 383 | TOKEN = "Your TOKEN" 384 | ) 385 | 386 | type TtsUserParam struct { 387 | F io.Writer 388 | Logger *nls.NlsLogger 389 | } 390 | 391 | func onTaskFailed(text string, param interface{}) { 392 | p, ok := param.(*TtsUserParam) 393 | if !ok { 394 | log.Default().Fatal("invalid logger") 395 | return 396 | } 397 | 398 | p.Logger.Println("TaskFailed:", text) 399 | } 400 | 401 | func onSynthesisResult(data []byte, param interface{}) { 402 | p, ok := param.(*TtsUserParam) 403 | if !ok { 404 | log.Default().Fatal("invalid logger") 405 | return 406 | } 407 | p.F.Write(data) 408 | } 409 | 410 | func onCompleted(text string, param interface{}) { 411 | p, ok := param.(*TtsUserParam) 412 | if !ok { 413 | log.Default().Fatal("invalid logger") 414 | return 415 | } 416 | 417 | p.Logger.Println("onCompleted:", text) 418 | } 419 | 420 | 421 | func onClose(param interface{}) { 422 | p, ok := param.(*TtsUserParam) 423 | if !ok { 424 | log.Default().Fatal("invalid logger") 425 | return 426 | } 427 | 428 | p.Logger.Println("onClosed:") 429 | } 430 | 431 | func waitReady(ch chan bool, logger *nls.NlsLogger) error { 432 | select { 433 | case done := <-ch: 434 | { 435 | if !done { 436 | logger.Println("Wait failed") 437 | return errors.New("wait failed") 438 | } 439 | logger.Println("Wait done") 440 | } 441 | case <-time.After(60 * time.Second): 442 | { 443 | logger.Println("Wait timeout") 444 | return errors.New("wait timeout") 445 | } 446 | } 447 | return nil 448 | } 449 
| 450 | var lk sync.Mutex 451 | var fail = 0 452 | var reqNum = 0 453 | 454 | const ( 455 | TEXT = "你好小德,今天天气怎么样。" 456 | ) 457 | 458 | func testMultiInstance(num int) { 459 | param := nls.DefaultSpeechSynthesisParam() 460 | config,_ := nls.NewConnectionConfigWithAKInfoDefault(nls.DEFAULT_URL, APPKEY, AKID, AKKEY) 461 | var wg sync.WaitGroup 462 | for i := 0; i < num; i++ { 463 | wg.Add(1) 464 | go func(id int) { 465 | defer wg.Done() 466 | strId := fmt.Sprintf("ID%d ", id) 467 | fname := fmt.Sprintf("ttsdump%d.wav", id) 468 | ttsUserParam := new(TtsUserParam) 469 | fout, err := os.OpenFile(fname, os.O_RDWR|os.O_TRUNC|os.O_CREATE, 0666) 470 | logger := nls.NewNlsLogger(os.Stderr, strId, log.LstdFlags|log.Lmicroseconds) 471 | logger.SetLogSil(false) 472 | logger.SetDebug(true) 473 | logger.Printf("Test Normal Case for SpeechRecognition:%s", strId) 474 | ttsUserParam.F = fout 475 | ttsUserParam.Logger = logger 476 | tts, err := nls.NewSpeechSynthesis(config, logger, false, 477 | onTaskFailed, onSynthesisResult, nil, 478 | onCompleted, onClose, ttsUserParam) 479 | if err != nil { 480 | logger.Fatalln(err) 481 | return 482 | } 483 | 484 | for { 485 | lk.Lock() 486 | reqNum++ 487 | lk.Unlock() 488 | logger.Println("SR start") 489 | ch, err := tts.Start(TEXT, param, nil) 490 | if err != nil { 491 | lk.Lock() 492 | fail++ 493 | lk.Unlock() 494 | tts.Shutdown() 495 | continue 496 | } 497 | 498 | err = waitReady(ch, logger) 499 | if err != nil { 500 | lk.Lock() 501 | fail++ 502 | lk.Unlock() 503 | tts.Shutdown() 504 | continue 505 | } 506 | logger.Println("Synthesis done") 507 | tts.Shutdown() 508 | } 509 | }(i) 510 | } 511 | 512 | wg.Wait() 513 | } 514 | 515 | func main() { 516 | coroutineId := flag.Int("num", 1, "coroutine number") 517 | flag.Parse() 518 | log.Default().Printf("start %d coroutines", *coroutineId) 519 | 520 | c := make(chan os.Signal, 1) 521 | signal.Notify(c, os.Interrupt) 522 | go func() { 523 | for range c { 524 | lk.Lock() 525 | log.Printf(">>>>>>>>REQ 
NUM: %d>>>>>>>>>FAIL: %d", reqNum, fail) 526 | lk.Unlock() 527 | os.Exit(0) 528 | } 529 | }() 530 | testMultiInstance(*coroutineId) 531 | } 532 | 533 | ``` 534 | 535 | 536 | 537 | -------------------------------------------------------------------------------- /docs/SR.md: -------------------------------------------------------------------------------- 1 | # NLS Go SDK说明 2 | 3 | > 本文介绍如何使用阿里云智能语音服务提供的Go SDK,包括SDK的安装方法及SDK代码示例。 4 | 5 | 6 | 7 | ## 前提条件 8 | 9 | 使用SDK前,请先阅读接口说明,详细请参见**接口说明**。 10 | 11 | ### 下载安装 12 | 13 | > 说明 14 | > 15 | > * SDK支持go1.16 16 | > * 请确认已经安装golang环境,并完成基本配置 17 | 18 | 1. 下载SDK 19 | 20 | 通过以下命令完成SDK下载和安装: 21 | 22 | > go get github.com/aliyun/alibabacloud-nls-go-sdk 23 | 24 | 2. 导入SDK 25 | 26 | 在代码中通过将以下字段加入import来导入SDK: 27 | 28 | > import ("github.com/aliyun/alibabacloud-nls-go-sdk") 29 | 30 | 31 | 32 | ## SDK常量 33 | 34 | | 常量 | 常量含义 | 35 | | ------------------ | ------------------------------------------------------------ | 36 | | SDK_VERSION | SDK版本 | 37 | | PCM | pcm音频格式 | 38 | | WAV | wav音频格式 | 39 | | OPUS | opus音频格式 | 40 | | OPU | opu音频格式 | 41 | | DEFAULT_DISTRIBUTE | 获取token时使用的默认区域,"cn-shanghai" | 42 | | DEFAULT_DOMAIN | 获取token时使用的默认URL,"nls-meta.cn-shanghai.aliyuncs.com" | 43 | | DEFAULT_VERSION | 获取token时使用的协议版本,"2019-02-28" | 44 | | DEFAULT_URL | 默认公有云URL,"wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1" | 45 | 46 | 47 | 48 | ## SDK日志 49 | 50 | ### 1. func DefaultNlsLog() *NlsLogger 51 | 52 | > 用于创建全局唯一的默认日志对象,默认日志以NLS为前缀,输出到标准错误 53 | 54 | 参数说明: 55 | 56 | 无 57 | 58 | 返回值: 59 | 60 | NlsLogger对象指针 61 | 62 | 63 | 64 | ### 2. func NewNlsLogger(w io.Writer, tag string, flag int) *NlsLogger 65 | 66 | > 创建一个新的日志 67 | 68 | 参数说明: 69 | 70 | | 参数 | 类型 | 参数说明 | 71 | | ---- | --------- | ------------------------------- | 72 | | w | io.Writer | 任意实现io.Writer接口的对象 | 73 | | tag | string | 日志前缀,会打印到日志行首部 | 74 | | flag | int | 日志flag,具体参考go官方log文档 | 75 | 76 | 返回值: 77 | 78 | NlsLogger对象指针 79 | 80 | 81 | 82 | ### 3. 
func (logger *NlsLogger) SetLogSil(sil bool) 83 | 84 | > 设置日志是否输出到对应的io.Writer 85 | 86 | 参数说明: 87 | 88 | | 参数 | 类型 | 参数说明 | 89 | | ---- | ---- | ---------------------------- | 90 | | sil | bool | 是否禁止日志输出,true为禁止 | 91 | 92 | 返回值: 93 | 94 | 无 95 | 96 | 97 | 98 | ### 4. func (logger *NlsLogger) SetDebug(debug bool) 99 | 100 | > 设置是否打印debug日志,仅影响通过Debugf或Debugln进行输出的日志 101 | 102 | 参数说明: 103 | 104 | | 参数 | 类型 | 参数说明 | 105 | | ----- | ---- | --------------------------------- | 106 | | debug | bool | 是否允许debug日志输出,true为允许 | 107 | 108 | 返回值: 109 | 110 | 无 111 | 112 | 113 | 114 | ### 5. func (logger *NlsLogger) SetOutput(w io.Writer) 115 | 116 | > 设置日志输出方式 117 | 118 | 参数说明: 119 | 120 | | 参数 | 类型 | 参数说明 | 121 | | ---- | --------- | --------------------------- | 122 | | w | io.Writer | 任意实现io.Writer接口的对象 | 123 | 124 | 返回值: 125 | 126 | 无 127 | 128 | 129 | 130 | ### 6. func (logger *NlsLogger) SetPrefix(prefix string) 131 | 132 | > 设置日志行的标签 133 | 134 | 参数说明: 135 | 136 | | 参数 | 类型 | 参数说明 | 137 | | ------ | ------ | ------------------------------ | 138 | | prefix | string | 日志行标签,会输出在日志行行首 | 139 | 140 | 返回值: 141 | 142 | 无 143 | 144 | 145 | 146 | ### 7. func (logger *NlsLogger) SetFlags(flags int) 147 | 148 | > 设置日志属性 149 | 150 | 参数说明: 151 | 152 | | 参数 | 类型 | 参数说明 | 153 | | ----- | ---- | ------------------------------------------------ | 154 | | flags | int | 日志属性,见https://pkg.go.dev/log#pkg-constants | 155 | 156 | 返回值: 157 | 158 | 无 159 | 160 | 161 | 162 | ### 8. 
日志打印 163 | 164 | 日志打印方法: 165 | 166 | | 方法名 | 方法说明 | 167 | | ----------------------------------------------------------- | ------------------------------------------------------------ | 168 | | func (l *NlsLogger) Print(v ...interface{}) | 标准日志输出 | 169 | | func (l *NlsLogger) Println(v ...interface{}) | 标准日志输出,行尾自动换行 | 170 | | func (l *NlsLogger) Printf(format string, v ...interface{}) | 带format的日志输出,format方式见go官方文档 | 171 | | func (l *NlsLogger) Debugln(v ...interface{}) | debug信息日志输出,行尾自动换行 | 172 | | func (l *NlsLogger) Debugf(format string, v ...interface{}) | 带format的debug信息日志输出 | 173 | | func (l *NlsLogger) Fatal(v ...interface{}) | 致命错误日志输出,输出后自动进程退出 | 174 | | func (l *NlsLogger) Fatalln(v ...interface{}) | 致命错误日志输出,行尾自动换行,输出后自动进程退出 | 175 | | func (l *NlsLogger) Fatalf(format string, v ...interface{}) | 带format的致命错误日志输出,输出后自动进程退出 | 176 | | func (l *NlsLogger) Panic(v ...interface{}) | 致命错误日志输出,输出后自动进程退出并打印崩溃信息 | 177 | | func (l *NlsLogger) Panicln(v ...interface{}) | 致命错误日志输出,行尾自动换行,输出后自动进程退出并打印崩溃信息 | 178 | | func (l *NlsLogger) Panicf(format string, v ...interface{}) | 带format的致命错误日志输出,输出后自动进程退出并打印崩溃信息 | 179 | 180 | 181 | 182 | ## 获取token 183 | 184 | ### 1. func GetToken(dist string, domain string, akid string, akkey string, version string) (*TokenResultMessage, error) 185 | 186 | > 获取访问token 187 | 188 | 参数说明: 189 | 190 | | 参数 | 类型 | 参数说明 | 191 | | ------- | ------ | ------------------------------------------- | 192 | | dist | string | 区域,如果不确定,请使用DEFAULT_DISTRIBUTE | 193 | | domain | string | URL,如果不确定,请使用DEFAULT_DOMAIN | 194 | | akid | string | 阿里云accessid | 195 | | akkey | string | 阿里云accesskey | 196 | | version | string | 协议版本,如果不确定,请使用DEFAULT_VERSION | 197 | 198 | 返回值: 199 | 200 | TokenResultMessage对象指针和错误信息 201 | 202 | 203 | 204 | ## 建立连接 205 | 206 | ### 1. 
ConnectionConfig 207 | 208 | > 用于建立连接的基础参数 209 | 210 | 参数说明: 211 | 212 | | 参数 | 类型 | 参数说明 | 213 | | ------ | ------ | ------------------------------------------------ | 214 | | Url | string | 访问的公有云URL,如果不确定,可以使用DEFAULT_URL | 215 | | Token | string | 通过GetToken获取的token或者测试token | 216 | | Akid | string | 阿里云accessid | 217 | | Akkey | string | 阿里云accesskey | 218 | | Appkey | string | appkey,可以在控制台中对应项目上看到 | 219 | 220 | 221 | 222 | ### 2. func NewConnectionConfigWithAKInfoDefault(url string, appkey string, akid string, akkey string) (*ConnectionConfig, error) 223 | 224 | > 通过url,appkey,akid和akkey创建连接参数,等效于先调用GetToken然后再调用NewConnectionConfigWithToken 225 | 226 | 参数说明: 227 | 228 | | 参数 | 类型 | 参数说明 | 229 | | ------ | ------ | ------------------------------------------------ | 230 | | Url | string | 访问的公有云URL,如果不确定,可以使用DEFAULT_URL | 231 | | Appkey | string | appkey,可以在控制台中对应项目上看到 | 232 | | Akid | string | 阿里云accessid | 233 | | Akkey | string | 阿里云accesskey | 234 | 235 | 返回值: 236 | 237 | *ConnectionConfig:连接参数对象指针,用于后续创建语音交互实例 238 | 239 | error:异常对象,为nil则无异常 240 | 241 | 242 | 243 | ### 3. func NewConnectionConfigWithToken(url string, appkey string, token string) *ConnectionConfig 244 | 245 | > 通过url,appkey和token创建连接参数 246 | 247 | 参数说明: 248 | 249 | | 参数 | 类型 | 参数说明 | 250 | | ------ | ------ | ------------------------------------------------ | 251 | | Url | string | 访问的公有云URL,如果不确定,可以使用DEFAULT_URL | 252 | | Appkey | string | appkey,可以在控制台中对应项目上看到 | 253 | | Token | string | 已经通过GetToken或其他方式获取的token | 254 | 255 | 返回值: 256 | 257 | *ConnectionConfig:连接参数对象指针 258 | 259 | 260 | 261 | ### 4. 
func NewConnectionConfigFromJson(jsonStr string) (*ConnectionConfig, error) 262 | 263 | > 通过json字符串来创建连接参数 264 | 265 | 参数说明 266 | 267 | | 参数 | 类型 | 参数说明 | 268 | | ------- | ------ | ------------------------------------------------------------ | 269 | | jsonStr | string | 描述连接参数的json字符串,有效字段如下:url,token,akid,akkey,appkey。其中必须包含url和appkey,如果包含token则不需要包含akid和akkey | 270 | 271 | 返回值: 272 | 273 | *ConnectionConfig:连接对象指针 274 | 275 | ## 一句话识别 276 | 277 | ### 1. SpeechRecognitionStartParam 278 | 279 | > 一句话识别参数 280 | 281 | 参数说明: 282 | 283 | | 参数 | 类型 | 参数说明 | 284 | | ------------------------------ | ------ | --------------------- | 285 | | Format | string | 音频格式,默认使用pcm | 286 | | SampleRate | int | 采样率,默认16000 | 287 | | EnableIntermediateResult | bool | 是否打开中间结果返回 | 288 | | EnablePunctuationPredition | bool | 是否打开标点预测 | 289 | | EnableInverseTextNormalization | bool | 是否打开ITN | 290 | 291 | 292 | 293 | ### 2. func DefaultSpeechRecognitionParam() SpeechRecognitionStartParam 294 | 295 | > 返回一个默认的推荐参数,其中format为pcm,采样率为16000,中间结果,标点预测和ITN全开 296 | 297 | 参数说明: 298 | 299 | 无 300 | 301 | 返回值: 302 | 303 | 默认参数 304 | 305 | 306 | 307 | ### 3. func NewSpeechRecognition(...) (*SpeechRecognition, error) 308 | 309 | > 创建一个SpeechRecognition实例 310 | 311 | 参数说明: 312 | 313 | | 参数 | 类型 | 参数说明 | 314 | | ------------- | ------------------------- | ----------------------------------------------------- | 315 | | config | *ConnectionConfig | 见上文建立连接相关内容 | 316 | | logger | *NlsLogger | 见SDK日志相关内容 | 317 | | taskfailed | func(string, interface{}) | 识别过程中的错误处理回调,interface{}为用户自定义参数 | 318 | | started | func(string, interface{}) | 建连完成回调 | 319 | | resultchanged | func(string, interface{}) | 识别中间结果回调 | 320 | | completed | func(string, interface{}) | 最终识别结果回调 | 321 | | closed | func(interface{}) | 连接断开回调 | 322 | | param | interface{} | 用户自定义参数 | 323 | 324 | 返回值: 325 | 326 | *SpeechRecognition:识别对象指针 327 | 328 | error:错误异常 329 | 330 | 331 | 332 | ### 4. 
func (sr *SpeechRecognition) Start(param SpeechRecognitionStartParam, extra map[string]interface{}) (chan bool, error) 333 | 334 | > 根据param发起一次一句话识别 335 | 336 | 参数说明: 337 | 338 | | 参数 | 类型 | 参数说明 | 339 | | ----- | --------------------------- | ----------------- | 340 | | param | SpeechRecognitionStartParam | 一句话识别参数 | 341 | | extra | map[string]interface{} | 额外key value参数 | 342 | 343 | 返回值: 344 | 345 | chan bool:同步start完成的管道 346 | 347 | error:错误异常 348 | 349 | ### 5. func (sr *SpeechRecognition) Stop() (chan bool, error) 350 | 351 | > 停止一句话识别 352 | 353 | 参数说明: 354 | 355 | 无 356 | 357 | 返回值: 358 | 359 | chan bool:同步stop完成的管道 360 | 361 | error:错误异常 362 | 363 | 364 | 365 | ### 6. func (sr *SpeechRecognition) Shutdown() 366 | 367 | > 强制断开连接 368 | 369 | 参数说明: 370 | 371 | 无 372 | 373 | 返回值: 374 | 375 | 无 376 | 377 | 378 | 379 | ### 7. func (sr *SpeechRecognition) SendAudioData(data []byte) error 380 | 381 | > 发送音频,音频格式必须和参数中一致 382 | 383 | 参数说明 384 | 385 | | 参数 | 类型 | 参数说明 | 386 | | ---- | ------ | -------- | 387 | | data | []byte | 音频数据 | 388 | 389 | 返回值: 390 | 391 | error:异常错误 392 | 393 | 394 | 395 | ### 一句话识别代码示例: 396 | 397 | ```go 398 | package main 399 | 400 | import ( 401 | "errors" 402 | "flag" 403 | "fmt" 404 | "log" 405 | "os" 406 | "os/signal" 407 | "sync" 408 | "time" 409 | 410 | "github.com/aliyun/alibabacloud-nls-go-sdk" 411 | ) 412 | 413 | const ( 414 | AKID = "Your AKID" 415 | AKKEY = "Your AKKEY" 416 | //online key 417 | APPKEY = "Your APPKEY" 418 | TOKEN = "Your TOKEN" 419 | ) 420 | 421 | func onTaskFailed(text string, param interface{}) { 422 | logger, ok := param.(*nls.NlsLogger) 423 | if !ok { 424 | log.Default().Fatal("invalid logger") 425 | return 426 | } 427 | 428 | logger.Println("TaskFailed:", text) 429 | } 430 | 431 | func onStarted(text string, param interface{}) { 432 | logger, ok := param.(*nls.NlsLogger) 433 | if !ok { 434 | log.Default().Fatal("invalid logger") 435 | return 436 | } 437 | 438 | logger.Println("onStarted:", text) 439 | } 
440 | 441 | func onResultChanged(text string, param interface{}) { 442 | logger, ok := param.(*nls.NlsLogger) 443 | if !ok { 444 | log.Default().Fatal("invalid logger") 445 | return 446 | } 447 | 448 | logger.Println("onResultChanged:", text) 449 | } 450 | 451 | func onCompleted(text string, param interface{}) { 452 | logger, ok := param.(*nls.NlsLogger) 453 | if !ok { 454 | log.Default().Fatal("invalid logger") 455 | return 456 | } 457 | 458 | logger.Println("onCompleted:", text) 459 | } 460 | 461 | func onClose(param interface{}) { 462 | logger, ok := param.(*nls.NlsLogger) 463 | if !ok { 464 | log.Default().Fatal("invalid logger") 465 | return 466 | } 467 | 468 | logger.Println("onClosed:") 469 | } 470 | 471 | func waitReady(ch chan bool, logger *nls.NlsLogger) error { 472 | select { 473 | case done := <-ch: 474 | { 475 | if !done { 476 | logger.Println("Wait failed") 477 | return errors.New("wait failed") 478 | } 479 | logger.Println("Wait done") 480 | } 481 | case <-time.After(20 * time.Second): 482 | { 483 | logger.Println("Wait timeout") 484 | return errors.New("wait timeout") 485 | } 486 | } 487 | return nil 488 | } 489 | 490 | var lk sync.Mutex 491 | var fail = 0 492 | var reqNum = 0 493 | 494 | func testMultiInstance(num int) { 495 | pcm, err := os.Open("tests/test1.pcm") 496 | if err != nil { 497 | log.Default().Fatalln(err) 498 | } 499 | 500 | buffers := nls.LoadPcmInChunk(pcm, 320) 501 | param := nls.DefaultSpeechRecognitionParam() 502 | config := nls.NewConnectionConfigWithAKInfoDefault(nls.DEFAULT_URL, APPKEY, AKID, AKKEY) 503 | var wg sync.WaitGroup 504 | for i := 0; i < num; i++ { 505 | wg.Add(1) 506 | go func(id int) { 507 | defer wg.Done() 508 | strId := fmt.Sprintf("ID%d ", id) 509 | logger := nls.NewNlsLogger(os.Stderr, strId, log.LstdFlags|log.Lmicroseconds) 510 | logger.SetLogSil(false) 511 | logger.SetDebug(true) 512 | logger.Printf("Test Normal Case for SpeechRecognition:%s", strId) 513 | sr, err := nls.NewSpeechRecognition(config, logger, 
514 | onTaskFailed, onStarted, onResultChanged, 515 | onCompleted, onClose, logger) 516 | if err != nil { 517 | logger.Fatalln(err) 518 | return 519 | } 520 | 521 | test_ex := make(map[string]interface{}) 522 | test_ex["test"] = "hello" 523 | 524 | for { 525 | lk.Lock() 526 | reqNum++ 527 | lk.Unlock() 528 | logger.Println("SR start") 529 | ready, err := sr.Start(param, test_ex) 530 | if err != nil { 531 | lk.Lock() 532 | fail++ 533 | lk.Unlock() 534 | sr.Shutdown() 535 | continue 536 | } 537 | 538 | err = waitReady(ready, logger) 539 | if err != nil { 540 | lk.Lock() 541 | fail++ 542 | lk.Unlock() 543 | sr.Shutdown() 544 | continue 545 | } 546 | 547 | for _, data := range buffers.Data { 548 | if data != nil { 549 | sr.SendAudioData(data.Data) 550 | time.Sleep(10 * time.Millisecond) 551 | } 552 | } 553 | 554 | logger.Println("send audio done") 555 | ready, err = sr.Stop() 556 | if err != nil { 557 | lk.Lock() 558 | fail++ 559 | lk.Unlock() 560 | sr.Shutdown() 561 | continue 562 | } 563 | 564 | err = waitReady(ready, logger) 565 | if err != nil { 566 | lk.Lock() 567 | fail++ 568 | lk.Unlock() 569 | sr.Shutdown() 570 | continue 571 | } 572 | 573 | logger.Println("Sr done") 574 | sr.Shutdown() 575 | } 576 | }(i) 577 | } 578 | 579 | wg.Wait() 580 | } 581 | 582 | func main() { 583 | coroutineId := flag.Int("num", 1, "coroutine number") 584 | flag.Parse() 585 | log.Default().Printf("start %d coroutines", *coroutineId) 586 | 587 | c := make(chan os.Signal, 1) 588 | signal.Notify(c, os.Interrupt) 589 | go func() { 590 | for range c { 591 | lk.Lock() 592 | log.Printf(">>>>>>>>REQ NUM: %d>>>>>>>>>FAIL: %d", reqNum, fail) 593 | lk.Unlock() 594 | os.Exit(0) 595 | } 596 | }() 597 | testMultiInstance(*coroutineId) 598 | } 599 | 600 | ``` 601 | 602 | 603 | 604 | 605 | 606 | -------------------------------------------------------------------------------- /docs/ST.md: -------------------------------------------------------------------------------- 1 | # NLS Go SDK说明 2 | 3 | > 
本文介绍如何使用阿里云智能语音服务提供的Go SDK,包括SDK的安装方法及SDK代码示例。 4 | 5 | 6 | 7 | ## 前提条件 8 | 9 | 使用SDK前,请先阅读接口说明,详细请参见**接口说明**。 10 | 11 | ### 下载安装 12 | 13 | > 说明 14 | > 15 | > * SDK支持go1.16 16 | > * 请确认已经安装golang环境,并完成基本配置 17 | 18 | 1. 下载SDK 19 | 20 | 通过以下命令完成SDK下载和安装: 21 | 22 | > go get github.com/aliyun/alibabacloud-nls-go-sdk 23 | 24 | 2. 导入SDK 25 | 26 | 在代码中通过将以下字段加入import来导入SDK: 27 | 28 | > import ("github.com/aliyun/alibabacloud-nls-go-sdk") 29 | 30 | 31 | 32 | ## SDK常量 33 | 34 | | 常量 | 常量含义 | 35 | | ------------------ | ------------------------------------------------------------ | 36 | | SDK_VERSION | SDK版本 | 37 | | PCM | pcm音频格式 | 38 | | WAV | wav音频格式 | 39 | | OPUS | opus音频格式 | 40 | | OPU | opu音频格式 | 41 | | DEFAULT_DISTRIBUTE | 获取token时使用的默认区域,"cn-shanghai" | 42 | | DEFAULT_DOMAIN | 获取token时使用的默认URL,"nls-meta.cn-shanghai.aliyuncs.com" | 43 | | DEFAULT_VERSION | 获取token时使用的协议版本,"2019-02-28" | 44 | | DEFAULT_URL | 默认公有云URL,"wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1" | 45 | 46 | 47 | 48 | ## SDK日志 49 | 50 | ### 1. func DefaultNlsLog() *NlsLogger 51 | 52 | > 用于创建全局唯一的默认日志对象,默认日志以NLS为前缀,输出到标准错误 53 | 54 | 参数说明: 55 | 56 | 无 57 | 58 | 返回值: 59 | 60 | NlsLogger对象指针 61 | 62 | 63 | 64 | ### 2. func NewNlsLogger(w io.Writer, tag string, flag int) *NlsLogger 65 | 66 | > 创建一个新的日志 67 | 68 | 参数说明: 69 | 70 | | 参数 | 类型 | 参数说明 | 71 | | ---- | --------- | ------------------------------- | 72 | | w | io.Writer | 任意实现io.Writer接口的对象 | 73 | | tag | string | 日志前缀,会打印到日志行首部 | 74 | | flag | int | 日志flag,具体参考go官方log文档 | 75 | 76 | 返回值: 77 | 78 | NlsLogger对象指针 79 | 80 | 81 | 82 | ### 3. func (logger *NlsLogger) SetLogSil(sil bool) 83 | 84 | > 设置日志是否输出到对应的io.Writer 85 | 86 | 参数说明: 87 | 88 | | 参数 | 类型 | 参数说明 | 89 | | ---- | ---- | ---------------------------- | 90 | | sil | bool | 是否禁止日志输出,true为禁止 | 91 | 92 | 返回值: 93 | 94 | 无 95 | 96 | 97 | 98 | ### 4. 
func (logger *NlsLogger) SetDebug(debug bool) 99 | 100 | > 设置是否打印debug日志,仅影响通过Debugf或Debugln进行输出的日志 101 | 102 | 参数说明: 103 | 104 | | 参数 | 类型 | 参数说明 | 105 | | ----- | ---- | --------------------------------- | 106 | | debug | bool | 是否允许debug日志输出,true为允许 | 107 | 108 | 返回值: 109 | 110 | 无 111 | 112 | 113 | 114 | ### 5. func (logger *NlsLogger) SetOutput(w io.Writer) 115 | 116 | > 设置日志输出方式 117 | 118 | 参数说明: 119 | 120 | | 参数 | 类型 | 参数说明 | 121 | | ---- | --------- | --------------------------- | 122 | | w | io.Writer | 任意实现io.Writer接口的对象 | 123 | 124 | 返回值: 125 | 126 | 无 127 | 128 | 129 | 130 | ### 6. func (logger *NlsLogger) SetPrefix(prefix string) 131 | 132 | > 设置日志行的标签 133 | 134 | 参数说明: 135 | 136 | | 参数 | 类型 | 参数说明 | 137 | | ------ | ------ | ------------------------------ | 138 | | prefix | string | 日志行标签,会输出在日志行行首 | 139 | 140 | 返回值: 141 | 142 | 无 143 | 144 | 145 | 146 | ### 7. func (logger *NlsLogger) SetFlags(flags int) 147 | 148 | > 设置日志属性 149 | 150 | 参数说明: 151 | 152 | | 参数 | 类型 | 参数说明 | 153 | | ----- | ---- | ------------------------------------------------ | 154 | | flags | int | 日志属性,见https://pkg.go.dev/log#pkg-constants | 155 | 156 | 返回值: 157 | 158 | 无 159 | 160 | 161 | 162 | ### 8. 
日志打印 163 | 164 | 日志打印方法: 165 | 166 | | 方法名 | 方法说明 | 167 | | ----------------------------------------------------------- | ------------------------------------------------------------ | 168 | | func (l *NlsLogger) Print(v ...interface{}) | 标准日志输出 | 169 | | func (l *NlsLogger) Println(v ...interface{}) | 标准日志输出,行尾自动换行 | 170 | | func (l *NlsLogger) Printf(format string, v ...interface{}) | 带format的日志输出,format方式见go官方文档 | 171 | | func (l *NlsLogger) Debugln(v ...interface{}) | debug信息日志输出,行尾自动换行 | 172 | | func (l *NlsLogger) Debugf(format string, v ...interface{}) | 带format的debug信息日志输出 | 173 | | func (l *NlsLogger) Fatal(v ...interface{}) | 致命错误日志输出,输出后自动进程退出 | 174 | | func (l *NlsLogger) Fatalln(v ...interface{}) | 致命错误日志输出,行尾自动换行,输出后自动进程退出 | 175 | | func (l *NlsLogger) Fatalf(format string, v ...interface{}) | 带format的致命错误日志输出,输出后自动进程退出 | 176 | | func (l *NlsLogger) Panic(v ...interface{}) | 致命错误日志输出,输出后自动进程退出并打印崩溃信息 | 177 | | func (l *NlsLogger) Panicln(v ...interface{}) | 致命错误日志输出,行尾自动换行,输出后自动进程退出并打印崩溃信息 | 178 | | func (l *NlsLogger) Panicf(format string, v ...interface{}) | 带format的致命错误日志输出,输出后自动进程退出并打印崩溃信息 | 179 | 180 | 181 | 182 | ## 获取token 183 | 184 | ### 1. func GetToken(dist string, domain string, akid string, akkey string, version string) (*TokenResultMessage, error) 185 | 186 | > 获取访问token 187 | 188 | 参数说明: 189 | 190 | | 参数 | 类型 | 参数说明 | 191 | | ------- | ------ | ------------------------------------------- | 192 | | dist | string | 区域,如果不确定,请使用DEFAULT_DISTRIBUTE | 193 | | domain | string | URL,如果不确定,请使用DEFAULT_DOMAIN | 194 | | akid | string | 阿里云accessid | 195 | | akkey | string | 阿里云accesskey | 196 | | version | string | 协议版本,如果不确定,请使用DEFAULT_VERSION | 197 | 198 | 返回值: 199 | 200 | TokenResultMessage对象指针和错误信息 201 | 202 | 203 | 204 | ## 建立连接 205 | 206 | ### 1. 
ConnectionConfig 207 | 208 | > 用于建立连接的基础参数 209 | 210 | 参数说明: 211 | 212 | | 参数 | 类型 | 参数说明 | 213 | | ------ | ------ | ------------------------------------------------ | 214 | | Url | string | 访问的公有云URL,如果不确定,可以使用DEFAULT_URL | 215 | | Token | string | 通过GetToken获取的token或者测试token | 216 | | Akid | string | 阿里云accessid | 217 | | Akkey | string | 阿里云accesskey | 218 | | Appkey | string | appkey,可以在控制台中对应项目上看到 | 219 | 220 | 221 | 222 | ### 2. func NewConnectionConfigWithAKInfoDefault(url string, appkey string, akid string, akkey string) (*ConnectionConfig, error) 223 | 224 | > 通过url,appkey,akid和akkey创建连接参数,等效于先调用GetToken然后再调用NewConnectionConfigWithToken 225 | 226 | 参数说明: 227 | 228 | | 参数 | 类型 | 参数说明 | 229 | | ------ | ------ | ------------------------------------------------ | 230 | | Url | string | 访问的公有云URL,如果不确定,可以使用DEFAULT_URL | 231 | | Appkey | string | appkey,可以在控制台中对应项目上看到 | 232 | | Akid | string | 阿里云accessid | 233 | | Akkey | string | 阿里云accesskey | 234 | 235 | 返回值: 236 | 237 | *ConnectionConfig:连接参数对象指针,用于后续创建语音交互实例 238 | 239 | error:异常对象,为nil则无异常 240 | 241 | 242 | 243 | ### 3. func NewConnectionConfigWithToken(url string, appkey string, token string) *ConnectionConfig 244 | 245 | > 通过url,appkey和token创建连接参数 246 | 247 | 参数说明: 248 | 249 | | 参数 | 类型 | 参数说明 | 250 | | ------ | ------ | ------------------------------------------------ | 251 | | Url | string | 访问的公有云URL,如果不确定,可以使用DEFAULT_URL | 252 | | Appkey | string | appkey,可以在控制台中对应项目上看到 | 253 | | Token | string | 已经通过GetToken或其他方式获取的token | 254 | 255 | 返回值: 256 | 257 | *ConnectionConfig:连接参数对象指针 258 | 259 | 260 | 261 | ### 4. 
func NewConnectionConfigFromJson(jsonStr string) (*ConnectionConfig, error) 262 | 263 | > 通过json字符串来创建连接参数 264 | 265 | 参数说明 266 | 267 | | 参数 | 类型 | 参数说明 | 268 | | ------- | ------ | ------------------------------------------------------------ | 269 | | jsonStr | string | 描述连接参数的json字符串,有效字段如下:url,token,akid,akkey,appkey。其中必须包含url和appkey,如果包含token则不需要包含akid和akkey | 270 | 271 | 返回值: 272 | 273 | *ConnectionConfig:连接对象指针 274 | 275 | 276 | 277 | ## 实时语音识别 278 | 279 | ### 1. SpeechTranscriptionStartParam 280 | 281 | > 实时语音识别参数 282 | 283 | 参数说明: 284 | 285 | | 参数 | 类型 | 参数说明 | 286 | | ------------------------------ | ------ | ------------------------------------------------------------ | 287 | | Format | string | 音频格式,默认使用pcm | 288 | | SampleRate | int | 采样率,默认16000 | 289 | | EnableIntermediateResult | bool | 是否打开中间结果返回 | 290 | | EnablePunctuationPredition | bool | 是否打开标点预测 | 291 | | EnableInverseTextNormalization | bool | 是否打开ITN | 292 | | MaxSentenceSilence | int | 语音断句检测阈值,静音时长超过该阈值会被认为断句,合法参数范围200~2000(ms),默认值800ms | 293 | | enable_words | bool | 是否开启返回词信息,可选,默认false不开启 | 294 | 295 | 296 | 297 | ### 2. func DefaultSpeechTranscriptionParam() SpeechTranscriptionStartParam 298 | 299 | > 创建一个默认参数 300 | 301 | 参数说明: 302 | 303 | 无 304 | 305 | 返回值: 306 | 307 | SpeechTranscriptionStartParam:默认参数 308 | 309 | ### 3. func NewSpeechTranscription(...) 
(*SpeechTranscription, error) 310 | 311 | > 创建一个实时识别对象 312 | 313 | 参数说明: 314 | 315 | | 参数 | 类型 | 参数说明 | 316 | | ------------- | ------------------------- | ----------------------------------------------------- | 317 | | config | *ConnectionConfig | 见上文建立连接相关内容 | 318 | | logger | *NlsLogger | 见SDK日志相关内容 | 319 | | taskfailed | func(string, interface{}) | 识别过程中的错误处理回调,interface{}为用户自定义参数 | 320 | | started | func(string, interface{}) | 建连完成回调 | 321 | | sentencebegin | func(string, interface{}) | 一句话开始 | 322 | | sentenceend | func(string, interface{}) | 一句话结束 | 323 | | resultchanged | func(string, interface{}) | 识别中间结果回调 | 324 | | completed | func(string, interface{}) | 最终识别结果回调 | 325 | | closed | func(interface{}) | 连接断开回调 | 326 | | param | interface{} | 用户自定义参数 | 327 | 328 | 返回值: 329 | 330 | *SpeechTranscription:识别对象指针 331 | 332 | error:错误异常 333 | 334 | ### 4. func (st *SpeechTranscription) Start(param SpeechTranscriptionStartParam, extra map[string]interface{}) (chan bool, error) 335 | 336 | > 开始实时识别 337 | 338 | 参数说明: 339 | 340 | | 参数 | 类型 | 参数说明 | 341 | | ----- | ----------------------------- | ----------------- | 342 | | param | SpeechTranscriptionStartParam | 实时识别参数 | 343 | | extra | map[string]interface{} | 额外key value参数 | 344 | 345 | 返回值: 346 | 347 | chan bool:同步start完成的管道 348 | 349 | error:错误异常 350 | 351 | ### 5. func (st *SpeechTranscription) Stop() (chan bool, error) 352 | 353 | > 停止实时识别 354 | 355 | 参数说明: 356 | 357 | 无 358 | 359 | 返回值: 360 | 361 | chan bool:同步stop完成的管道 362 | 363 | error:错误异常 364 | 365 | ### 6. func (st *SpeechTranscription) Ctrl(param map[string]interface{}) error 366 | 367 | > 发送控制命令,先阅读实时语音识别接口说明 368 | 369 | 参数说明: 370 | 371 | | 参数 | 类型 | 参数说明 | 372 | | ----- | ---------------------- | ------------------------------------------------------------ | 373 | | param | map[string]interface{} | 自定义控制命令,该字典内容会以key:value形式合并进请求的payload段中 | 374 | 375 | 返回值: 376 | 377 | error:错误异常 378 | 379 | ### 7. 
func (st *SpeechTranscription) Shutdown() 380 | 381 | > 强制停止 382 | 383 | 参数说明: 384 | 385 | 无 386 | 387 | 返回值: 388 | 389 | 无 390 | 391 | ### 8. func (sr *SpeechTranscription) SendAudioData(data []byte) error 392 | 393 | > 发送音频,音频格式必须和参数中一致 394 | 395 | 参数说明 396 | 397 | | 参数 | 类型 | 参数说明 | 398 | | ---- | ------ | -------- | 399 | | data | []byte | 音频数据 | 400 | 401 | 返回值: 402 | 403 | error:异常错误 404 | 405 | 406 | 407 | ### 代码示例 408 | 409 | ```go 410 | package main 411 | 412 | import ( 413 | "errors" 414 | "flag" 415 | "fmt" 416 | "log" 417 | "os" 418 | "os/signal" 419 | "sync" 420 | "time" 421 | 422 | "github.com/aliyun/alibabacloud-nls-go-sdk" 423 | ) 424 | 425 | const ( 426 | AKID = "Your AKID" 427 | AKKEY = "Your AKKEY" 428 | //online key 429 | APPKEY = "Your APPKEY" 430 | TOKEN = "Your TOKEN" 431 | ) 432 | 433 | func onTaskFailed(text string, param interface{}) { 434 | logger, ok := param.(*nls.NlsLogger) 435 | if !ok { 436 | log.Default().Fatal("invalid logger") 437 | return 438 | } 439 | 440 | logger.Println("TaskFailed:", text) 441 | } 442 | 443 | func onStarted(text string, param interface{}) { 444 | logger, ok := param.(*nls.NlsLogger) 445 | if !ok { 446 | log.Default().Fatal("invalid logger") 447 | return 448 | } 449 | 450 | logger.Println("onStarted:", text) 451 | } 452 | 453 | func onSentenceBegin(text string, param interface{}) { 454 | logger, ok := param.(*nls.NlsLogger) 455 | if !ok { 456 | log.Default().Fatal("invalid logger") 457 | return 458 | } 459 | 460 | logger.Println("onSentenceBegin:", text) 461 | } 462 | 463 | func onSentenceEnd(text string, param interface{}) { 464 | logger, ok := param.(*nls.NlsLogger) 465 | if !ok { 466 | log.Default().Fatal("invalid logger") 467 | return 468 | } 469 | 470 | logger.Println("onSentenceEnd:", text) 471 | } 472 | 473 | func onResultChanged(text string, param interface{}) { 474 | logger, ok := param.(*nls.NlsLogger) 475 | if !ok { 476 | log.Default().Fatal("invalid logger") 477 | return 478 | } 479 | 480 | 
logger.Println("onResultChanged:", text) 481 | } 482 | 483 | func onCompleted(text string, param interface{}) { 484 | logger, ok := param.(*nls.NlsLogger) 485 | if !ok { 486 | log.Default().Fatal("invalid logger") 487 | return 488 | } 489 | 490 | logger.Println("onCompleted:", text) 491 | } 492 | 493 | func onClose(param interface{}) { 494 | logger, ok := param.(*nls.NlsLogger) 495 | if !ok { 496 | log.Default().Fatal("invalid logger") 497 | return 498 | } 499 | 500 | logger.Println("onClosed:") 501 | } 502 | 503 | func waitReady(ch chan bool, logger *nls.NlsLogger) error { 504 | select { 505 | case done := <-ch: 506 | { 507 | if !done { 508 | logger.Println("Wait failed") 509 | return errors.New("wait failed") 510 | } 511 | logger.Println("Wait done") 512 | } 513 | case <-time.After(20 * time.Second): 514 | { 515 | logger.Println("Wait timeout") 516 | return errors.New("wait timeout") 517 | } 518 | } 519 | return nil 520 | } 521 | 522 | var lk sync.Mutex 523 | var fail = 0 524 | var reqNum = 0 525 | 526 | func testMultiInstance(num int) { 527 | pcm, err := os.Open("tests/test1.pcm") 528 | if err != nil { 529 | log.Default().Fatalln(err) 530 | } 531 | 532 | buffers := nls.LoadPcmInChunk(pcm, 320) 533 | param := nls.DefaultSpeechTranscriptionParam() 534 | config := nls.NewConnectionConfigWithAKInfoDefault(nls.DEFAULT_URL, APPKEY, AKID, AKKEY) 535 | var wg sync.WaitGroup 536 | for i := 0; i < num; i++ { 537 | wg.Add(1) 538 | go func(id int) { 539 | defer wg.Done() 540 | strId := fmt.Sprintf("ID%d ", id) 541 | logger := nls.NewNlsLogger(os.Stderr, strId, log.LstdFlags|log.Lmicroseconds) 542 | logger.SetLogSil(false) 543 | logger.SetDebug(true) 544 | logger.Printf("Test Normal Case for SpeechRecognition:%s", strId) 545 | st, err := nls.NewSpeechTranscription(config, logger, 546 | onTaskFailed, onStarted, 547 | onSentenceBegin, onSentenceEnd, onResultChanged, 548 | onCompleted, onClose, logger) 549 | if err != nil { 550 | logger.Fatalln(err) 551 | return 552 | } 553 | 
554 | test_ex := make(map[string]interface{}) 555 | test_ex["test"] = "hello" 556 | 557 | for { 558 | lk.Lock() 559 | reqNum++ 560 | lk.Unlock() 561 | logger.Println("ST start") 562 | ready, err := st.Start(param, test_ex) 563 | if err != nil { 564 | lk.Lock() 565 | fail++ 566 | lk.Unlock() 567 | st.Shutdown() 568 | continue 569 | } 570 | 571 | err = waitReady(ready, logger) 572 | if err != nil { 573 | lk.Lock() 574 | fail++ 575 | lk.Unlock() 576 | st.Shutdown() 577 | continue 578 | } 579 | 580 | for _, data := range buffers.Data { 581 | if data != nil { 582 | st.SendAudioData(data.Data) 583 | time.Sleep(10 * time.Millisecond) 584 | } 585 | } 586 | 587 | logger.Println("send audio done") 588 | ready, err = st.Stop() 589 | if err != nil { 590 | lk.Lock() 591 | fail++ 592 | lk.Unlock() 593 | st.Shutdown() 594 | continue 595 | } 596 | 597 | err = waitReady(ready, logger) 598 | if err != nil { 599 | lk.Lock() 600 | fail++ 601 | lk.Unlock() 602 | st.Shutdown() 603 | continue 604 | } 605 | 606 | logger.Println("Sr done") 607 | st.Shutdown() 608 | } 609 | }(i) 610 | } 611 | 612 | wg.Wait() 613 | } 614 | 615 | func main() { 616 | coroutineId := flag.Int("num", 1, "coroutine number") 617 | flag.Parse() 618 | log.Default().Printf("start %d coroutines", *coroutineId) 619 | 620 | c := make(chan os.Signal, 1) 621 | signal.Notify(c, os.Interrupt) 622 | go func() { 623 | for range c { 624 | lk.Lock() 625 | log.Printf(">>>>>>>>REQ NUM: %d>>>>>>>>>FAIL: %d", reqNum, fail) 626 | lk.Unlock() 627 | os.Exit(0) 628 | } 629 | }() 630 | testMultiInstance(*coroutineId) 631 | } 632 | ``` 633 | 634 | 635 | 636 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NLS Go SDK说明 2 | 3 | > 本文介绍如何使用阿里云智能语音服务提供的Go SDK,包括SDK的安装方法及SDK代码示例。 4 | 5 | 6 | 7 | ## 前提条件 8 | 9 | 使用SDK前,请先阅读接口说明,详细请参见**接口说明**。 10 | 11 | ### 下载安装 12 | 13 | > 说明 14 | > 15 | > * SDK支持go1.16 16 | > * 
请确认已经安装golang环境,并完成基本配置 17 | 18 | 1. 下载SDK 19 | 20 | 通过以下命令完成SDK下载和安装: 21 | 22 | > go get github.com/aliyun/alibabacloud-nls-go-sdk 23 | 24 | 2. 导入SDK 25 | 26 | 在代码中通过将以下字段加入import来导入SDK: 27 | 28 | > import ("github.com/aliyun/alibabacloud-nls-go-sdk") 29 | 30 | 31 | 32 | ## SDK常量 33 | 34 | | 常量 | 常量含义 | 35 | | ------------------ | ------------------------------------------------------------ | 36 | | SDK_VERSION | SDK版本 | 37 | | PCM | pcm音频格式 | 38 | | WAV | wav音频格式 | 39 | | OPUS | opus音频格式 | 40 | | OPU | opu音频格式 | 41 | | DEFAULT_DISTRIBUTE | 获取token时使用的默认区域,"cn-shanghai" | 42 | | DEFAULT_DOMAIN | 获取token时使用的默认URL,"nls-meta.cn-shanghai.aliyuncs.com" | 43 | | DEFAULT_VERSION | 获取token时使用的协议版本,"2019-02-28" | 44 | | DEFAULT_URL | 默认公有云URL,"wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1" | 45 | 46 | 47 | 48 | ## SDK日志 49 | 50 | ### 1. func DefaultNlsLog() *NlsLogger 51 | 52 | > 用于创建全局唯一的默认日志对象,默认日志以NLS为前缀,输出到标准错误 53 | 54 | 参数说明: 55 | 56 | 无 57 | 58 | 返回值: 59 | 60 | NlsLogger对象指针 61 | 62 | 63 | 64 | ### 2. func NewNlsLogger(w io.Writer, tag string, flag int) *NlsLogger 65 | 66 | > 创建一个新的日志 67 | 68 | 参数说明: 69 | 70 | | 参数 | 类型 | 参数说明 | 71 | | ---- | --------- | ------------------------------- | 72 | | w | io.Writer | 任意实现io.Writer接口的对象 | 73 | | tag | string | 日志前缀,会打印到日志行首部 | 74 | | flag | int | 日志flag,具体参考go官方log文档 | 75 | 76 | 返回值: 77 | 78 | NlsLogger对象指针 79 | 80 | 81 | 82 | ### 3. func (logger *NlsLogger) SetLogSil(sil bool) 83 | 84 | > 设置日志是否输出到对应的io.Writer 85 | 86 | 参数说明: 87 | 88 | | 参数 | 类型 | 参数说明 | 89 | | ---- | ---- | ---------------------------- | 90 | | sil | bool | 是否禁止日志输出,true为禁止 | 91 | 92 | 返回值: 93 | 94 | 无 95 | 96 | 97 | 98 | ### 4. func (logger *NlsLogger) SetDebug(debug bool) 99 | 100 | > 设置是否打印debug日志,仅影响通过Debugf或Debugln进行输出的日志 101 | 102 | 参数说明: 103 | 104 | | 参数 | 类型 | 参数说明 | 105 | | ----- | ---- | --------------------------------- | 106 | | debug | bool | 是否允许debug日志输出,true为允许 | 107 | 108 | 返回值: 109 | 110 | 无 111 | 112 | 113 | 114 | ### 5. 
func (logger *NlsLogger) SetOutput(w io.Writer) 115 | 116 | > 设置日志输出方式 117 | 118 | 参数说明: 119 | 120 | | 参数 | 类型 | 参数说明 | 121 | | ---- | --------- | --------------------------- | 122 | | w | io.Writer | 任意实现io.Writer接口的对象 | 123 | 124 | 返回值: 125 | 126 | 无 127 | 128 | 129 | 130 | ### 6. func (logger *NlsLogger) SetPrefix(prefix string) 131 | 132 | > 设置日志行的标签 133 | 134 | 参数说明: 135 | 136 | | 参数 | 类型 | 参数说明 | 137 | | ------ | ------ | ------------------------------ | 138 | | prefix | string | 日志行标签,会输出在日志行行首 | 139 | 140 | 返回值: 141 | 142 | 无 143 | 144 | 145 | 146 | ### 7. func (logger *NlsLogger) SetFlags(flags int) 147 | 148 | > 设置日志属性 149 | 150 | 参数说明: 151 | 152 | | 参数 | 类型 | 参数说明 | 153 | | ----- | ---- | ------------------------------------------------ | 154 | | flags | int | 日志属性,见https://pkg.go.dev/log#pkg-constants | 155 | 156 | 返回值: 157 | 158 | 无 159 | 160 | 161 | 162 | ### 8. 日志打印 163 | 164 | 日志打印方法: 165 | 166 | | 方法名 | 方法说明 | 167 | | ----------------------------------------------------------- | ------------------------------------------------------------ | 168 | | func (l *NlsLogger) Print(v ...interface{}) | 标准日志输出 | 169 | | func (l *NlsLogger) Println(v ...interface{}) | 标准日志输出,行尾自动换行 | 170 | | func (l *NlsLogger) Printf(format string, v ...interface{}) | 带format的日志输出,format方式见go官方文档 | 171 | | func (l *NlsLogger) Debugln(v ...interface{}) | debug信息日志输出,行尾自动换行 | 172 | | func (l *NlsLogger) Debugf(format string, v ...interface{}) | 带format的debug信息日志输出 | 173 | | func (l *NlsLogger) Fatal(v ...interface{}) | 致命错误日志输出,输出后自动进程退出 | 174 | | func (l *NlsLogger) Fatalln(v ...interface{}) | 致命错误日志输出,行尾自动换行,输出后自动进程退出 | 175 | | func (l *NlsLogger) Fatalf(format string, v ...interface{}) | 带format的致命错误日志输出,输出后自动进程退出 | 176 | | func (l *NlsLogger) Panic(v ...interface{}) | 致命错误日志输出,输出后自动进程退出并打印崩溃信息 | 177 | | func (l *NlsLogger) Panicln(v ...interface{}) | 致命错误日志输出,行尾自动换行,输出后自动进程退出并打印崩溃信息 | 178 | | func (l *NlsLogger) Panicf(format string, v ...interface{}) | 
带format的致命错误日志输出,输出后自动进程退出并打印崩溃信息 | 179 | 180 | 181 | 182 | ## 获取token 183 | 184 | ### 1. func GetToken(dist string, domain string, akid string, akkey string, version string) (*TokenResultMessage, error) 185 | 186 | > 获取访问token 187 | 188 | 参数说明: 189 | 190 | | 参数 | 类型 | 参数说明 | 191 | | ------- | ------ | ------------------------------------------- | 192 | | dist | string | 区域,如果不确定,请使用DEFAULT_DISTRIBUTE | 193 | | domain | string | URL,如果不确定,请使用DEFAULT_DOMAIN | 194 | | akid | string | 阿里云accessid | 195 | | akkey | string | 阿里云accesskey | 196 | | version | string | 协议版本,如果不确定,请使用DEFAULT_VERSION | 197 | 198 | 返回值: 199 | 200 | TokenResultMessage对象指针和错误信息 201 | 202 | 203 | 204 | ## 建立连接 205 | 206 | ### 1. ConnectionConfig 207 | 208 | > 用于建立连接的基础参数 209 | 210 | 参数说明: 211 | 212 | | 参数 | 类型 | 参数说明 | 213 | | ------ | ------ | ------------------------------------------------ | 214 | | Url | string | 访问的公有云URL,如果不确定,可以使用DEFAULT_URL | 215 | | Token | string | 通过GetToken获取的token或者测试token | 216 | | Akid | string | 阿里云accessid | 217 | | Akkey | string | 阿里云accesskey | 218 | | Appkey | string | appkey,可以在控制台中对应项目上看到 | 219 | 220 | 221 | 222 | ### 2. func NewConnectionConfigWithAKInfoDefault(url string, appkey string, akid string, akkey string) (*ConnectionConfig, error) 223 | 224 | > 通过url,appkey,akid和akkey创建连接参数,等效于先调用GetToken然后再调用NewConnectionConfigWithToken 225 | 226 | 参数说明: 227 | 228 | | 参数 | 类型 | 参数说明 | 229 | | ------ | ------ | ------------------------------------------------ | 230 | | Url | string | 访问的公有云URL,如果不确定,可以使用DEFAULT_URL | 231 | | Appkey | string | appkey,可以在控制台中对应项目上看到 | 232 | | Akid | string | 阿里云accessid | 233 | | Akkey | string | 阿里云accesskey | 234 | 235 | 返回值: 236 | 237 | *ConnectionConfig:连接参数对象指针,用于后续创建语音交互实例 238 | 239 | error:异常对象,为nil则无异常 240 | 241 | 242 | 243 | ### 3. 
func NewConnectionConfigWithToken(url string, appkey string, token string) *ConnectionConfig 244 | 245 | > 通过url,appkey和token创建连接参数 246 | 247 | 参数说明: 248 | 249 | | 参数 | 类型 | 参数说明 | 250 | | ------ | ------ | ------------------------------------------------ | 251 | | Url | string | 访问的公有云URL,如果不确定,可以使用DEFAULT_URL | 252 | | Appkey | string | appkey,可以在控制台中对应项目上看到 | 253 | | Token | string | 已经通过GetToken或其他方式获取的token | 254 | 255 | 返回值: 256 | 257 | *ConnectionConfig:连接参数对象指针 258 | 259 | 260 | 261 | ### 4. func NewConnectionConfigFromJson(jsonStr string) (*ConnectionConfig, error) 262 | 263 | > 通过json字符串来创建连接参数 264 | 265 | 参数说明 266 | 267 | | 参数 | 类型 | 参数说明 | 268 | | ------- | ------ | ------------------------------------------------------------ | 269 | | jsonStr | string | 描述连接参数的json字符串,有效字段如下:url,token,akid,akkey,appkey。其中必须包含url和appkey,如果包含token则不需要包含akid和akkey | 270 | 271 | 返回值: 272 | 273 | *ConnectionConfig:连接对象指针 274 | 275 | ## 一句话识别 276 | 277 | ### 1. SpeechRecognitionStartParam 278 | 279 | > 一句话识别参数 280 | 281 | 参数说明: 282 | 283 | | 参数 | 类型 | 参数说明 | 284 | | ------------------------------ | ------ | --------------------- | 285 | | Format | string | 音频格式,默认使用pcm | 286 | | SampleRate | int | 采样率,默认16000 | 287 | | EnableIntermediateResult | bool | 是否打开中间结果返回 | 288 | | EnablePunctuationPredition | bool | 是否打开标点预测 | 289 | | EnableInverseTextNormalization | bool | 是否打开ITN | 290 | 291 | 292 | 293 | ### 2. func DefaultSpeechRecognitionParam() SpeechRecognitionStartParam 294 | 295 | > 返回一个默认的推荐参数,其中format为pcm,采样率为16000,中间结果,标点预测和ITN全开 296 | 297 | 参数说明: 298 | 299 | 无 300 | 301 | 返回值: 302 | 303 | 默认参数 304 | 305 | 306 | 307 | ### 3. func NewSpeechRecognition(...) 
(*SpeechRecognition, error) 308 | 309 | > 创建一个SpeechRecognition实例 310 | 311 | 参数说明: 312 | 313 | | 参数 | 类型 | 参数说明 | 314 | | ------------- | ------------------------- | ----------------------------------------------------- | 315 | | config | *ConnectionConfig | 见上文建立连接相关内容 | 316 | | logger | *NlsLogger | 见SDK日志相关内容 | 317 | | taskfailed | func(string, interface{}) | 识别过程中的错误处理回调,interface{}为用户自定义参数 | 318 | | started | func(string, interface{}) | 建连完成回调 | 319 | | resultchanged | func(string, interface{}) | 识别中间结果回调 | 320 | | completed | func(string, interface{}) | 最终识别结果回调 | 321 | | closed | func(interface{}) | 连接断开回调 | 322 | | param | interface{} | 用户自定义参数 | 323 | 324 | 返回值: 325 | 326 | *SpeechRecognition:识别对象指针 327 | 328 | error:错误异常 329 | 330 | 331 | 332 | ### 4. func (sr *SpeechRecognition) Start(param SpeechRecognitionStartParam, extra map[string]interface{}) (chan bool, error) 333 | 334 | > 根据param发起一次一句话识别 335 | 336 | 参数说明: 337 | 338 | | 参数 | 类型 | 参数说明 | 339 | | ----- | --------------------------- | ----------------- | 340 | | param | SpeechRecognitionStartParam | 一句话识别参数 | 341 | | extra | map[string]interface{} | 额外key value参数 | 342 | 343 | 返回值: 344 | 345 | chan bool:同步start完成的管道 346 | 347 | error:错误异常 348 | 349 | ### 5. func (sr *SpeechRecognition) Stop() (chan bool, error) 350 | 351 | > 停止一句话识别 352 | 353 | 参数说明: 354 | 355 | 无 356 | 357 | 返回值: 358 | 359 | chan bool:同步stop完成的管道 360 | 361 | error:错误异常 362 | 363 | 364 | 365 | ### 6. func (sr *SpeechRecognition) Shutdown() 366 | 367 | > 强制断开连接 368 | 369 | 参数说明: 370 | 371 | 无 372 | 373 | 返回值: 374 | 375 | 无 376 | 377 | 378 | 379 | ### 7. 
func (sr *SpeechRecognition) SendAudioData(data []byte) error 380 | 381 | > 发送音频,音频格式必须和参数中一致 382 | 383 | 参数说明 384 | 385 | | 参数 | 类型 | 参数说明 | 386 | | ---- | ------ | -------- | 387 | | data | []byte | 音频数据 | 388 | 389 | 返回值: 390 | 391 | error:异常错误 392 | 393 | 394 | 395 | ### 一句话识别代码示例: 396 | 397 | ```python 398 | package main 399 | 400 | import ( 401 | "errors" 402 | "flag" 403 | "fmt" 404 | "log" 405 | "os" 406 | "os/signal" 407 | "sync" 408 | "time" 409 | 410 | "github.com/aliyun/alibabacloud-nls-go-sdk" 411 | ) 412 | 413 | const ( 414 | AKID = "Your AKID" 415 | AKKEY = "Your AKKEY" 416 | //online key 417 | APPKEY = "Your APPKEY" 418 | TOKEN = "Your TOKEN" 419 | ) 420 | 421 | func onTaskFailed(text string, param interface{}) { 422 | logger, ok := param.(*nls.NlsLogger) 423 | if !ok { 424 | log.Default().Fatal("invalid logger") 425 | return 426 | } 427 | 428 | logger.Println("TaskFailed:", text) 429 | } 430 | 431 | func onStarted(text string, param interface{}) { 432 | logger, ok := param.(*nls.NlsLogger) 433 | if !ok { 434 | log.Default().Fatal("invalid logger") 435 | return 436 | } 437 | 438 | logger.Println("onStarted:", text) 439 | } 440 | 441 | func onResultChanged(text string, param interface{}) { 442 | logger, ok := param.(*nls.NlsLogger) 443 | if !ok { 444 | log.Default().Fatal("invalid logger") 445 | return 446 | } 447 | 448 | logger.Println("onResultChanged:", text) 449 | } 450 | 451 | func onCompleted(text string, param interface{}) { 452 | logger, ok := param.(*nls.NlsLogger) 453 | if !ok { 454 | log.Default().Fatal("invalid logger") 455 | return 456 | } 457 | 458 | logger.Println("onCompleted:", text) 459 | } 460 | 461 | func onClose(param interface{}) { 462 | logger, ok := param.(*nls.NlsLogger) 463 | if !ok { 464 | log.Default().Fatal("invalid logger") 465 | return 466 | } 467 | 468 | logger.Println("onClosed:") 469 | } 470 | 471 | func waitReady(ch chan bool, logger *nls.NlsLogger) error { 472 | select { 473 | case done := <-ch: 474 | { 475 | if 
!done { 476 | logger.Println("Wait failed") 477 | return errors.New("wait failed") 478 | } 479 | logger.Println("Wait done") 480 | } 481 | case <-time.After(20 * time.Second): 482 | { 483 | logger.Println("Wait timeout") 484 | return errors.New("wait timeout") 485 | } 486 | } 487 | return nil 488 | } 489 | 490 | var lk sync.Mutex 491 | var fail = 0 492 | var reqNum = 0 493 | 494 | func testMultiInstance(num int) { 495 | pcm, err := os.Open("tests/test1.pcm") 496 | if err != nil { 497 | log.Default().Fatalln(err) 498 | } 499 | 500 | buffers := nls.LoadPcmInChunk(pcm, 320) 501 | param := nls.DefaultSpeechRecognitionParam() 502 | //config := nls.NewConnectionConfigWithToken(PRE_URL_WSS, 503 | // APPKEY, TOKEN) 504 | config := nls.NewConnectionConfigWithAKInfoDefault(nls.DEFAULT_URL, APPKEY, AKID, AKKEY) 505 | var wg sync.WaitGroup 506 | for i := 0; i < num; i++ { 507 | wg.Add(1) 508 | go func(id int) { 509 | defer wg.Done() 510 | strId := fmt.Sprintf("ID%d ", id) 511 | logger := nls.NewNlsLogger(os.Stderr, strId, log.LstdFlags|log.Lmicroseconds) 512 | logger.SetLogSil(false) 513 | logger.SetDebug(true) 514 | logger.Printf("Test Normal Case for SpeechRecognition:%s", strId) 515 | sr, err := nls.NewSpeechRecognition(config, logger, 516 | onTaskFailed, onStarted, onResultChanged, 517 | onCompleted, onClose, logger) 518 | if err != nil { 519 | logger.Fatalln(err) 520 | return 521 | } 522 | 523 | test_ex := make(map[string]interface{}) 524 | test_ex["test"] = "hello" 525 | 526 | for { 527 | lk.Lock() 528 | reqNum++ 529 | lk.Unlock() 530 | logger.Println("SR start") 531 | ready, err := sr.Start(param, test_ex) 532 | if err != nil { 533 | lk.Lock() 534 | fail++ 535 | lk.Unlock() 536 | sr.Shutdown() 537 | continue 538 | } 539 | 540 | err = waitReady(ready, logger) 541 | if err != nil { 542 | lk.Lock() 543 | fail++ 544 | lk.Unlock() 545 | sr.Shutdown() 546 | continue 547 | } 548 | 549 | for _, data := range buffers.Data { 550 | if data != nil { 551 | 
sr.SendAudioData(data.Data) 552 | time.Sleep(10 * time.Millisecond) 553 | } 554 | } 555 | 556 | logger.Println("send audio done") 557 | ready, err = sr.Stop() 558 | if err != nil { 559 | lk.Lock() 560 | fail++ 561 | lk.Unlock() 562 | sr.Shutdown() 563 | continue 564 | } 565 | 566 | err = waitReady(ready, logger) 567 | if err != nil { 568 | lk.Lock() 569 | fail++ 570 | lk.Unlock() 571 | sr.Shutdown() 572 | continue 573 | } 574 | 575 | logger.Println("Sr done") 576 | sr.Shutdown() 577 | } 578 | }(i) 579 | } 580 | 581 | wg.Wait() 582 | } 583 | 584 | func main() { 585 | coroutineId := flag.Int("num", 1, "coroutine number") 586 | flag.Parse() 587 | log.Default().Printf("start %d coroutines", *coroutineId) 588 | 589 | c := make(chan os.Signal, 1) 590 | signal.Notify(c, os.Interrupt) 591 | go func() { 592 | for range c { 593 | lk.Lock() 594 | log.Printf(">>>>>>>>REQ NUM: %d>>>>>>>>>FAIL: %d", reqNum, fail) 595 | lk.Unlock() 596 | os.Exit(0) 597 | } 598 | }() 599 | testMultiInstance(*coroutineId) 600 | } 601 | 602 | ``` 603 | 604 | 605 | 606 | ## 实时语音识别 607 | 608 | ### 1. SpeechTranscriptionStartParam 609 | 610 | > 实时语音识别参数 611 | 612 | 参数说明: 613 | 614 | | 参数 | 类型 | 参数说明 | 615 | | ------------------------------ | ------ | ------------------------------------------------------------ | 616 | | Format | string | 音频格式,默认使用pcm | 617 | | SampleRate | int | 采样率,默认16000 | 618 | | EnableIntermediateResult | bool | 是否打开中间结果返回 | 619 | | EnablePunctuationPredition | bool | 是否打开标点预测 | 620 | | EnableInverseTextNormalization | bool | 是否打开ITN | 621 | | MaxSentenceSilence | int | 语音断句检测阈值,静音时长超过该阈值会被认为断句,合法参数范围200~2000(ms),默认值800m | 622 | | enable_words | bool | 是否开启返回词信息,可选,默认false不开启 | 623 | 624 | 625 | 626 | ### 2. func DefaultSpeechTranscriptionParam() SpeechTranscriptionStartParam 627 | 628 | > 创建一个默认参数 629 | 630 | 参数说明: 631 | 632 | 无 633 | 634 | 返回值: 635 | 636 | SpeechTranscriptionStartParam:默认参数 637 | 638 | ### 3. func NewSpeechTranscription(...) 
(*SpeechTranscription, error) 639 | 640 | > 创建一个实时识别对象 641 | 642 | 参数说明: 643 | 644 | | 参数 | 类型 | 参数说明 | 645 | | ------------- | ------------------------- | ----------------------------------------------------- | 646 | | config | *ConnectionConfig | 见上文建立连接相关内容 | 647 | | logger | *NlsLogger | 见SDK日志相关内容 | 648 | | taskfailed | func(string, interface{}) | 识别过程中的错误处理回调,interface{}为用户自定义参数 | 649 | | started | func(string, interface{}) | 建连完成回调 | 650 | | sentencebegin | func(string, interface{}) | 一句话开始 | 651 | | sentenceend | func(string, interface{}) | 一句话结束 | 652 | | resultchanged | func(string, interface{}) | 识别中间结果回调 | 653 | | completed | func(string, interface{}) | 最终识别结果回调 | 654 | | closed | func(interface{}) | 连接断开回调 | 655 | | param | interface{} | 用户自定义参数 | 656 | 657 | 返回值: 658 | 659 | *SpeechTranscription:实时识别对象指针 660 | 661 | error:错误异常 662 | 663 | ### 4. func (st *SpeechTranscription) Start(param SpeechTranscriptionStartParam, extra map[string]interface{}) (chan bool, error) 664 | 665 | > 开始实时识别 666 | 667 | 参数说明: 668 | 669 | | 参数 | 类型 | 参数说明 | 670 | | ----- | ----------------------------- | ----------------- | 671 | | param | SpeechTranscriptionStartParam | 实时识别参数 | 672 | | extra | map[string]interface{} | 额外key value参数 | 673 | 674 | 返回值: 675 | 676 | chan bool:同步start完成的管道 677 | 678 | error:错误异常 679 | 680 | ### 5. func (st *SpeechTranscription) Stop() (chan bool, error) 681 | 682 | > 停止实时识别 683 | 684 | 参数说明: 685 | 686 | 无 687 | 688 | 返回值: 689 | 690 | chan bool:同步stop完成的管道 691 | 692 | error:错误异常 693 | 694 | ### 6. func (st *SpeechTranscription) Ctrl(param map[string]interface{}) error 695 | 696 | > 发送控制命令,使用前请先阅读实时语音识别接口说明 697 | 698 | 参数说明: 699 | 700 | | 参数 | 类型 | 参数说明 | 701 | | ----- | ---------------------- | ------------------------------------------------------------ | 702 | | param | map[string]interface{} | 自定义控制命令,该字典内容会以key:value形式合并进请求的payload段中 | 703 | 704 | 返回值: 705 | 706 | error:错误异常 707 | 708 | ### 7. 
func (st *SpeechTranscription) Shutdown() 709 | 710 | > 强制停止 711 | 712 | 参数说明: 713 | 714 | 无 715 | 716 | 返回值: 717 | 718 | 无 719 | 720 | ### 8. func (sr *SpeechTranscription) SendAudioData(data []byte) error 721 | 722 | > 发送音频,音频格式必须和参数中一致 723 | 724 | 参数说明 725 | 726 | | 参数 | 类型 | 参数说明 | 727 | | ---- | ------ | -------- | 728 | | data | []byte | 音频数据 | 729 | 730 | 返回值: 731 | 732 | error:异常错误 733 | 734 | 735 | 736 | ### 代码示例 737 | 738 | ```python 739 | package main 740 | 741 | import ( 742 | "errors" 743 | "flag" 744 | "fmt" 745 | "log" 746 | "os" 747 | "os/signal" 748 | "sync" 749 | "time" 750 | 751 | "github.com/aliyun/alibabacloud-nls-go-sdk" 752 | ) 753 | 754 | const ( 755 | AKID = "Your AKID" 756 | AKKEY = "Your AKKEY" 757 | //online key 758 | APPKEY = "Your APPKEY" 759 | TOKEN = "Your TOKEN" 760 | ) 761 | 762 | func onTaskFailed(text string, param interface{}) { 763 | logger, ok := param.(*nls.NlsLogger) 764 | if !ok { 765 | log.Default().Fatal("invalid logger") 766 | return 767 | } 768 | 769 | logger.Println("TaskFailed:", text) 770 | } 771 | 772 | func onStarted(text string, param interface{}) { 773 | logger, ok := param.(*nls.NlsLogger) 774 | if !ok { 775 | log.Default().Fatal("invalid logger") 776 | return 777 | } 778 | 779 | logger.Println("onStarted:", text) 780 | } 781 | 782 | func onSentenceBegin(text string, param interface{}) { 783 | logger, ok := param.(*nls.NlsLogger) 784 | if !ok { 785 | log.Default().Fatal("invalid logger") 786 | return 787 | } 788 | 789 | logger.Println("onSentenceBegin:", text) 790 | } 791 | 792 | func onSentenceEnd(text string, param interface{}) { 793 | logger, ok := param.(*nls.NlsLogger) 794 | if !ok { 795 | log.Default().Fatal("invalid logger") 796 | return 797 | } 798 | 799 | logger.Println("onSentenceEnd:", text) 800 | } 801 | 802 | func onResultChanged(text string, param interface{}) { 803 | logger, ok := param.(*nls.NlsLogger) 804 | if !ok { 805 | log.Default().Fatal("invalid logger") 806 | return 807 | } 808 | 809 | 
logger.Println("onResultChanged:", text) 810 | } 811 | 812 | func onCompleted(text string, param interface{}) { 813 | logger, ok := param.(*nls.NlsLogger) 814 | if !ok { 815 | log.Default().Fatal("invalid logger") 816 | return 817 | } 818 | 819 | logger.Println("onCompleted:", text) 820 | } 821 | 822 | func onClose(param interface{}) { 823 | logger, ok := param.(*nls.NlsLogger) 824 | if !ok { 825 | log.Default().Fatal("invalid logger") 826 | return 827 | } 828 | 829 | logger.Println("onClosed:") 830 | } 831 | 832 | func waitReady(ch chan bool, logger *nls.NlsLogger) error { 833 | select { 834 | case done := <-ch: 835 | { 836 | if !done { 837 | logger.Println("Wait failed") 838 | return errors.New("wait failed") 839 | } 840 | logger.Println("Wait done") 841 | } 842 | case <-time.After(20 * time.Second): 843 | { 844 | logger.Println("Wait timeout") 845 | return errors.New("wait timeout") 846 | } 847 | } 848 | return nil 849 | } 850 | 851 | var lk sync.Mutex 852 | var fail = 0 853 | var reqNum = 0 854 | 855 | func testMultiInstance(num int) { 856 | pcm, err := os.Open("tests/test1.pcm") 857 | if err != nil { 858 | log.Default().Fatalln(err) 859 | } 860 | 861 | buffers := nls.LoadPcmInChunk(pcm, 320) 862 | param := nls.DefaultSpeechTranscriptionParam() 863 | //config := nls.NewConnectionConfigWithToken(PRE_URL_WSS, 864 | // APPKEY, TOKEN) 865 | config := nls.NewConnectionConfigWithAKInfoDefault(nls.DEFAULT_URL, APPKEY, AKID, AKKEY) 866 | var wg sync.WaitGroup 867 | for i := 0; i < num; i++ { 868 | wg.Add(1) 869 | go func(id int) { 870 | defer wg.Done() 871 | strId := fmt.Sprintf("ID%d ", id) 872 | logger := nls.NewNlsLogger(os.Stderr, strId, log.LstdFlags|log.Lmicroseconds) 873 | logger.SetLogSil(false) 874 | logger.SetDebug(true) 875 | logger.Printf("Test Normal Case for SpeechRecognition:%s", strId) 876 | st, err := nls.NewSpeechTranscription(config, logger, 877 | onTaskFailed, onStarted, 878 | onSentenceBegin, onSentenceEnd, onResultChanged, 879 | onCompleted, 
onClose, logger) 880 | if err != nil { 881 | logger.Fatalln(err) 882 | return 883 | } 884 | 885 | test_ex := make(map[string]interface{}) 886 | test_ex["test"] = "hello" 887 | 888 | for { 889 | lk.Lock() 890 | reqNum++ 891 | lk.Unlock() 892 | logger.Println("ST start") 893 | ready, err := st.Start(param, test_ex) 894 | if err != nil { 895 | lk.Lock() 896 | fail++ 897 | lk.Unlock() 898 | st.Shutdown() 899 | continue 900 | } 901 | 902 | err = waitReady(ready, logger) 903 | if err != nil { 904 | lk.Lock() 905 | fail++ 906 | lk.Unlock() 907 | st.Shutdown() 908 | continue 909 | } 910 | 911 | for _, data := range buffers.Data { 912 | if data != nil { 913 | st.SendAudioData(data.Data) 914 | time.Sleep(10 * time.Millisecond) 915 | } 916 | } 917 | 918 | logger.Println("send audio done") 919 | ready, err = st.Stop() 920 | if err != nil { 921 | lk.Lock() 922 | fail++ 923 | lk.Unlock() 924 | st.Shutdown() 925 | continue 926 | } 927 | 928 | err = waitReady(ready, logger) 929 | if err != nil { 930 | lk.Lock() 931 | fail++ 932 | lk.Unlock() 933 | st.Shutdown() 934 | continue 935 | } 936 | 937 | logger.Println("Sr done") 938 | st.Shutdown() 939 | } 940 | }(i) 941 | } 942 | 943 | wg.Wait() 944 | } 945 | 946 | func main() { 947 | coroutineId := flag.Int("num", 1, "coroutine number") 948 | flag.Parse() 949 | log.Default().Printf("start %d coroutines", *coroutineId) 950 | 951 | c := make(chan os.Signal, 1) 952 | signal.Notify(c, os.Interrupt) 953 | go func() { 954 | for range c { 955 | lk.Lock() 956 | log.Printf(">>>>>>>>REQ NUM: %d>>>>>>>>>FAIL: %d", reqNum, fail) 957 | lk.Unlock() 958 | os.Exit(0) 959 | } 960 | }() 961 | testMultiInstance(*coroutineId) 962 | } 963 | ``` 964 | 965 | 966 | 967 | ## 语音合成 968 | 969 | ### 1. 
SpeechSynthesisStartParam 970 | 971 | 参数说明: 972 | 973 | | 参数 | 类型 | 参数说明 | 974 | | -------------- | ------ | ----------------------------- | 975 | | Voice | string | 发音人,默认“xiaoyun” | 976 | | Format | string | 音频格式,默认使用wav | 977 | | SampleRate | int | 采样率,默认16000 | 978 | | Volume | int | 音量,范围为0-100,默认50 | 979 | | SpeechRate | int | 语速,范围为-500-500,默认为0 | 980 | | PitchRate | int | 音高,范围为-500-500,默认为0 | 981 | | EnableSubtitle | bool | 字幕功能,默认为false | 982 | 983 | ### 2. func DefaultSpeechSynthesisParam() SpeechSynthesisStartParam 984 | 985 | > 创建一个默认的语音合成参数 986 | 987 | 参数说明: 988 | 989 | 无 990 | 991 | 返回值: 992 | 993 | SpeechSynthesisStartParam:语音合成参数 994 | 995 | ### 3. func NewSpeechSynthesis(...) (*SpeechSynthesis, error) 996 | 997 | > 创建一个新的语音合成对象 998 | 999 | 参数说明: 1000 | 1001 | | 参数 | 类型 | 参数说明 | 1002 | | --------------- | ------------------------- | ----------------------------------------------------- | 1003 | | config | *ConnectionConfig | 见上文建立连接相关内容 | 1004 | | logger | *NlsLogger | 见SDK日志相关内容 | 1005 | | taskfailed | func(string, interface{}) | 合成过程中的错误处理回调,interface{}为用户自定义参数 | 1006 | | synthesisresult | func([]byte, interface{}) | 语音合成数据回调 | 1007 | | metainfo | func(string, interface{}) | 字幕数据回调,需要参数中EnableSubtitle为true | 1008 | | completed | func(string, interface{}) | 合成完毕结果回调 | 1009 | | closed | func(interface{}) | 连接断开回调 | 1010 | | param | interface{} | 用户自定义参数 | 1011 | 1012 | 返回值: 1013 | 1014 | *SpeechSynthesis:语音合成对象指针;error:错误异常 1015 | 1016 | ### 4. func (tts *SpeechSynthesis) Start(text string, param SpeechSynthesisStartParam, extra map[string]interface{}) (chan bool, error) 1017 | 1018 | > 给定文本和参数进行语音合成 1019 | 1020 | 参数说明: 1021 | 1022 | | 参数 | 类型 | 参数说明 | 1023 | | ----- | ----------------------------- | ----------------- | 1024 | | text | string | 待合成文本 | 1025 | | param | SpeechSynthesisStartParam | 语音合成参数 | 1026 | | extra | map[string]interface{} | 额外key value参数 | 1027 | 1028 | 返回值: 1029 | 1030 | chan bool:语音合成完成通知管道 1031 | 1032 | error:错误异常 1033 | 1034 | ### 5. 
func (tts *SpeechSynthesis) Shutdown() 1035 | 1036 | > 强制停止语音合成 1037 | 1038 | 参数说明: 1039 | 1040 | 无 1041 | 1042 | 返回值: 1043 | 1044 | 无 1045 | 1046 | 1047 | 1048 | ### 代码示例: 1049 | 1050 | ```python 1051 | package main 1052 | 1053 | import ( 1054 | "errors" 1055 | "flag" 1056 | "fmt" 1057 | "io" 1058 | "log" 1059 | "os" 1060 | "os/signal" 1061 | "sync" 1062 | "time" 1063 | 1064 | "github.com/aliyun/alibabacloud-nls-go-sdk" 1065 | ) 1066 | 1067 | const ( 1068 | AKID = "Your AKID" 1069 | AKKEY = "Your AKKEY" 1070 | //online key 1071 | APPKEY = "Your APPKEY" 1072 | TOKEN = "Your TOKEN" 1073 | ) 1074 | 1075 | type TtsUserParam struct { 1076 | F io.Writer 1077 | Logger *nls.NlsLogger 1078 | } 1079 | 1080 | func onTaskFailed(text string, param interface{}) { 1081 | p, ok := param.(*TtsUserParam) 1082 | if !ok { 1083 | log.Default().Fatal("invalid logger") 1084 | return 1085 | } 1086 | 1087 | p.Logger.Println("TaskFailed:", text) 1088 | } 1089 | 1090 | func onSynthesisResult(data []byte, param interface{}) { 1091 | p, ok := param.(*TtsUserParam) 1092 | if !ok { 1093 | log.Default().Fatal("invalid logger") 1094 | return 1095 | } 1096 | p.F.Write(data) 1097 | } 1098 | 1099 | func onCompleted(text string, param interface{}) { 1100 | p, ok := param.(*TtsUserParam) 1101 | if !ok { 1102 | log.Default().Fatal("invalid logger") 1103 | return 1104 | } 1105 | 1106 | p.Logger.Println("onCompleted:", text) 1107 | } 1108 | 1109 | 1110 | func onClose(param interface{}) { 1111 | p, ok := param.(*TtsUserParam) 1112 | if !ok { 1113 | log.Default().Fatal("invalid logger") 1114 | return 1115 | } 1116 | 1117 | p.Logger.Println("onClosed:") 1118 | } 1119 | 1120 | func waitReady(ch chan bool, logger *nls.NlsLogger) error { 1121 | select { 1122 | case done := <-ch: 1123 | { 1124 | if !done { 1125 | logger.Println("Wait failed") 1126 | return errors.New("wait failed") 1127 | } 1128 | logger.Println("Wait done") 1129 | } 1130 | case <-time.After(60 * time.Second): 1131 | { 1132 | 
logger.Println("Wait timeout") 1133 | return errors.New("wait timeout") 1134 | } 1135 | } 1136 | return nil 1137 | } 1138 | 1139 | var lk sync.Mutex 1140 | var fail = 0 1141 | var reqNum = 0 1142 | 1143 | const ( 1144 | TEXT = "你好小德,今天天气怎么样。" 1145 | ) 1146 | 1147 | func testMultiInstance(num int) { 1148 | param := nls.DefaultSpeechSynthesisParam() 1149 | //config := nls.NewConnectionConfigWithToken(PRE_URL_WSS, 1150 | // APPKEY, TOKEN) 1151 | config := nls.NewConnectionConfigWithAKInfoDefault(nls.DEFAULT_URL, APPKEY, AKID, AKKEY) 1152 | var wg sync.WaitGroup 1153 | for i := 0; i < num; i++ { 1154 | wg.Add(1) 1155 | go func(id int) { 1156 | defer wg.Done() 1157 | strId := fmt.Sprintf("ID%d ", id) 1158 | fname := fmt.Sprintf("ttsdump%d.wav", id) 1159 | ttsUserParam := new(TtsUserParam) 1160 | fout, err := os.OpenFile(fname, os.O_RDWR|os.O_TRUNC|os.O_CREATE, 0666) 1161 | logger := nls.NewNlsLogger(os.Stderr, strId, log.LstdFlags|log.Lmicroseconds) 1162 | logger.SetLogSil(false) 1163 | logger.SetDebug(true) 1164 | logger.Printf("Test Normal Case for SpeechRecognition:%s", strId) 1165 | ttsUserParam.F = fout 1166 | ttsUserParam.Logger = logger 1167 | tts, err := nls.NewSpeechSynthesis(config, logger, 1168 | onTaskFailed, onSynthesisResult, nil, 1169 | onCompleted, onClose, ttsUserParam) 1170 | if err != nil { 1171 | logger.Fatalln(err) 1172 | return 1173 | } 1174 | 1175 | for { 1176 | lk.Lock() 1177 | reqNum++ 1178 | lk.Unlock() 1179 | logger.Println("SR start") 1180 | ch, err := tts.Start(TEXT, param, nil) 1181 | if err != nil { 1182 | lk.Lock() 1183 | fail++ 1184 | lk.Unlock() 1185 | tts.Shutdown() 1186 | continue 1187 | } 1188 | 1189 | err = waitReady(ch, logger) 1190 | if err != nil { 1191 | lk.Lock() 1192 | fail++ 1193 | lk.Unlock() 1194 | tts.Shutdown() 1195 | continue 1196 | } 1197 | logger.Println("Synthesis done") 1198 | tts.Shutdown() 1199 | } 1200 | }(i) 1201 | } 1202 | 1203 | wg.Wait() 1204 | } 1205 | 1206 | func main() { 1207 | coroutineId := 
flag.Int("num", 1, "coroutine number") 1208 | flag.Parse() 1209 | log.Default().Printf("start %d coroutines", *coroutineId) 1210 | 1211 | c := make(chan os.Signal, 1) 1212 | signal.Notify(c, os.Interrupt) 1213 | go func() { 1214 | for range c { 1215 | lk.Lock() 1216 | log.Printf(">>>>>>>>REQ NUM: %d>>>>>>>>>FAIL: %d", reqNum, fail) 1217 | lk.Unlock() 1218 | os.Exit(0) 1219 | } 1220 | }() 1221 | testMultiInstance(*coroutineId) 1222 | } 1223 | 1224 | ``` 1225 | 1226 | 1227 | 1228 | --------------------------------------------------------------------------------