├── samples ├── .gitignore ├── go.mod ├── synthesizer │ ├── doc.go │ ├── to_audio_data_stream.go │ └── to_speaker.go ├── recognizer │ ├── doc.go │ ├── from_microphone.go │ ├── wrapper.go │ └── from_file.go ├── dialog_service_connector │ ├── doc.go │ ├── from_push_audio_stream.go │ ├── listen_once.go │ └── start_keyword_listening.go ├── helpers │ └── stream_utils.go └── main.go ├── go.mod ├── test_files ├── kws.table ├── peloozoid.wav ├── katiesteve_mono.wav ├── turn_on_the_lamp.wav ├── TalkForAFewSeconds16.wav ├── whats_the_weather_like.wav └── myVoiceIsMyPassportVerifyMe01.wav ├── common ├── doc.go ├── output_format.go ├── interop_utils.go ├── service_property_channel.go ├── profanity_option.go ├── operation_outcome.go ├── synthesis_voice_gender.go ├── speech_synthesis_boundary_type.go ├── synthesis_voice_type.go ├── cancellation_reason.go ├── cancellation_reason_string.go ├── stream_status_string.go ├── stream_status.go ├── cancellation_error_code_string.go ├── cancellation_error_code.go ├── result_reason_string.go ├── property_collection.go ├── result_reason.go ├── error.go └── speech_synthesis_output_format.go ├── audio ├── doc.go ├── interop_utils.go ├── interop_utils_test.go ├── cfunctions.go ├── audio_stream_container_format.go ├── audio_stream_format.go ├── audio_output_stream.go ├── audio_config.go └── audio_input_stream.go ├── .golangci.yaml ├── speech ├── doc.go ├── interop_utils_test.go ├── interop_utils.go ├── recognition_event_args.go ├── keyword_recognition_model.go ├── session_event_args.go ├── speech_synthesis_event_args.go ├── cancellation_details.go ├── speech_recognition_event_args.go ├── conversation_transcription_result.go ├── speech_synthesis_bookmark_event_args.go ├── speech_synthesis_viseme_event_args.go ├── speech_recognition_canceled_event_args.go ├── speech_config_test.go ├── speech_synthesis_word_boundary_event_args.go ├── source_language_config.go ├── translation_callback_helpers.go ├── conversation_callback_helpers.go ├── 
voice_info.go ├── speech_recognition_result.go ├── auto_detect_source_language_config.go ├── synthesis_voices_result.go ├── speech_synthesis_result.go ├── conversation_transcription_event_args.go ├── audio_data_stream.go ├── cfunctions.go └── translation_recognition_result.go ├── dialog ├── doc.go ├── interop_utils_test.go ├── interop_utils.go ├── cfunctions.go ├── activity_received_event_args.go └── callback_helpers.go ├── CODE_OF_CONDUCT.md ├── .vscode └── settings.json ├── diagnostics ├── error.go └── diagnostics.go ├── .github └── workflows │ ├── lint.yml │ └── go.yml ├── LICENSE ├── ci └── azure-pipelines.yml ├── README.md ├── SECURITY.md └── .gitignore /samples/.gitignore: -------------------------------------------------------------------------------- 1 | samples 2 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/Microsoft/cognitive-services-speech-sdk-go 2 | 3 | go 1.13 4 | -------------------------------------------------------------------------------- /test_files/kws.table: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/cognitive-services-speech-sdk-go/HEAD/test_files/kws.table -------------------------------------------------------------------------------- /test_files/peloozoid.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/cognitive-services-speech-sdk-go/HEAD/test_files/peloozoid.wav -------------------------------------------------------------------------------- /test_files/katiesteve_mono.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/cognitive-services-speech-sdk-go/HEAD/test_files/katiesteve_mono.wav 
-------------------------------------------------------------------------------- /test_files/turn_on_the_lamp.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/cognitive-services-speech-sdk-go/HEAD/test_files/turn_on_the_lamp.wav -------------------------------------------------------------------------------- /test_files/TalkForAFewSeconds16.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/cognitive-services-speech-sdk-go/HEAD/test_files/TalkForAFewSeconds16.wav -------------------------------------------------------------------------------- /test_files/whats_the_weather_like.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/cognitive-services-speech-sdk-go/HEAD/test_files/whats_the_weather_like.wav -------------------------------------------------------------------------------- /test_files/myVoiceIsMyPassportVerifyMe01.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/cognitive-services-speech-sdk-go/HEAD/test_files/myVoiceIsMyPassportVerifyMe01.wav -------------------------------------------------------------------------------- /samples/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/Microsoft/cognitive-services-speech-sdk-go/samples 2 | 3 | require github.com/Microsoft/cognitive-services-speech-sdk-go v1.33.0 4 | 5 | go 1.13 6 | -------------------------------------------------------------------------------- /samples/synthesizer/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | // Package synthesizer provides samples of text-to-speech 5 | package synthesizer 6 | -------------------------------------------------------------------------------- /common/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | // Package common contains the definitions for many of the shared objects and properties in the Speech SDK 5 | package common -------------------------------------------------------------------------------- /samples/recognizer/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | // Package recognizer provides samples of recognition from microphone or from an audio file 5 | package recognizer -------------------------------------------------------------------------------- /audio/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | // Package audio provides the audio configuration, input/output streams, and related utilities for audio interactions 5 | package audio -------------------------------------------------------------------------------- /.golangci.yaml: -------------------------------------------------------------------------------- 1 | run: 2 | tests: false 3 | 4 | linters: 5 | enable: 6 | - revive 7 | 8 | issues: 9 | exclude-rules: 10 | - linters: 11 | - govet 12 | text: "OperationOutcome" 13 | - linters: 14 | - golint 15 | - revive 16 | text: "type name will be used" 17 | -------------------------------------------------------------------------------- /speech/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | // Package speech provides functionality for speech recognizers along with their related configuration and event objects 5 | package speech -------------------------------------------------------------------------------- /dialog/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | // Package dialog provides functionality for creating custom voice assistant applications and managing the 5 | // related interaction flow 6 | package dialog -------------------------------------------------------------------------------- /samples/dialog_service_connector/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
// OutputFormat selects the output format: Simple or Detailed.
type OutputFormat int

// Supported OutputFormat values.
const (
	// Simple is the simple output format (value 0).
	Simple OutputFormat = iota
	// Detailed is the detailed output format (value 1).
	Detailed
)
// ServicePropertyChannel identifies the channel used to pass property settings to the service.
type ServicePropertyChannel int

// URIQueryParameter passes property settings to the service as URI query parameters.
const URIQueryParameter = ServicePropertyChannel(0)
// diagnosticsError reports a failed diagnostics operation together with the
// numeric code that was returned for it.
type diagnosticsError struct {
	operation string  // name of the operation that failed
	code      uintptr // error code reported for the operation
}

// newDiagnosticsError wraps operation and code into an error value.
func newDiagnosticsError(operation string, code uintptr) error {
	failure := new(diagnosticsError)
	failure.operation = operation
	failure.code = code
	return failure
}

// Error implements the error interface; the code is printed in decimal.
func (e *diagnosticsError) Error() string {
	const format = "diagnostics operation '%s' failed with error code %d"
	return fmt.Sprintf(format, e.operation, e.code)
}
// SynthesisVoiceGender describes the gender of a synthesis voice.
type SynthesisVoiceGender int

// Supported SynthesisVoiceGender values.
const (
	// GenderUnknown means the gender is unknown (value 0).
	GenderUnknown SynthesisVoiceGender = iota

	// Female indicates a female voice (value 1).
	Female

	// Male indicates a male voice (value 2).
	Male
)
3 | 4 | package audio 5 | 6 | import ( 7 | "testing" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | func TestHandleConversion(t *testing.T) { 13 | orig := common.SPXHandle(3) 14 | handle := uintptr2handle(orig) 15 | dest := handle2uintptr(handle) 16 | if orig != dest { 17 | t.Error("Values are not equal") 18 | } 19 | if uintptr2handle(dest) != handle { 20 | t.Error("Values are not equal") 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /dialog/interop_utils_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package dialog 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | "testing" 9 | ) 10 | 11 | func TestHandleConversion(t *testing.T) { 12 | orig := common.SPXHandle(3) 13 | handle := uintptr2handle(orig) 14 | dest := handle2uintptr(handle) 15 | if orig != dest { 16 | t.Error("Values are not equal") 17 | } 18 | if uintptr2handle(dest) != handle { 19 | t.Error("Values are not equal") 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /speech/interop_utils_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
// uintptr2handle converts a common.SPXHandle into the C.SPXHANDLE type used
// when calling into the C API. The value is reinterpreted via unsafe.Pointer;
// no allocation or lookup takes place.
func uintptr2handle(h common.SPXHandle) C.SPXHANDLE {
	return (C.SPXHANDLE)(unsafe.Pointer(h)) //nolint:govet
}

// handle2uintptr is the inverse of uintptr2handle: it converts a C.SPXHANDLE
// back into a common.SPXHandle by the same unsafe.Pointer reinterpretation.
func handle2uintptr(h C.SPXHANDLE) common.SPXHandle {
	return (common.SPXHandle)(unsafe.Pointer(h)) //nolint:govet
}
// SynthesisVoiceType describes the type of a synthesis voice.
type SynthesisVoiceType int

// Supported SynthesisVoiceType values; numbering starts at 1.
const (
	// OnlineNeural indicates an online neural voice (value 1).
	OnlineNeural SynthesisVoiceType = iota + 1

	// OnlineStandard indicates an online standard voice (value 2).
	OnlineStandard

	// OfflineNeural indicates an offline neural voice (value 3).
	OfflineNeural

	// OfflineStandard indicates an offline standard voice (value 4).
	OfflineStandard
)
10 | var x [1]struct{} 11 | _ = x[Error-1] 12 | _ = x[EndOfStream-2] 13 | _ = x[CancelledByUser-3] 14 | } 15 | 16 | const _CancellationReason_name = "ErrorEndOfStreamCancelledByUser" 17 | 18 | var _CancellationReason_index = [...]uint8{0, 5, 16, 31} 19 | 20 | func (i CancellationReason) String() string { 21 | i -= 1 22 | if i < 0 || i >= CancellationReason(len(_CancellationReason_index)-1) { 23 | return "CancellationReason(" + strconv.FormatInt(int64(i+1), 10) + ")" 24 | } 25 | return _CancellationReason_name[_CancellationReason_index[i]:_CancellationReason_index[i+1]] 26 | } 27 | -------------------------------------------------------------------------------- /common/stream_status_string.go: -------------------------------------------------------------------------------- 1 | // Code generated by "stringer -type=StreamStatus -trimprefix=StreamStatus -output=stream_status_string.go"; DO NOT EDIT. 2 | 3 | package common 4 | 5 | import "strconv" 6 | 7 | func _() { 8 | // An "invalid array index" compiler error signifies that the constant values have changed. 9 | // Re-run the stringer command to generate them again. 
10 | var x [1]struct{} 11 | _ = x[StreamStatusUnknown-0] 12 | _ = x[StreamStatusNoData-1] 13 | _ = x[StreamStatusPartialData-2] 14 | _ = x[StreamStatusAllData-3] 15 | _ = x[StreamStatusCanceled-4] 16 | } 17 | 18 | const _StreamStatus_name = "UnknownNoDataPartialDataAllDataCanceled" 19 | 20 | var _StreamStatus_index = [...]uint8{0, 7, 13, 24, 31, 39} 21 | 22 | func (i StreamStatus) String() string { 23 | if i < 0 || i >= StreamStatus(len(_StreamStatus_index)-1) { 24 | return "StreamStatus(" + strconv.FormatInt(int64(i), 10) + ")" 25 | } 26 | return _StreamStatus_name[_StreamStatus_index[i]:_StreamStatus_index[i+1]] 27 | } 28 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint code 2 | on: 3 | push: 4 | tags: 5 | - v* 6 | branches: 7 | - master 8 | - main 9 | pull_request: 10 | env: 11 | CARBON_VERSION: "1.42.0" 12 | jobs: 13 | golangci: 14 | name: lint 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v2 18 | - name: Get Carbon 19 | run: | 20 | mkdir /tmp/carbon 21 | pushd /tmp/carbon 22 | wget https://csspeechstorage.blob.core.windows.net/drop/$CARBON_VERSION/SpeechSDK-Linux-$CARBON_VERSION.tar.gz 23 | tar xzf SpeechSDK-Linux-$CARBON_VERSION.tar.gz 24 | rm SpeechSDK-Linux-$CARBON_VERSION.tar.gz 25 | ln -s SpeechSDK-Linux-$CARBON_VERSION current 26 | popd 27 | - name: golangci-lint 28 | uses: golangci/golangci-lint-action@v5 29 | env: 30 | CGO_CFLAGS: "-I/tmp/carbon/current/include/c_api" 31 | CGO_LDFLAGS: "-L/tmp/carbon/current/lib/x64 -lMicrosoft.CognitiveServices.Speech.core" 32 | -------------------------------------------------------------------------------- /samples/helpers/stream_utils.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. 
See LICENSE.md file in the project root for full license information. 3 | 4 | package helpers 5 | 6 | import ( 7 | "bufio" 8 | "fmt" 9 | "io" 10 | "os" 11 | 12 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 13 | ) 14 | 15 | func PumpFileIntoStream(filename string, stream *audio.PushAudioInputStream) { 16 | file, err := os.Open(filename) 17 | if err != nil { 18 | fmt.Println("Error opening file: ", err) 19 | return 20 | } 21 | defer file.Close() 22 | reader := bufio.NewReader(file) 23 | buffer := make([]byte, 1000) 24 | for { 25 | n, err := reader.Read(buffer) 26 | if err == io.EOF { 27 | fmt.Println("Done reading file.") 28 | break 29 | } 30 | if err != nil { 31 | fmt.Println("Error reading file: ", err) 32 | break 33 | } 34 | err = stream.Write(buffer[0:n]) 35 | if err != nil { 36 | fmt.Println("Error writing to the stream") 37 | } 38 | } 39 | stream.CloseStream() 40 | } 41 | -------------------------------------------------------------------------------- /common/stream_status.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | // StreamStatus defines the possible status of audio data stream. 7 | type StreamStatus int 8 | 9 | const ( 10 | // StreamStatusUnknown indicates the audio data stream status is unknown. 11 | StreamStatusUnknown StreamStatus = 0 12 | 13 | // StreamStatusNoData indicates that the audio data stream contains no data. 14 | StreamStatusNoData StreamStatus = 1 15 | 16 | // StreamStatusPartialData indicates the audio data stream contains partial data of a speak request. 17 | StreamStatusPartialData StreamStatus = 2 18 | 19 | // StreamStatusAllData indicates the audio data stream contains all data of a speak request. 
20 | StreamStatusAllData StreamStatus = 3 21 | 22 | // StreamStatusCanceled indicates the audio data stream was canceled. 23 | StreamStatusCanceled StreamStatus = 4 24 | ) 25 | 26 | //go:generate stringer -type=StreamStatus -trimprefix=StreamStatus -output=stream_status_string.go 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /common/cancellation_error_code_string.go: -------------------------------------------------------------------------------- 1 | // Code generated by "stringer -type=CancellationErrorCode -output=cancellation_error_code_string.go"; DO NOT EDIT. 
2 | 3 | package common 4 | 5 | import "strconv" 6 | 7 | func _() { 8 | // An "invalid array index" compiler error signifies that the constant values have changed. 9 | // Re-run the stringer command to generate them again. 10 | var x [1]struct{} 11 | _ = x[NoError-0] 12 | _ = x[AuthenticationFailure-1] 13 | _ = x[BadRequest-2] 14 | _ = x[TooManyRequests-3] 15 | _ = x[Forbidden-4] 16 | _ = x[ConnectionFailure-5] 17 | _ = x[ServiceTimeout-6] 18 | _ = x[ServiceError-7] 19 | _ = x[ServiceUnavailable-8] 20 | _ = x[RuntimeError-9] 21 | } 22 | 23 | const _CancellationErrorCode_name = "NoErrorAuthenticationFailureBadRequestTooManyRequestsForbiddenConnectionFailureServiceTimeoutServiceErrorServiceUnavailableRuntimeError" 24 | 25 | var _CancellationErrorCode_index = [...]uint8{0, 7, 28, 38, 53, 62, 79, 93, 105, 123, 135} 26 | 27 | func (i CancellationErrorCode) String() string { 28 | if i < 0 || i >= CancellationErrorCode(len(_CancellationErrorCode_index)-1) { 29 | return "CancellationErrorCode(" + strconv.FormatInt(int64(i), 10) + ")" 30 | } 31 | return _CancellationErrorCode_name[_CancellationErrorCode_index[i]:_CancellationErrorCode_index[i+1]] 32 | } 33 | -------------------------------------------------------------------------------- /speech/recognition_event_args.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | import "C" 13 | 14 | // RecognitionEventArgs represents the recognition event arguments. 
15 | type RecognitionEventArgs struct { 16 | SessionEventArgs 17 | Offset uint64 18 | } 19 | 20 | // NewRecognitionEventArgsFromHandle creates the object from the handle (for internal use) 21 | func NewRecognitionEventArgsFromHandle(handle common.SPXHandle) (*RecognitionEventArgs, error) { 22 | base, err := NewSessionEventArgsFromHandle(handle) 23 | if err != nil { 24 | return nil, err 25 | } 26 | var offset C.uint64_t 27 | ret := uintptr(C.recognizer_recognition_event_get_offset(uintptr2handle(handle), &offset)) 28 | if ret != C.SPX_NOERROR { 29 | return nil, common.NewCarbonError(ret) 30 | } 31 | event := new(RecognitionEventArgs) 32 | event.SessionEventArgs = *base 33 | event.Offset = uint64(offset) 34 | return event, nil 35 | } 36 | 37 | // RecognitionEventHandler is the type of the event handler that receives RecognitionEventArgs 38 | type RecognitionEventHandler func(event RecognitionEventArgs) 39 | -------------------------------------------------------------------------------- /speech/keyword_recognition_model.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | import "C" 13 | import "unsafe" 14 | 15 | // KeywordRecognitionModel represents the keyword recognition model used with StartKeywordRecognitionAsync methods. 16 | type KeywordRecognitionModel struct { 17 | handle C.SPXHANDLE 18 | } 19 | 20 | // Close disposes the associated resources. 
21 | func (model KeywordRecognitionModel) Close() { 22 | C.keyword_recognition_model_handle_release(model.handle) 23 | } 24 | 25 | // GetHandle gets the handle to the resource (for internal use) 26 | func (model KeywordRecognitionModel) GetHandle() common.SPXHandle { 27 | return handle2uintptr(model.handle) 28 | } 29 | 30 | // NewKeywordRecognitionModelFromFile creates a keyword recognition model using the specified file. 31 | func NewKeywordRecognitionModelFromFile(filename string) (*KeywordRecognitionModel, error) { 32 | var handle C.SPXHANDLE 33 | f := C.CString(filename) 34 | defer C.free(unsafe.Pointer(f)) 35 | ret := uintptr(C.keyword_recognition_model_create_from_file(f, &handle)) 36 | if ret != C.SPX_NOERROR { 37 | return nil, common.NewCarbonError(ret) 38 | } 39 | model := new(KeywordRecognitionModel) 40 | model.handle = handle 41 | return model, nil 42 | } 43 | -------------------------------------------------------------------------------- /speech/session_event_args.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "unsafe" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | // #include 14 | import "C" 15 | 16 | // SessionEventArgs represents the session event arguments. 17 | type SessionEventArgs struct { 18 | handle C.SPXHANDLE 19 | // SessionID is the session identifier (a GUID in string format). 20 | SessionID string 21 | } 22 | 23 | // Close releases the underlying resources.
24 | func (event SessionEventArgs) Close() { 25 | C.recognizer_event_handle_release(event.handle) 26 | } 27 | 28 | // NewSessionEventArgsFromHandle creates the object from the handle (for internal use) 29 | func NewSessionEventArgsFromHandle(handle common.SPXHandle) (*SessionEventArgs, error) { 30 | buffer := C.malloc(C.sizeof_char * 37) 31 | defer C.free(unsafe.Pointer(buffer)) 32 | ret := uintptr(C.recognizer_session_event_get_session_id(uintptr2handle(handle), (*C.char)(buffer), 37)) 33 | if ret != C.SPX_NOERROR { 34 | return nil, common.NewCarbonError(ret) 35 | } 36 | event := new(SessionEventArgs) 37 | event.handle = uintptr2handle(handle) 38 | event.SessionID = C.GoString((*C.char)(buffer)) 39 | return event, nil 40 | } 41 | 42 | // SessionEventHandler is the type of the event handler that receives SessionEventArgs 43 | type SessionEventHandler func(event SessionEventArgs) 44 | -------------------------------------------------------------------------------- /speech/speech_synthesis_event_args.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | import "C" 13 | 14 | // SpeechSynthesisEventArgs represents the speech synthesis event arguments. 
15 | type SpeechSynthesisEventArgs struct { 16 | handle C.SPXHANDLE 17 | Result SpeechSynthesisResult 18 | } 19 | 20 | // Close releases the underlying resources 21 | func (event SpeechSynthesisEventArgs) Close() { 22 | event.Result.Close() 23 | C.synthesizer_event_handle_release(event.handle) 24 | } 25 | 26 | // NewSpeechSynthesisEventArgsFromHandle creates the object from the handle (for internal use) 27 | func NewSpeechSynthesisEventArgsFromHandle(handle common.SPXHandle) (*SpeechSynthesisEventArgs, error) { 28 | event := new(SpeechSynthesisEventArgs) 29 | event.handle = uintptr2handle(handle) 30 | var resultHandle C.SPXHANDLE 31 | ret := uintptr(C.synthesizer_synthesis_event_get_result(event.handle, &resultHandle)) 32 | if ret != C.SPX_NOERROR { 33 | return nil, common.NewCarbonError(ret) 34 | } 35 | result, err := NewSpeechSynthesisResultFromHandle(handle2uintptr(resultHandle)) 36 | if err != nil { 37 | return nil, err 38 | } 39 | event.Result = *result 40 | return event, nil 41 | } 42 | 43 | // SpeechSynthesisEventHandler is the type of the event handler that receives SpeechSynthesisEventArgs 44 | type SpeechSynthesisEventHandler func(event SpeechSynthesisEventArgs) 45 | -------------------------------------------------------------------------------- /speech/cancellation_details.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | // 13 | import "C" 14 | 15 | // CancellationDetails contains detailed information about why a result was canceled. 
16 | // Added in version 1.17.0 17 | type CancellationDetails struct { 18 | Reason common.CancellationReason 19 | ErrorCode common.CancellationErrorCode 20 | ErrorDetails string 21 | } 22 | 23 | // NewCancellationDetailsFromSpeechSynthesisResult creates the object from the speech synthesis result. 24 | func NewCancellationDetailsFromSpeechSynthesisResult(result *SpeechSynthesisResult) (*CancellationDetails, error) { 25 | cancellationDetails := new(CancellationDetails) 26 | /* Reason */ 27 | var cReason C.Result_CancellationReason 28 | ret := uintptr(C.synth_result_get_reason_canceled(result.handle, &cReason)) 29 | if ret != C.SPX_NOERROR { 30 | return nil, common.NewCarbonError(ret) 31 | } 32 | cancellationDetails.Reason = (common.CancellationReason)(cReason) 33 | /* ErrorCode */ 34 | var cCode C.Result_CancellationErrorCode 35 | ret = uintptr(C.synth_result_get_canceled_error_code(result.handle, &cCode)) 36 | if ret != C.SPX_NOERROR { 37 | return nil, common.NewCarbonError(ret) 38 | } 39 | cancellationDetails.ErrorCode = (common.CancellationErrorCode)(cCode) 40 | cancellationDetails.ErrorDetails = result.Properties.GetProperty(common.CancellationDetailsReasonDetailedText, "") 41 | return cancellationDetails, nil 42 | } 43 | -------------------------------------------------------------------------------- /audio/cfunctions.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package audio 5 | 6 | // This file defines the proxy functions required to use callbacks 7 | 8 | // #include 9 | // #include 10 | // #include 11 | // extern int cgoAudioCallReadCallback(SPXHANDLE handle, uint8_t *buffer, uint32_t size); 12 | // extern void cgoAudioCallGetPropertyCallback(SPXHANDLE handle, int id, uint8_t *value, uint32_t size); 13 | // extern void cgoAudioCallCloseCallback(SPXHANDLE handle); 14 | // 15 | // int cgo_audio_read_callback_wrapper(void *context, uint8_t *buffer, uint32_t size) 16 | // { 17 | // return cgoAudioCallReadCallback((SPXHANDLE)context, buffer, size); 18 | // } 19 | // 20 | // void cgo_audio_get_property_callback_wrapper(void* context, int id, uint8_t* value, uint32_t size) 21 | // { 22 | // cgoAudioCallGetPropertyCallback((SPXHANDLE)context, id, value, size); 23 | // } 24 | // 25 | // void cgo_audio_close_callback_wrapper(void *context) 26 | // { 27 | // cgoAudioCallCloseCallback((SPXHANDLE)context); 28 | // } 29 | // 30 | // extern int cgoAudioOutputCallWriteCallback(SPXHANDLE handle, uint8_t *buffer, uint32_t size); 31 | // extern void cgoAudioOutputCallCloseCallback(SPXHANDLE handle); 32 | // 33 | // int cgo_audio_push_stream_write_callback_wrapper(void *context, uint8_t* buffer, uint32_t size) 34 | // { 35 | // return cgoAudioOutputCallWriteCallback((SPXHANDLE)context, buffer, size); 36 | // } 37 | // 38 | // void cgo_audio_push_stream_close_callback_wrapper(void *context) 39 | // { 40 | // cgoAudioOutputCallCloseCallback((SPXHANDLE)context); 41 | // } 42 | import "C" 43 | -------------------------------------------------------------------------------- /speech/speech_recognition_event_args.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package speech 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | import "C" 13 | 14 | // SpeechRecognitionEventArgs represents the speech recognition event arguments. 15 | type SpeechRecognitionEventArgs struct { 16 | RecognitionEventArgs 17 | handle C.SPXHANDLE 18 | Result SpeechRecognitionResult 19 | } 20 | 21 | // Close releases the underlying resources 22 | func (event SpeechRecognitionEventArgs) Close() { 23 | event.RecognitionEventArgs.Close() 24 | event.Result.Close() 25 | } 26 | 27 | // NewSpeechRecognitionEventArgsFromHandle creates the object from the handle (for internal use) 28 | func NewSpeechRecognitionEventArgsFromHandle(handle common.SPXHandle) (*SpeechRecognitionEventArgs, error) { 29 | base, err := NewRecognitionEventArgsFromHandle(handle) 30 | if err != nil { 31 | return nil, err 32 | } 33 | event := new(SpeechRecognitionEventArgs) 34 | event.RecognitionEventArgs = *base 35 | event.handle = uintptr2handle(handle) 36 | var resultHandle C.SPXHANDLE 37 | ret := uintptr(C.recognizer_recognition_event_get_result(event.handle, &resultHandle)) 38 | if ret != C.SPX_NOERROR { 39 | return nil, common.NewCarbonError(ret) 40 | } 41 | result, err := NewSpeechRecognitionResultFromHandle(handle2uintptr(resultHandle)) 42 | if err != nil { 43 | return nil, err 44 | } 45 | event.Result = *result 46 | return event, nil 47 | } 48 | 49 | // SpeechRecognitionEventHandler is the type of the event handler that receives SpeechRecognitionEventArgs 50 | type SpeechRecognitionEventHandler func(event SpeechRecognitionEventArgs) 51 | -------------------------------------------------------------------------------- /ci/azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | 2 | resources: 3 | - repo: self 4 | clean: true 5 | 6 | trigger: 7 | branches: 8 | include: 9 | - master 10 | 11 | schedules: 12 | - cron: "0 18 * * 6" 13 | 
displayName: "Saturday Evening Schedule" 14 | branches: 15 | include: 16 | - master 17 | 18 | pool: 19 | vmImage: ubuntu-latest 20 | variables: 21 | CARBON_VERSION: "1.42.0" 22 | 23 | steps: 24 | - task: GoTool@0 25 | inputs: 26 | version: '1.13' 27 | - script: | 28 | go version 29 | go get -v -t -d ./... 30 | if [ -f Gopkg.toml ]; then 31 | curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh 32 | dep ensure 33 | fi 34 | displayName: 'Get dependencies' 35 | - task: ComponentGovernanceComponentDetection@0 36 | inputs: 37 | scanType: 'Register' 38 | verbosity: 'Verbose' 39 | alertWarningLevel: 'High' 40 | sourceScanPath: $(Build.SourcesDirectory) 41 | - script: | 42 | mkdir $HOME/carbon 43 | pushd $HOME/carbon 44 | wget https://csspeechstorage.blob.core.windows.net/drop/$(CARBON_VERSION)/SpeechSDK-Linux-$(CARBON_VERSION).tar.gz 45 | tar xzf SpeechSDK-Linux-$(CARBON_VERSION).tar.gz 46 | rm SpeechSDK-Linux-$(CARBON_VERSION).tar.gz 47 | ln -s SpeechSDK-Linux-$(CARBON_VERSION) current 48 | popd 49 | displayName: 'Get Speech SDK' 50 | - script: | 51 | sudo apt-get update 52 | sudo apt-get install -y libasound2 53 | sudo apt-get install -y libgstreamer1.0-0 gstreamer1.0-plugins-good 54 | dpkg -l | grep gstreamer 55 | displayName: 'Get Speech SDK dependencies' 56 | - script: | 57 | export CGO_CFLAGS="-I$HOME/carbon/current/include/c_api" 58 | export CGO_LDFLAGS="-L$HOME/carbon/current/lib/x64 -lMicrosoft.CognitiveServices.Speech.core" 59 | export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$HOME/carbon/current/lib/x64" 60 | go build -v ./... 61 | displayName: 'Build' 62 | -------------------------------------------------------------------------------- /audio/audio_stream_container_format.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
3 | 4 | package audio 5 | 6 | // AudioStreamContainerFormat defines the supported audio stream container formats. 7 | type AudioStreamContainerFormat int //nolint:revive 8 | 9 | const ( 10 | // OGGOPUS Stream ContainerFormat definition for OGG OPUS. 11 | OGGOPUS AudioStreamContainerFormat = 0x101 12 | 13 | // MP3 Stream ContainerFormat definition for MP3. 14 | MP3 AudioStreamContainerFormat = 0x102 15 | 16 | // FLAC Stream ContainerFormat definition for FLAC. 17 | FLAC AudioStreamContainerFormat = 0x103 18 | 19 | // ALAW Stream ContainerFormat definition for ALAW. 20 | ALAW AudioStreamContainerFormat = 0x104 21 | 22 | // MULAW Stream ContainerFormat definition for MULAW. 23 | MULAW AudioStreamContainerFormat = 0x105 24 | 25 | // AMRNB Stream ContainerFormat definition for AMRNB. Currently not supported. 26 | AMRNB AudioStreamContainerFormat = 0x106 27 | 28 | // AMRWB Stream ContainerFormat definition for AMRWB. Currently not supported. 29 | AMRWB AudioStreamContainerFormat = 0x107 30 | 31 | // ANY Stream ContainerFormat definition when the actual stream format is not known. 32 | ANY AudioStreamContainerFormat = 0x108 33 | ) 34 | 35 | // AudioStreamWaveFormat represents the formats specified inside a WAV container that are sent directly, as encoded, to the speech service. 36 | type AudioStreamWaveFormat int //nolint:revive 37 | 38 | const ( 39 | // WavePCM is the AudioStreamWaveFormat definition for PCM (pulse-code modulated) data in integer format. 40 | WavePCM AudioStreamWaveFormat = 0x0001 41 | 42 | // WaveALAW is the AudioStreamWaveFormat definition for A-law-encoded format. 43 | WaveALAW AudioStreamWaveFormat = 0x0006 44 | 45 | // WaveMULAW is the AudioStreamWaveFormat definition for Mu-law-encoded format.
46 | WaveMULAW AudioStreamWaveFormat = 0x0007 47 | 48 | ) 49 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | This project contains Go bindings for the Microsoft Cognitive Services Speech SDK. 4 | 5 | # Getting Started 6 | 7 | Check the [Speech SDK Setup documentation for Go](https://docs.microsoft.com/azure/cognitive-services/speech-service/quickstarts/setup-platform?tabs=dotnet%2Cwindows%2Cjre%2Cbrowser&pivots=programming-language-go) 8 | 9 | Get started with [speech-to-text sample for Go](https://docs.microsoft.com/azure/cognitive-services/speech-service/get-started-speech-to-text?tabs=windowsinstall&pivots=programming-language-go) 10 | 11 | Get started with [text-to-speech sample for Go](https://docs.microsoft.com/azure/cognitive-services/speech-service/get-started-text-to-speech?tabs=script%2Cwindowsinstall&pivots=programming-language-go) 12 | 13 | This project requires Go 1.13. 14 | 15 | # Reference 16 | 17 | Reference documentation for these packages is available at http://aka.ms/csspeech/goref 18 | 19 | # Contributing 20 | 21 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 22 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 23 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 24 | 25 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 26 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 27 | provided by the bot. You will only need to do this once across all repos using our CLA. 28 | 29 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
30 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 31 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 32 | -------------------------------------------------------------------------------- /speech/conversation_transcription_result.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "unsafe" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | // #include 14 | // #include 15 | // #include 16 | // #include 17 | import "C" 18 | 19 | // ConversationTranscriptionResult contains detailed information about result of a conversation transcription operation. 20 | type ConversationTranscriptionResult struct { 21 | SpeechRecognitionResult // Embedded for common fields 22 | SpeakerID string 23 | } 24 | 25 | // NewConversationTranscriptionResultFromHandle creates a ConversationTranscriptionResult from a handle (for internal use) 26 | func NewConversationTranscriptionResultFromHandle(handle common.SPXHandle) (*ConversationTranscriptionResult, error) { 27 | // Create base result first 28 | baseResult, err := NewSpeechRecognitionResultFromHandle(handle) 29 | if err != nil { 30 | return nil, err 31 | } 32 | 33 | result := &ConversationTranscriptionResult{ 34 | SpeechRecognitionResult: *baseResult, 35 | } 36 | 37 | // Get speaker ID 38 | buffer := C.malloc(C.sizeof_char * 1024) 39 | defer C.free(unsafe.Pointer(buffer)) 40 | 41 | ret := uintptr(C.conversation_transcription_result_get_speaker_id(result.handle, (*C.char)(buffer), 1024)) 42 | if ret != C.SPX_NOERROR { 43 | return nil, common.NewCarbonError(ret) 44 | } 45 | result.SpeakerID = C.GoString((*C.char)(buffer)) 46 | 47 | 
return result, nil 48 | } 49 | 50 | // Close releases the underlying resources 51 | func (result ConversationTranscriptionResult) Close() { 52 | // Only call the base Close since we don't have additional resources to clean up 53 | result.SpeechRecognitionResult.Close() 54 | } -------------------------------------------------------------------------------- /speech/speech_synthesis_bookmark_event_args.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | // #include 13 | import "C" 14 | 15 | // SpeechSynthesisBookmarkEventArgs represents the speech synthesis bookmark event arguments. 16 | type SpeechSynthesisBookmarkEventArgs struct { 17 | handle C.SPXHANDLE 18 | 19 | // AudioOffset is the audio offset of the bookmark event, in ticks (100 nanoseconds). 20 | AudioOffset uint64 21 | 22 | // Text is the text of the bookmark. 
23 | Text string 24 | } 25 | 26 | // Close releases the underlying resources 27 | func (event SpeechSynthesisBookmarkEventArgs) Close() { 28 | C.synthesizer_event_handle_release(event.handle) 29 | } 30 | 31 | // NewSpeechSynthesisBookmarkEventArgsFromHandle creates the object from the handle (for internal use) 32 | func NewSpeechSynthesisBookmarkEventArgsFromHandle(handle common.SPXHandle) (*SpeechSynthesisBookmarkEventArgs, error) { 33 | event := new(SpeechSynthesisBookmarkEventArgs) 34 | event.handle = uintptr2handle(handle) 35 | /* AudioOffset */ 36 | var cAudioOffset C.uint64_t 37 | ret := uintptr(C.synthesizer_bookmark_event_get_values(event.handle, &cAudioOffset)) 38 | if ret != C.SPX_NOERROR { 39 | return nil, common.NewCarbonError(ret) 40 | } 41 | event.AudioOffset = uint64(cAudioOffset) 42 | /* Text */ 43 | value := C.synthesizer_event_get_text(event.handle) 44 | event.Text = C.GoString(value) 45 | C.property_bag_free_string(value) 46 | return event, nil 47 | } 48 | 49 | // SpeechSynthesisBookmarkEventHandler is the type of the event handler that receives SpeechSynthesisBookmarkEventArgs 50 | type SpeechSynthesisBookmarkEventHandler func(event SpeechSynthesisBookmarkEventArgs) 51 | -------------------------------------------------------------------------------- /common/cancellation_error_code.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | // CancellationErrorCode defines error code in case that CancellationReason is Error. 7 | type CancellationErrorCode int 8 | 9 | const ( 10 | 11 | // NoError if CancellationReason is EndOfStream, CancellationErrorCode 12 | // is set to NoError. 13 | NoError CancellationErrorCode = 0 14 | 15 | // AuthenticationFailure indicates an authentication error. 
16 | // An authentication error occurs if subscription key or authorization token is invalid, expired, 17 | // or does not match the region being used. 18 | AuthenticationFailure CancellationErrorCode = 1 19 | 20 | // BadRequest indicates that one or more recognition parameters are invalid or the audio format is not supported. 21 | BadRequest CancellationErrorCode = 2 22 | 23 | // TooManyRequests indicates that the number of parallel requests exceeded the number of allowed concurrent transcriptions for the subscription. 24 | TooManyRequests CancellationErrorCode = 3 25 | 26 | // Forbidden indicates that the free subscription used by the request ran out of quota. 27 | Forbidden CancellationErrorCode = 4 28 | 29 | // ConnectionFailure indicates a connection error. 30 | ConnectionFailure CancellationErrorCode = 5 31 | 32 | // ServiceTimeout indicates a time-out error when waiting for response from service. 33 | ServiceTimeout CancellationErrorCode = 6 34 | 35 | // ServiceError indicates that an error is returned by the service. 36 | ServiceError CancellationErrorCode = 7 37 | 38 | // ServiceUnavailable indicates that the service is currently unavailable. 39 | ServiceUnavailable CancellationErrorCode = 8 40 | 41 | // RuntimeError indicates an unexpected runtime error. 42 | RuntimeError CancellationErrorCode = 9 43 | ) 44 | 45 | //go:generate stringer -type=CancellationErrorCode -output=cancellation_error_code_string.go 46 | -------------------------------------------------------------------------------- /dialog/cfunctions.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package dialog 5 | 6 | // This file defines the proxy functions required to use callbacks 7 | 8 | // #include 9 | // #include 10 | // extern void dialogFireEventSessionStarted(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 11 | // 12 | // void cgo_dialog_session_started(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 13 | // { 14 | // dialogFireEventSessionStarted(handle, event); 15 | // } 16 | // 17 | // extern void dialogFireEventSessionStopped(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 18 | // 19 | // void cgo_dialog_session_stopped(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 20 | // { 21 | // dialogFireEventSessionStopped(handle, event); 22 | // } 23 | // 24 | // extern void dialogFireEventRecognized(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 25 | // 26 | // void cgo_dialog_recognized(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 27 | // { 28 | // dialogFireEventRecognized(handle, event); 29 | // } 30 | // 31 | // extern void dialogFireEventRecognizing(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 32 | // 33 | // void cgo_dialog_recognizing(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 34 | // { 35 | // dialogFireEventRecognizing(handle, event); 36 | // } 37 | // 38 | // extern void dialogFireEventCanceled(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 39 | // 40 | // void cgo_dialog_canceled(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 41 | // { 42 | // dialogFireEventCanceled(handle, event); 43 | // } 44 | // 45 | // extern void dialogFireEventActivityReceived(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 46 | // 47 | // void cgo_dialog_activity_received(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 48 | // { 49 | // dialogFireEventActivityReceived(handle, event); 50 | // } 51 | // 52 | import "C" 53 | -------------------------------------------------------------------------------- /common/result_reason_string.go: 
-------------------------------------------------------------------------------- 1 | // Code generated by "stringer -type=ResultReason -output=result_reason_string.go"; DO NOT EDIT. 2 | 3 | package common 4 | 5 | import "strconv" 6 | 7 | func _() { 8 | // An "invalid array index" compiler error signifies that the constant values have changed. 9 | // Re-run the stringer command to generate them again. 10 | var x [1]struct{} 11 | _ = x[NoMatch-0] 12 | _ = x[Canceled-1] 13 | _ = x[RecognizingSpeech-2] 14 | _ = x[RecognizedSpeech-3] 15 | _ = x[RecognizingIntent-4] 16 | _ = x[RecognizedIntent-5] 17 | _ = x[TranslatingSpeech-6] 18 | _ = x[TranslatedSpeech-7] 19 | _ = x[SynthesizingAudio-8] 20 | _ = x[SynthesizingAudioCompleted-9] 21 | _ = x[RecognizingKeyword-10] 22 | _ = x[RecognizedKeyword-11] 23 | _ = x[SynthesizingAudioStarted-12] 24 | _ = x[EnrollingVoiceProfile-17] 25 | _ = x[EnrolledVoiceProfile-18] 26 | _ = x[RecognizedSpeakers-19] 27 | _ = x[RecognizedSpeaker-20] 28 | _ = x[ResetVoiceProfile-21] 29 | _ = x[DeletedVoiceProfile-22] 30 | _ = x[VoicesListRetrieved-23] 31 | } 32 | 33 | const ( 34 | _ResultReason_name_0 = "NoMatchCanceledRecognizingSpeechRecognizedSpeechRecognizingIntentRecognizedIntentTranslatingSpeechTranslatedSpeechSynthesizingAudioSynthesizingAudioCompletedRecognizingKeywordRecognizedKeywordSynthesizingAudioStarted" 35 | _ResultReason_name_1 = "EnrollingVoiceProfileEnrolledVoiceProfileRecognizedSpeakersRecognizedSpeakerResetVoiceProfileDeletedVoiceProfileVoicesListRetrieved" 36 | ) 37 | 38 | var ( 39 | _ResultReason_index_0 = [...]uint8{0, 7, 15, 32, 48, 65, 81, 98, 114, 131, 157, 175, 192, 216} 40 | _ResultReason_index_1 = [...]uint8{0, 21, 41, 59, 76, 93, 112, 131} 41 | ) 42 | 43 | func (i ResultReason) String() string { 44 | switch { 45 | case 0 <= i && i <= 12: 46 | return _ResultReason_name_0[_ResultReason_index_0[i]:_ResultReason_index_0[i+1]] 47 | case 17 <= i && i <= 23: 48 | i -= 17 49 | return 
_ResultReason_name_1[_ResultReason_index_1[i]:_ResultReason_index_1[i+1]] 50 | default: 51 | return "ResultReason(" + strconv.FormatInt(int64(i), 10) + ")" 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /speech/speech_synthesis_viseme_event_args.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | // #include 13 | import "C" 14 | 15 | // SpeechSynthesisVisemeEventArgs represents the speech synthesis viseme event arguments. 16 | type SpeechSynthesisVisemeEventArgs struct { 17 | handle C.SPXHANDLE 18 | 19 | // AudioOffset is the audio offset of the viseme event, in ticks (100 nanoseconds). 20 | AudioOffset uint64 21 | 22 | // VisemeID is the viseme ID. 23 | VisemeID uint 24 | 25 | // Animation is the animation. 
26 | Animation string 27 | } 28 | 29 | // Close releases the underlying resources 30 | func (event SpeechSynthesisVisemeEventArgs) Close() { 31 | C.synthesizer_event_handle_release(event.handle) 32 | } 33 | 34 | // NewSpeechSynthesisVisemeEventArgsFromHandle creates the object from the handle (for internal use) 35 | func NewSpeechSynthesisVisemeEventArgsFromHandle(handle common.SPXHandle) (*SpeechSynthesisVisemeEventArgs, error) { 36 | event := new(SpeechSynthesisVisemeEventArgs) 37 | event.handle = uintptr2handle(handle) 38 | /* AudioOffset and VisemeID */ 39 | var cAudioOffset C.uint64_t 40 | var cVisemeID C.uint32_t 41 | ret := uintptr(C.synthesizer_viseme_event_get_values(event.handle, &cAudioOffset, &cVisemeID)) 42 | if ret != C.SPX_NOERROR { 43 | return nil, common.NewCarbonError(ret) 44 | } 45 | event.AudioOffset = uint64(cAudioOffset) 46 | event.VisemeID = uint(cVisemeID) 47 | /* Animation */ 48 | value := C.synthesizer_viseme_event_get_animation(event.handle) 49 | event.Animation = C.GoString(value) 50 | C.property_bag_free_string(value) 51 | return event, nil 52 | } 53 | 54 | // SpeechSynthesisVisemeEventHandler is the type of the event handler that receives SpeechSynthesisVisemeEventArgs 55 | type SpeechSynthesisVisemeEventHandler func(event SpeechSynthesisVisemeEventArgs) 56 | -------------------------------------------------------------------------------- /speech/speech_recognition_canceled_event_args.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | import "C" 13 | 14 | // SpeechRecognitionCanceledEventArgs represents speech recognition canceled event arguments. 
15 | type SpeechRecognitionCanceledEventArgs struct { 16 | SpeechRecognitionEventArgs 17 | Reason common.CancellationReason 18 | ErrorCode common.CancellationErrorCode 19 | ErrorDetails string 20 | } 21 | 22 | // NewSpeechRecognitionCanceledEventArgsFromHandle creates the object from the handle (for internal use) 23 | func NewSpeechRecognitionCanceledEventArgsFromHandle(handle common.SPXHandle) (*SpeechRecognitionCanceledEventArgs, error) { 24 | baseArgs, err := NewSpeechRecognitionEventArgsFromHandle(handle) 25 | if err != nil { 26 | return nil, err 27 | } 28 | event := new(SpeechRecognitionCanceledEventArgs) 29 | event.SpeechRecognitionEventArgs = *baseArgs 30 | /* Reason */ 31 | var cReason C.Result_CancellationReason 32 | ret := uintptr(C.result_get_reason_canceled(event.Result.handle, &cReason)) 33 | if ret != C.SPX_NOERROR { 34 | event.Close() 35 | return nil, common.NewCarbonError(ret) 36 | } 37 | event.Reason = (common.CancellationReason)(cReason) 38 | /* ErrorCode */ 39 | var cCode C.Result_CancellationErrorCode 40 | ret = uintptr(C.result_get_canceled_error_code(event.Result.handle, &cCode)) 41 | if ret != C.SPX_NOERROR { 42 | event.Close() 43 | return nil, common.NewCarbonError(ret) 44 | } 45 | event.ErrorCode = (common.CancellationErrorCode)(cCode) 46 | event.ErrorDetails = event.Result.Properties.GetProperty(common.SpeechServiceResponseJSONErrorDetails, "") 47 | return event, nil 48 | } 49 | 50 | // SpeechRecognitionCanceledEventHandler is the type of the event handler that receives SpeechRecognitionCanceledEventArgs 51 | type SpeechRecognitionCanceledEventHandler func(event SpeechRecognitionCanceledEventArgs) 52 | -------------------------------------------------------------------------------- /speech/speech_config_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. 
See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | "testing" 9 | ) 10 | 11 | func TestFromSubscription(t *testing.T) { 12 | subscription := "test" 13 | region := "region" 14 | config, err := NewSpeechConfigFromSubscription(subscription, region) 15 | if err != nil { 16 | t.Error("Unexpected error") 17 | } 18 | if config.SubscriptionKey() != subscription { 19 | t.Error("Subscription not properly set") 20 | } 21 | if config.Region() != region { 22 | t.Error("Region not properly set") 23 | } 24 | } 25 | 26 | func TestFromAuthorizationToken(t *testing.T) { 27 | auth := "test" 28 | region := "region" 29 | config, err := NewSpeechConfigFromAuthorizationToken(auth, region) 30 | if err != nil { 31 | t.Error("Unexpected error") 32 | } 33 | if config.AuthorizationToken() != auth { 34 | t.Error("Authorization Token not properly set") 35 | } 36 | if config.Region() != region { 37 | t.Error("Region not properly set") 38 | } 39 | } 40 | 41 | func TestPropertiesByID(t *testing.T) { 42 | subscription := "test" 43 | region := "region" 44 | config, err := NewSpeechConfigFromSubscription(subscription, region) 45 | if err != nil { 46 | t.Error("Unexpected error") 47 | } 48 | value := "value1" 49 | err = config.SetProperty(common.SpeechServiceConnectionKey, value) 50 | if err != nil { 51 | t.Error("Unexpected error") 52 | } 53 | if config.GetProperty(common.SpeechServiceConnectionKey) != value { 54 | t.Error("Propery value not valid") 55 | } 56 | } 57 | 58 | func TestPropertiesByString(t *testing.T) { 59 | subscription := "test" 60 | region := "region" 61 | config, err := NewSpeechConfigFromSubscription(subscription, region) 62 | if err != nil { 63 | t.Error("Unexpected error") 64 | } 65 | value := "value1" 66 | err = config.SetPropertyByString("key1", value) 67 | if err != nil { 68 | t.Error("Unexpected error") 69 | } 70 | if 
config.GetPropertyByString("key1") != value { 71 | t.Error("Propery value not valid") 72 | } 73 | 74 | } 75 | -------------------------------------------------------------------------------- /samples/recognizer/from_microphone.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package recognizer 5 | 6 | import ( 7 | "bufio" 8 | "fmt" 9 | "os" 10 | 11 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 12 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 13 | ) 14 | 15 | func sessionStartedHandler(event speech.SessionEventArgs) { 16 | defer event.Close() 17 | fmt.Println("Session Started (ID=", event.SessionID, ")") 18 | } 19 | 20 | func sessionStoppedHandler(event speech.SessionEventArgs) { 21 | defer event.Close() 22 | fmt.Println("Session Stopped (ID=", event.SessionID, ")") 23 | } 24 | 25 | func recognizingHandler(event speech.SpeechRecognitionEventArgs) { 26 | defer event.Close() 27 | fmt.Println("Recognizing:", event.Result.Text) 28 | } 29 | 30 | func recognizedHandler(event speech.SpeechRecognitionEventArgs) { 31 | defer event.Close() 32 | fmt.Println("Recognized:", event.Result.Text) 33 | } 34 | 35 | func cancelledHandler(event speech.SpeechRecognitionCanceledEventArgs) { 36 | defer event.Close() 37 | fmt.Println("Received a cancellation: ", event.ErrorDetails) 38 | } 39 | 40 | func ContinuousFromMicrophone(subscription string, region string, file string) { 41 | audioConfig, err := audio.NewAudioConfigFromDefaultMicrophoneInput() 42 | if err != nil { 43 | fmt.Println("Got an error: ", err) 44 | return 45 | } 46 | defer audioConfig.Close() 47 | config, err := speech.NewSpeechConfigFromSubscription(subscription, region) 48 | if err != nil { 49 | fmt.Println("Got an error: ", err) 50 | return 51 | } 52 | defer config.Close() 53 | 
speechRecognizer, err := speech.NewSpeechRecognizerFromConfig(config, audioConfig) 54 | if err != nil { 55 | fmt.Println("Got an error: ", err) 56 | return 57 | } 58 | defer speechRecognizer.Close() 59 | speechRecognizer.SessionStarted(sessionStartedHandler) 60 | speechRecognizer.SessionStopped(sessionStoppedHandler) 61 | speechRecognizer.Recognizing(recognizingHandler) 62 | speechRecognizer.Recognized(recognizedHandler) 63 | speechRecognizer.Canceled(cancelledHandler) 64 | speechRecognizer.StartContinuousRecognitionAsync() 65 | defer speechRecognizer.StopContinuousRecognitionAsync() 66 | bufio.NewReader(os.Stdin).ReadBytes('\n') 67 | } 68 | -------------------------------------------------------------------------------- /samples/main.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | // Package main demonstrates usages for the speech recognizer and dialog service connector 5 | package main 6 | 7 | import ( 8 | "fmt" 9 | "os" 10 | 11 | "github.com/Microsoft/cognitive-services-speech-sdk-go/samples/dialog_service_connector" 12 | "github.com/Microsoft/cognitive-services-speech-sdk-go/samples/recognizer" 13 | "github.com/Microsoft/cognitive-services-speech-sdk-go/samples/synthesizer" 14 | ) 15 | 16 | type functionMap = map[string]func(string, string, string) 17 | 18 | func printHelp(executableName string, samples functionMap) { 19 | fmt.Println("Input not valid") 20 | fmt.Println("Usage: ") 21 | fmt.Println(executableName, " ") 22 | fmt.Println("Where sample is of the format :") 23 | fmt.Println("Available samples:") 24 | for id, _ := range samples { 25 | fmt.Println(" -- ", id) 26 | } 27 | } 28 | 29 | func main() { 30 | samples := functionMap{ 31 | "speech_recognizer:RecognizeOnceFromWavFile": recognizer.RecognizeOnceFromWavFile, 32 | 
"speech_recognizer:RecognizeOnceFromCompressedFile": recognizer.RecognizeOnceFromCompressedFile, 33 | "speech_recognizer:RecognizeOnceFromALAWFile": recognizer.RecognizeOnceFromALAWFile, 34 | "speech_recognizer:ContinuousFromMicrophone": recognizer.ContinuousFromMicrophone, 35 | "speech_recognizer:RecognizeContinuousUsingWrapper": recognizer.RecognizeContinuousUsingWrapper, 36 | "dialog_service_connector:ListenOnce": dialog_service_connector.ListenOnce, 37 | "dialog_service_connector:KWS": dialog_service_connector.KWS, 38 | "dialog_service_connector:ListenOnceFromStream": dialog_service_connector.ListenOnceFromStream, 39 | "speech_synthesizer:SynthesisToSpeaker": synthesizer.SynthesisToSpeaker, 40 | "speech_synthesizer:SynthesisToAudioDataStream": synthesizer.SynthesisToAudioDataStream, 41 | } 42 | args := os.Args[1:] 43 | if len(args) != 4 { 44 | printHelp(os.Args[0], samples) 45 | return 46 | } 47 | subscription := args[0] 48 | region := args[1] 49 | file := args[2] 50 | sample := args[3] 51 | sampleFunction := samples[sample] 52 | if sampleFunction == nil { 53 | printHelp(os.Args[0], samples) 54 | return 55 | } 56 | sampleFunction(subscription, region, file) 57 | } 58 | -------------------------------------------------------------------------------- /dialog/activity_received_event_args.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package dialog 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 8 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 9 | ) 10 | 11 | // #include 12 | // #include 13 | // #include 14 | import "C" 15 | import "unsafe" 16 | 17 | type ActivityReceivedEventArgs struct { 18 | handle C.SPXHANDLE 19 | Activity string 20 | } 21 | 22 | // Close releases the underlying resources 23 | func (event ActivityReceivedEventArgs) Close() { 24 | C.dialog_service_connector_activity_received_event_release(event.handle) 25 | } 26 | 27 | // HasAudio checks if the event contains audio 28 | func (event ActivityReceivedEventArgs) HasAudio() bool { 29 | return bool(C.dialog_service_connector_activity_received_event_has_audio(event.handle)) 30 | } 31 | 32 | // GetAudio gets the audio associated with the event. 33 | func (event ActivityReceivedEventArgs) GetAudio() (*audio.PullAudioOutputStream, error) { 34 | var handle C.SPXHANDLE 35 | ret := uintptr(C.dialog_service_connector_activity_received_event_get_audio(event.handle, &handle)) 36 | if ret != C.SPX_NOERROR { 37 | return nil, common.NewCarbonError(ret) 38 | } 39 | return audio.NewPullAudioOutputStreamFromHandle(handle2uintptr(handle)), nil 40 | } 41 | 42 | // NewSpeechRecognitionCanceledEventArgsFromHandle creates the object from the handle (for internal use) 43 | func NewActivityReceivedEventArgsFromHandle(handle common.SPXHandle) (*ActivityReceivedEventArgs, error) { 44 | event := new(ActivityReceivedEventArgs) 45 | event.handle = uintptr2handle(handle) 46 | var size C.size_t 47 | ret := uintptr(C.dialog_service_connector_activity_received_event_get_activity_size(event.handle, &size)) 48 | if ret != C.SPX_NOERROR { 49 | event.Close() 50 | return nil, common.NewCarbonError(ret) 51 | } 52 | actBuffer := C.malloc(C.sizeof_char * (size + 1)) 53 | defer C.free(unsafe.Pointer(actBuffer)) 54 | ret = uintptr(C.dialog_service_connector_activity_received_event_get_activity(event.handle, 
(*C.char)(actBuffer), size+1)) 55 | if ret != C.SPX_NOERROR { 56 | event.Close() 57 | return nil, common.NewCarbonError(ret) 58 | } 59 | event.Activity = C.GoString((*C.char)(actBuffer)) 60 | return event, nil 61 | } 62 | 63 | type ActivityReceivedEventHandler func(event ActivityReceivedEventArgs) 64 | -------------------------------------------------------------------------------- /samples/dialog_service_connector/from_push_audio_stream.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package dialog_service_connector 5 | 6 | import ( 7 | "fmt" 8 | "time" 9 | 10 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 11 | "github.com/Microsoft/cognitive-services-speech-sdk-go/dialog" 12 | "github.com/Microsoft/cognitive-services-speech-sdk-go/samples/helpers" 13 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 14 | ) 15 | 16 | func ListenOnceFromStream(subscription string, region string, file string) { 17 | stream, err := audio.CreatePushAudioInputStream() 18 | if err != nil { 19 | fmt.Println("Got an error: ", err) 20 | return 21 | } 22 | defer stream.Close() 23 | audioConfig, err := audio.NewAudioConfigFromStreamInput(stream) 24 | if err != nil { 25 | fmt.Println("Got an error: ", err) 26 | return 27 | } 28 | defer audioConfig.Close() 29 | config, err := dialog.NewBotFrameworkConfigFromSubscription(subscription, region) 30 | if err != nil { 31 | fmt.Println("Got an error: ", err) 32 | return 33 | } 34 | defer config.Close() 35 | connector, err := dialog.NewDialogServiceConnectorFromConfig(config, audioConfig) 36 | if err != nil { 37 | fmt.Println("Got an error: ", err) 38 | return 39 | } 40 | defer connector.Close() 41 | sessionStartedHandler := func(event speech.SessionEventArgs) { 42 | defer event.Close() 43 | fmt.Println("Session 
Started") 44 | } 45 | sessionStoppedHandler := func(event speech.SessionEventArgs) { 46 | defer event.Close() 47 | fmt.Println("Session Stopped") 48 | } 49 | connector.SessionStarted(sessionStartedHandler) 50 | connector.SessionStopped(sessionStoppedHandler) 51 | activityReceivedHandler := func(event dialog.ActivityReceivedEventArgs) { 52 | defer event.Close() 53 | fmt.Println("Received an activity.") 54 | } 55 | connector.ActivityReceived(activityReceivedHandler) 56 | recognizedHandle := func(event speech.SpeechRecognitionEventArgs) { 57 | defer event.Close() 58 | fmt.Println("Recognized ", event.Result.Text) 59 | } 60 | connector.Recognized(recognizedHandle) 61 | recognizingHandler := func(event speech.SpeechRecognitionEventArgs) { 62 | defer event.Close() 63 | fmt.Println("Recognizing ", event.Result.Text) 64 | } 65 | connector.Recognizing(recognizingHandler) 66 | helpers.PumpFileIntoStream(file, stream) 67 | connector.ListenOnceAsync() 68 | <-time.After(10 * time.Second) 69 | } 70 | -------------------------------------------------------------------------------- /samples/synthesizer/to_audio_data_stream.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package synthesizer 5 | 6 | import ( 7 | "bufio" 8 | "fmt" 9 | "io" 10 | "os" 11 | "strings" 12 | "time" 13 | 14 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 15 | ) 16 | 17 | func SynthesisToAudioDataStream(subscription string, region string, file string) { 18 | config, err := speech.NewSpeechConfigFromSubscription(subscription, region) 19 | if err != nil { 20 | fmt.Println("Got an error: ", err) 21 | return 22 | } 23 | defer config.Close() 24 | speechSynthesizer, err := speech.NewSpeechSynthesizerFromConfig(config, nil) 25 | if err != nil { 26 | fmt.Println("Got an error: ", err) 27 | return 28 | } 29 | defer speechSynthesizer.Close() 30 | 31 | speechSynthesizer.SynthesisStarted(synthesizeStartedHandler) 32 | speechSynthesizer.Synthesizing(synthesizingHandler) 33 | speechSynthesizer.SynthesisCompleted(synthesizedHandler) 34 | speechSynthesizer.SynthesisCanceled(cancelledHandler) 35 | 36 | for { 37 | fmt.Printf("Enter some text that you want to speak, or enter empty text to exit.\n> ") 38 | text, _ := bufio.NewReader(os.Stdin).ReadString('\n') 39 | text = strings.TrimSuffix(text, "\n") 40 | if len(text) == 0 { 41 | break 42 | } 43 | 44 | // StartSpeakingTextAsync sends the result to channel when the synthesis starts. 45 | task := speechSynthesizer.StartSpeakingTextAsync(text) 46 | var outcome speech.SpeechSynthesisOutcome 47 | select { 48 | case outcome = <-task: 49 | case <-time.After(60 * time.Second): 50 | fmt.Println("Timed out") 51 | return 52 | } 53 | defer outcome.Close() 54 | if outcome.Error != nil { 55 | fmt.Println("Got an error: ", outcome.Error) 56 | return 57 | } 58 | 59 | // in most case we want to streaming receive the audio to lower the latency, 60 | // we can use AudioDataStream to do so. 
61 | stream, err := speech.NewAudioDataStreamFromSpeechSynthesisResult(outcome.Result) 62 | defer stream.Close() 63 | if err != nil { 64 | fmt.Println("Got an error: ", err) 65 | return 66 | } 67 | 68 | var all_audio []byte 69 | audio_chunk := make([]byte, 2048) 70 | for { 71 | n, err := stream.Read(audio_chunk) 72 | 73 | if err == io.EOF { 74 | break 75 | } 76 | 77 | all_audio = append(all_audio, audio_chunk[:n]...) 78 | } 79 | 80 | fmt.Printf("Read [%d] bytes from audio data stream.\n", len(all_audio)) 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /samples/dialog_service_connector/listen_once.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package dialog_service_connector 5 | 6 | import ( 7 | "fmt" 8 | "time" 9 | 10 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 11 | "github.com/Microsoft/cognitive-services-speech-sdk-go/dialog" 12 | "github.com/Microsoft/cognitive-services-speech-sdk-go/samples/helpers" 13 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 14 | ) 15 | 16 | func ListenOnce(subscription string, region string, file string) { 17 | stream, err := audio.CreatePushAudioInputStream() 18 | if err != nil { 19 | fmt.Println("Got an error: ", err) 20 | return 21 | } 22 | defer stream.Close() 23 | audioConfig, err := audio.NewAudioConfigFromStreamInput(stream) 24 | if err != nil { 25 | fmt.Println("Got an error: ", err) 26 | return 27 | } 28 | defer audioConfig.Close() 29 | config, err := dialog.NewBotFrameworkConfigFromSubscription(subscription, region) 30 | if err != nil { 31 | fmt.Println("Got an error: ", err) 32 | return 33 | } 34 | defer config.Close() 35 | connector, err := dialog.NewDialogServiceConnectorFromConfig(config, audioConfig) 36 | if err != nil { 37 | 
fmt.Println("Got an error: ", err) 38 | return 39 | } 40 | defer connector.Close() 41 | sessionStartedHandler := func(event speech.SessionEventArgs) { 42 | defer event.Close() 43 | fmt.Println("Session Started") 44 | } 45 | sessionStoppedHandler := func(event speech.SessionEventArgs) { 46 | defer event.Close() 47 | fmt.Println("Session Stopped") 48 | } 49 | connector.SessionStarted(sessionStartedHandler) 50 | connector.SessionStopped(sessionStoppedHandler) 51 | activityReceivedHandler := func(event dialog.ActivityReceivedEventArgs) { 52 | defer event.Close() 53 | fmt.Println("Received an activity.") 54 | } 55 | connector.ActivityReceived(activityReceivedHandler) 56 | recognizedHandle := func(event speech.SpeechRecognitionEventArgs) { 57 | defer event.Close() 58 | fmt.Println("Recognized ", event.Result.Text) 59 | } 60 | connector.Recognized(recognizedHandle) 61 | recognizingHandler := func(event speech.SpeechRecognitionEventArgs) { 62 | defer event.Close() 63 | fmt.Println("Recognizing ", event.Result.Text) 64 | } 65 | connector.Recognizing(recognizingHandler) 66 | helpers.PumpFileIntoStream(file, stream) 67 | connector.ListenOnceAsync() 68 | <-time.After(10 * time.Second) 69 | } 70 | -------------------------------------------------------------------------------- /speech/speech_synthesis_word_boundary_event_args.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "time" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | // #include 14 | // #include 15 | import "C" 16 | 17 | // SpeechSynthesisWordBoundaryEventArgs represents the speech synthesis word boundary event arguments. 
18 | type SpeechSynthesisWordBoundaryEventArgs struct { 19 | handle C.SPXHANDLE 20 | 21 | // AudioOffset is the audio offset of the word boundary event, in ticks (100 nanoseconds). 22 | AudioOffset uint64 23 | 24 | // Duration is the duration of the word boundary event. 25 | Duration time.Duration 26 | 27 | // TextOffset is the text offset. 28 | TextOffset uint 29 | 30 | // WordLength is the length of the word. 31 | WordLength uint 32 | 33 | // Text is the text. 34 | Text string 35 | 36 | // BoundaryType is the boundary type. 37 | BoundaryType common.SpeechSynthesisBoundaryType 38 | } 39 | 40 | // Close releases the underlying resources 41 | func (event SpeechSynthesisWordBoundaryEventArgs) Close() { 42 | C.synthesizer_event_handle_release(event.handle) 43 | } 44 | 45 | // NewSpeechSynthesisWordBoundaryEventArgsFromHandle creates the object from the handle (for internal use) 46 | func NewSpeechSynthesisWordBoundaryEventArgsFromHandle(handle common.SPXHandle) (*SpeechSynthesisWordBoundaryEventArgs, error) { 47 | event := new(SpeechSynthesisWordBoundaryEventArgs) 48 | event.handle = uintptr2handle(handle) 49 | var cAudioOffset, cDuration C.uint64_t 50 | var cTextOffset, cWordLength C.uint32_t 51 | var cBoundaryType C.SpeechSynthesis_BoundaryType 52 | ret := uintptr(C.synthesizer_word_boundary_event_get_values(event.handle, &cAudioOffset, &cDuration, &cTextOffset, &cWordLength, &cBoundaryType)) 53 | if ret != C.SPX_NOERROR { 54 | return nil, common.NewCarbonError(ret) 55 | } 56 | event.AudioOffset = uint64(cAudioOffset) 57 | event.Duration = time.Duration(cDuration*100) * time.Nanosecond 58 | event.TextOffset = uint(cTextOffset) 59 | event.WordLength = uint(cWordLength) 60 | event.BoundaryType = (common.SpeechSynthesisBoundaryType)(cBoundaryType) 61 | /* Text */ 62 | value := C.synthesizer_event_get_text(event.handle) 63 | event.Text = C.GoString(value) 64 | C.property_bag_free_string(value) 65 | return event, nil 66 | } 67 | 68 | // 
SpeechSynthesisWordBoundaryEventHandler is the type of the event handler that receives SpeechSynthesisWordBoundaryEventArgs 69 | type SpeechSynthesisWordBoundaryEventHandler func(event SpeechSynthesisWordBoundaryEventArgs) 70 | -------------------------------------------------------------------------------- /speech/source_language_config.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "unsafe" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | // #include 14 | import "C" 15 | 16 | // SourceLanguageConfig defines source language configuration. 17 | type SourceLanguageConfig struct { 18 | handle C.SPXHANDLE 19 | properties *common.PropertyCollection 20 | } 21 | 22 | func newSourceLanguageConfigFromHandle(handle C.SPXHANDLE) (*SourceLanguageConfig, error) { 23 | var propBagHandle C.SPXHANDLE 24 | ret := uintptr(C.source_lang_config_get_property_bag(handle, &propBagHandle)) 25 | if ret != C.SPX_NOERROR { 26 | C.source_lang_config_release(handle) 27 | return nil, common.NewCarbonError(ret) 28 | } 29 | config := new(SourceLanguageConfig) 30 | config.handle = handle 31 | config.properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 32 | return config, nil 33 | } 34 | 35 | // NewSourceLanguageConfigFromLanguage creates an instance of the SourceLanguageConfig with source language 36 | func NewSourceLanguageConfigFromLanguage(language string) (*SourceLanguageConfig, error) { 37 | var handle C.SPXHANDLE 38 | languageCStr := C.CString(language) 39 | defer C.free(unsafe.Pointer(languageCStr)) 40 | ret := uintptr(C.source_lang_config_from_language(&handle, languageCStr)) 41 | if ret != C.SPX_NOERROR { 42 | return nil, common.NewCarbonError(ret) 43 | } 44 | 
return newSourceLanguageConfigFromHandle(handle) 45 | } 46 | 47 | // NewSourceLanguageConfigFromLanguageAndEndpointId creates an instance of the SourceLanguageConfig with source language and custom endpoint id. A custom endpoint id corresponds to custom models. 48 | //nolint:revive 49 | func NewSourceLanguageConfigFromLanguageAndEndpointId(language string, endpointID string) (*SourceLanguageConfig, error) { 50 | var handle C.SPXHANDLE 51 | languageCStr := C.CString(language) 52 | defer C.free(unsafe.Pointer(languageCStr)) 53 | endpointCStr := C.CString(endpointID) 54 | defer C.free(unsafe.Pointer(endpointCStr)) 55 | ret := uintptr(C.source_lang_config_from_language_and_endpointId(&handle, languageCStr, endpointCStr)) 56 | if ret != C.SPX_NOERROR { 57 | return nil, common.NewCarbonError(ret) 58 | } 59 | return newSourceLanguageConfigFromHandle(handle) 60 | } 61 | 62 | func (config SourceLanguageConfig) getHandle() C.SPXHANDLE { 63 | return config.handle 64 | } 65 | 66 | // Close performs cleanup of resources. 67 | func (config SourceLanguageConfig) Close() { 68 | config.properties.Close() 69 | C.source_lang_config_release(config.handle) 70 | } 71 | -------------------------------------------------------------------------------- /common/property_collection.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | // #include 7 | // #include 8 | // #include 9 | import "C" 10 | import "unsafe" 11 | 12 | // PropertyCollection is a class to retrieve or set a property value from a property collection. 13 | type PropertyCollection struct { 14 | handle C.SPXHANDLE 15 | } 16 | 17 | // GetProperty returns value of a property. 18 | // If the property value is not defined, the specified default value is returned. 
19 | func (properties PropertyCollection) GetProperty(id PropertyID, defaultValue string) string { 20 | defValue := C.CString(defaultValue) 21 | defer C.free(unsafe.Pointer(defValue)) 22 | value := C.property_bag_get_string(properties.handle, (C.int)(id), nil, defValue) 23 | goValue := C.GoString(value) 24 | C.property_bag_free_string(value) 25 | return goValue 26 | } 27 | 28 | // GetPropertyByString returns value of a property. 29 | // If the property value is not defined, the specified default value is returned. 30 | func (properties PropertyCollection) GetPropertyByString(name string, defaultValue string) string { 31 | defValue := C.CString(defaultValue) 32 | defer C.free(unsafe.Pointer(defValue)) 33 | n := C.CString(name) 34 | defer C.free(unsafe.Pointer(n)) 35 | value := C.property_bag_get_string(properties.handle, -1, n, defValue) 36 | goValue := C.GoString(value) 37 | C.property_bag_free_string(value) 38 | return goValue 39 | } 40 | 41 | // SetProperty sets the value of a property. 42 | func (properties PropertyCollection) SetProperty(id PropertyID, value string) error { 43 | v := C.CString(value) 44 | defer C.free(unsafe.Pointer(v)) 45 | ret := uintptr(C.property_bag_set_string(properties.handle, (C.int)(id), nil, v)) 46 | if ret != C.SPX_NOERROR { 47 | return NewCarbonError(ret) 48 | } 49 | return nil 50 | } 51 | 52 | // SetPropertyByString sets the value of a property. 53 | func (properties PropertyCollection) SetPropertyByString(name string, value string) error { 54 | n := C.CString(name) 55 | defer C.free(unsafe.Pointer(n)) 56 | v := C.CString(value) 57 | defer C.free(unsafe.Pointer(v)) 58 | ret := uintptr(C.property_bag_set_string(properties.handle, -1, n, v)) 59 | if ret != C.SPX_NOERROR { 60 | return NewCarbonError(ret) 61 | } 62 | return nil 63 | } 64 | 65 | // Close disposes the associated resources. 
66 | func (properties PropertyCollection) Close() { 67 | C.property_bag_release(properties.handle) 68 | } 69 | 70 | // NewPropertyCollectionFromHandle creates a PropertyCollection from a handle (for internal use) 71 | func NewPropertyCollectionFromHandle(handle SPXHandle) *PropertyCollection { 72 | propertyCollection := new(PropertyCollection) 73 | propertyCollection.handle = uintptr2handle(handle) 74 | return propertyCollection 75 | } 76 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com).
If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /samples/dialog_service_connector/start_keyword_listening.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license.
See LICENSE.md file in the project root for full license information. 3 | 4 | package dialog_service_connector 5 | 6 | import ( 7 | "fmt" 8 | "time" 9 | 10 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 11 | "github.com/Microsoft/cognitive-services-speech-sdk-go/dialog" 12 | "github.com/Microsoft/cognitive-services-speech-sdk-go/samples/helpers" 13 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 14 | ) 15 | 16 | func KWS(subscription string, region string, file string) { 17 | stream, err := audio.CreatePushAudioInputStream() 18 | if err != nil { 19 | fmt.Println("Got an error: ", err) 20 | return 21 | } 22 | defer stream.Close() 23 | model, err := speech.NewKeywordRecognitionModelFromFile("../../test_files/kws.table") 24 | if err != nil { 25 | fmt.Println("Got an error: ", err) 26 | } 27 | defer model.Close() 28 | audioConfig, err := audio.NewAudioConfigFromStreamInput(stream) 29 | if err != nil { 30 | fmt.Println("Got an error: ", err) 31 | return 32 | } 33 | defer audioConfig.Close() 34 | config, err := dialog.NewBotFrameworkConfigFromSubscription(subscription, region) 35 | if err != nil { 36 | fmt.Println("Got an error: ", err) 37 | return 38 | } 39 | defer config.Close() 40 | connector, err := dialog.NewDialogServiceConnectorFromConfig(config, audioConfig) 41 | if err != nil { 42 | fmt.Println("Got an error: ", err) 43 | return 44 | } 45 | defer connector.Close() 46 | sessionStartedHandler := func(event speech.SessionEventArgs) { 47 | defer event.Close() 48 | fmt.Println("Session Started") 49 | } 50 | sessionStoppedHandler := func(event speech.SessionEventArgs) { 51 | defer event.Close() 52 | fmt.Println("Session Stopped") 53 | } 54 | connector.SessionStarted(sessionStartedHandler) 55 | connector.SessionStopped(sessionStoppedHandler) 56 | activityReceivedHandler := func(event dialog.ActivityReceivedEventArgs) { 57 | defer event.Close() 58 | fmt.Println("Received an activity.") 59 | } 60 | 
connector.ActivityReceived(activityReceivedHandler) 61 | recognizedHandle := func(event speech.SpeechRecognitionEventArgs) { 62 | defer event.Close() 63 | fmt.Println("Recognized ", event.Result.Text) 64 | } 65 | connector.Recognized(recognizedHandle) 66 | recognizingHandler := func(event speech.SpeechRecognitionEventArgs) { 67 | defer event.Close() 68 | fmt.Println("Recognizing ", event.Result.Text) 69 | } 70 | connector.Recognizing(recognizingHandler) 71 | canceledHandler := func(event speech.SpeechRecognitionCanceledEventArgs) { 72 | defer event.Close() 73 | fmt.Println("Canceled ", event.Reason) 74 | fmt.Println("Error code ", event.ErrorCode) 75 | } 76 | connector.Canceled(canceledHandler) 77 | helpers.PumpFileIntoStream(file, stream) 78 | connector.StartKeywordRecognitionAsync(model) 79 | <-time.After(10 * time.Second) 80 | } 81 | -------------------------------------------------------------------------------- /common/result_reason.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | // ResultReason specifies the possible reasons a recognition result might be generated. 7 | type ResultReason int 8 | 9 | const ( 10 | // NoMatch indicates speech could not be recognized. More details can be found in the NoMatchDetails object. 11 | NoMatch ResultReason = 0 12 | 13 | // Canceled indicates that the recognition was canceled. More details can be found using the CancellationDetails object. 14 | Canceled ResultReason = 1 15 | 16 | // RecognizingSpeech indicates the speech result contains hypothesis text. 17 | RecognizingSpeech ResultReason = 2 18 | 19 | // RecognizedSpeech indicates the speech result contains final text that has been recognized. 20 | // Speech Recognition is now complete for this phrase. 
21 | RecognizedSpeech ResultReason = 3 22 | 23 | // This result reason is deprecated and not used anymore. 24 | RecognizingIntent ResultReason = 4 25 | 26 | // This result reason is deprecated and not used anymore. 27 | RecognizedIntent ResultReason = 5 28 | 29 | // TranslatingSpeech indicates the translation result contains hypothesis text and its translation(s). 30 | TranslatingSpeech ResultReason = 6 31 | 32 | // TranslatedSpeech indicates the translation result contains final text and corresponding translation(s). 33 | // Speech Recognition and Translation are now complete for this phrase. 34 | TranslatedSpeech ResultReason = 7 35 | 36 | // SynthesizingAudio indicates the synthesized audio result contains a non-zero amount of audio data 37 | SynthesizingAudio ResultReason = 8 38 | 39 | // SynthesizingAudioCompleted indicates the synthesized audio is now complete for this phrase. 40 | SynthesizingAudioCompleted ResultReason = 9 41 | 42 | // RecognizingKeyword indicates the speech result contains (unverified) keyword text. 43 | RecognizingKeyword ResultReason = 10 44 | 45 | // RecognizedKeyword indicates that keyword recognition completed recognizing the given keyword. 46 | RecognizedKeyword ResultReason = 11 47 | 48 | // SynthesizingAudioStarted indicates the speech synthesis is now started 49 | SynthesizingAudioStarted ResultReason = 12 50 | 51 | // This result reason is deprecated and not used anymore. 52 | EnrollingVoiceProfile ResultReason = 17 53 | 54 | // This result reason is deprecated and not used anymore. 55 | EnrolledVoiceProfile ResultReason = 18 56 | 57 | // This result reason is deprecated and not used anymore. 58 | RecognizedSpeakers ResultReason = 19 59 | 60 | // This result reason is deprecated and not used anymore. 61 | RecognizedSpeaker ResultReason = 20 62 | 63 | // This result reason is deprecated and not used anymore. 64 | ResetVoiceProfile ResultReason = 21 65 | 66 | // This result reason is deprecated and not used anymore. 
67 | DeletedVoiceProfile ResultReason = 22 68 | 69 | // VoicesListRetrieved indicates the voices list has been retrieved successfully. 70 | VoicesListRetrieved ResultReason = 23 71 | ) 72 | 73 | //go:generate stringer -type=ResultReason -output=result_reason_string.go 74 | -------------------------------------------------------------------------------- /samples/synthesizer/to_speaker.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package synthesizer 5 | 6 | import ( 7 | "bufio" 8 | "fmt" 9 | "os" 10 | "strings" 11 | "time" 12 | 13 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 14 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 15 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 16 | ) 17 | 18 | func synthesizeStartedHandler(event speech.SpeechSynthesisEventArgs) { 19 | defer event.Close() 20 | fmt.Println("Synthesis started.") 21 | } 22 | 23 | func synthesizingHandler(event speech.SpeechSynthesisEventArgs) { 24 | defer event.Close() 25 | fmt.Printf("Synthesizing, audio chunk size %d.\n", len(event.Result.AudioData)) 26 | } 27 | 28 | func synthesizedHandler(event speech.SpeechSynthesisEventArgs) { 29 | defer event.Close() 30 | fmt.Printf("Synthesized, audio length %d.\n", len(event.Result.AudioData)) 31 | } 32 | 33 | func cancelledHandler(event speech.SpeechSynthesisEventArgs) { 34 | defer event.Close() 35 | fmt.Println("Received a cancellation.") 36 | } 37 | 38 | func SynthesisToSpeaker(subscription string, region string, file string) { 39 | audioConfig, err := audio.NewAudioConfigFromDefaultSpeakerOutput() 40 | if err != nil { 41 | fmt.Println("Got an error: ", err) 42 | return 43 | } 44 | defer audioConfig.Close() 45 | config, err := speech.NewSpeechConfigFromSubscription(subscription, region) 46 | if err != 
nil { 47 | fmt.Println("Got an error: ", err) 48 | return 49 | } 50 | defer config.Close() 51 | speechSynthesizer, err := speech.NewSpeechSynthesizerFromConfig(config, audioConfig) 52 | if err != nil { 53 | fmt.Println("Got an error: ", err) 54 | return 55 | } 56 | defer speechSynthesizer.Close() 57 | 58 | speechSynthesizer.SynthesisStarted(synthesizeStartedHandler) 59 | speechSynthesizer.Synthesizing(synthesizingHandler) 60 | speechSynthesizer.SynthesisCompleted(synthesizedHandler) 61 | speechSynthesizer.SynthesisCanceled(cancelledHandler) 62 | 63 | for { 64 | fmt.Printf("Enter some text that you want to speak, or enter empty text to exit.\n> ") 65 | text, _ := bufio.NewReader(os.Stdin).ReadString('\n') 66 | text = strings.TrimSuffix(text, "\n") 67 | if len(text) == 0 { 68 | break 69 | } 70 | 71 | task := speechSynthesizer.SpeakTextAsync(text) 72 | var outcome speech.SpeechSynthesisOutcome 73 | select { 74 | case outcome = <-task: 75 | case <-time.After(60 * time.Second): 76 | fmt.Println("Timed out") 77 | return 78 | } 79 | defer outcome.Close() 80 | if outcome.Error != nil { 81 | fmt.Println("Got an error: ", outcome.Error) 82 | return 83 | } 84 | 85 | if outcome.Result.Reason == common.SynthesizingAudioCompleted { 86 | fmt.Printf("Speech synthesized to speaker for text [%s].\n", text) 87 | } else { 88 | cancellation, _ := speech.NewCancellationDetailsFromSpeechSynthesisResult(outcome.Result) 89 | fmt.Printf("CANCELED: Reason=%v.\n", cancellation.Reason) 90 | 91 | if cancellation.Reason == common.Error { 92 | fmt.Printf("CANCELED: ErrorCode=%v\nCANCELED: ErrorDetails=[%s]\nCANCELED: Did you update the subscription info?\n", 93 | cancellation.ErrorCode, 94 | cancellation.ErrorDetails) 95 | } 96 | } 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /speech/translation_callback_helpers.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. 
All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "sync" 8 | ) 9 | 10 | // #include 11 | // #include 12 | // #include 13 | // 14 | import "C" 15 | 16 | var ( 17 | translationRecognizingCallbacks = make(map[C.SPXHANDLE]TranslationRecognitionEventHandler) 18 | translationRecognizedCallbacks = make(map[C.SPXHANDLE]TranslationRecognitionEventHandler) 19 | translationCanceledCallbacks = make(map[C.SPXHANDLE]TranslationRecognitionCanceledEventHandler) 20 | translationSynthesisCallbacks = make(map[C.SPXHANDLE]TranslationSynthesisEventHandler) 21 | translationCallbacksLock sync.Mutex 22 | ) 23 | 24 | func registerTranslationRecognizingCallback(callback TranslationRecognitionEventHandler, handle C.SPXHANDLE) { 25 | translationCallbacksLock.Lock() 26 | defer translationCallbacksLock.Unlock() 27 | translationRecognizingCallbacks[handle] = callback 28 | } 29 | 30 | func registerTranslationRecognizedCallback(callback TranslationRecognitionEventHandler, handle C.SPXHANDLE) { 31 | translationCallbacksLock.Lock() 32 | defer translationCallbacksLock.Unlock() 33 | translationRecognizedCallbacks[handle] = callback 34 | } 35 | 36 | func registerTranslationCanceledCallback(callback TranslationRecognitionCanceledEventHandler, handle C.SPXHANDLE) { 37 | translationCallbacksLock.Lock() 38 | defer translationCallbacksLock.Unlock() 39 | translationCanceledCallbacks[handle] = callback 40 | } 41 | 42 | func registerTranslationSynthesisCallback(callback TranslationSynthesisEventHandler, handle C.SPXHANDLE) { 43 | translationCallbacksLock.Lock() 44 | defer translationCallbacksLock.Unlock() 45 | translationSynthesisCallbacks[handle] = callback 46 | } 47 | 48 | //export cgoTranslationRecognizing 49 | func cgoTranslationRecognizing(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 50 | translationCallbacksLock.Lock() 51 | callback := 
translationRecognizingCallbacks[handle] 52 | translationCallbacksLock.Unlock() 53 | if callback != nil { 54 | eventArgs, _ := NewTranslationRecognitionEventArgsFromHandle(handle2uintptr(eventHandle)) 55 | callback(*eventArgs) 56 | } 57 | } 58 | 59 | //export cgoTranslationRecognized 60 | func cgoTranslationRecognized(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 61 | translationCallbacksLock.Lock() 62 | callback := translationRecognizedCallbacks[handle] 63 | translationCallbacksLock.Unlock() 64 | if callback != nil { 65 | eventArgs, _ := NewTranslationRecognitionEventArgsFromHandle(handle2uintptr(eventHandle)) 66 | callback(*eventArgs) 67 | } 68 | } 69 | 70 | //export cgoTranslationCanceled 71 | func cgoTranslationCanceled(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 72 | translationCallbacksLock.Lock() 73 | callback := translationCanceledCallbacks[handle] 74 | translationCallbacksLock.Unlock() 75 | if callback != nil { 76 | eventArgs, _ := NewTranslationRecognitionCanceledEventArgsFromHandle(handle2uintptr(eventHandle)) 77 | callback(*eventArgs) 78 | } 79 | } 80 | 81 | //export cgoTranslationSynthesis 82 | func cgoTranslationSynthesis(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 83 | translationCallbacksLock.Lock() 84 | callback := translationSynthesisCallbacks[handle] 85 | translationCallbacksLock.Unlock() 86 | if callback != nil { 87 | eventArgs, _ := NewTranslationSynthesisEventArgsFromHandle(handle2uintptr(eventHandle)) 88 | callback(*eventArgs) 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /diagnostics/diagnostics.go: -------------------------------------------------------------------------------- 1 | package diagnostics 2 | 3 | // #include 4 | // #include 5 | import "C" 6 | import "unsafe" 7 | 8 | // StartMemoryLogging starts logging to memory 9 | func StartMemoryLogging() { 10 | C.diagnostics_log_memory_start_logging() 11 | } 12 | 13 | // StopMemoryLogging stops logging to 
memory 14 | func StopMemoryLogging() { 15 | C.diagnostics_log_memory_stop_logging() 16 | } 17 | 18 | // SetMemoryLogFilters sets filters for memory logging 19 | func SetMemoryLogFilters(filters string) { 20 | cFilters := C.CString(filters) 21 | defer C.free(unsafe.Pointer(cFilters)) 22 | C.diagnostics_log_memory_set_filters(cFilters) 23 | } 24 | 25 | // GetMemoryLogLineNumOldest gets the line number of the oldest memory log entry 26 | func GetMemoryLogLineNumOldest() uint { 27 | return uint(C.diagnostics_log_memory_get_line_num_oldest()) 28 | } 29 | 30 | // GetMemoryLogLineNumNewest gets the line number of the newest memory log entry 31 | func GetMemoryLogLineNumNewest() uint { 32 | return uint(C.diagnostics_log_memory_get_line_num_newest()) 33 | } 34 | 35 | // GetMemoryLogLine gets a specific line from the memory log 36 | func GetMemoryLogLine(lineNum uint) string { 37 | cLine := C.diagnostics_log_memory_get_line(C.size_t(lineNum)) 38 | if cLine == nil { 39 | return "" 40 | } 41 | return C.GoString(cLine) 42 | 43 | } 44 | 45 | // DumpMemoryLogToStderr dumps the memory log to stderr 46 | func DumpMemoryLogToStderr() error { 47 | ret := uintptr(C.diagnostics_log_memory_dump_to_stderr()) 48 | if ret != 0 { 49 | return newDiagnosticsError("dumpMemoryLogToStderr", ret) 50 | } 51 | return nil 52 | } 53 | 54 | // DumpMemoryLog dumps the memory log to a file and/or standard output 55 | func DumpMemoryLog(filename string, linePrefix string, emitToStdOut bool, emitToStdErr bool) error { 56 | var cFilename *C.char 57 | if filename != "" { 58 | cFilename = C.CString(filename) 59 | defer C.free(unsafe.Pointer(cFilename)) 60 | } 61 | var cLinePrefix *C.char 62 | if linePrefix != "" { 63 | cLinePrefix = C.CString(linePrefix) 64 | defer C.free(unsafe.Pointer(cLinePrefix)) 65 | } 66 | ret := uintptr(C.diagnostics_log_memory_dump(cFilename, cLinePrefix, C.bool(emitToStdOut), C.bool(emitToStdErr))) 67 | if ret != 0 { 68 | return newDiagnosticsError("dumpMemoryLog", ret) 69 | } 70 | 
return nil 71 | } 72 | 73 | // DumpMemoryLogOnExit dumps the memory log when the program exits 74 | func DumpMemoryLogOnExit(filename string, linePrefix string, emitToStdOut bool, emitToStdErr bool) error { 75 | var cFilename *C.char 76 | if filename != "" { 77 | cFilename = C.CString(filename) 78 | defer C.free(unsafe.Pointer(cFilename)) 79 | } 80 | var cLinePrefix *C.char 81 | if linePrefix != "" { 82 | cLinePrefix = C.CString(linePrefix) 83 | defer C.free(unsafe.Pointer(cLinePrefix)) 84 | } 85 | ret := uintptr(C.diagnostics_log_memory_dump_on_exit(cFilename, cLinePrefix, C.bool(emitToStdOut), C.bool(emitToStdErr))) 86 | if ret != 0 { 87 | return newDiagnosticsError("dumpMemoryLogOnExit", ret) 88 | } 89 | return nil 90 | } 91 | 92 | // StartConsoleLogging starts logging to the console 93 | func StartConsoleLogging(logToStderr bool) { 94 | C.diagnostics_log_console_start_logging(C.bool(logToStderr)) 95 | } 96 | 97 | // StopConsoleLogging stops logging to the console 98 | func StopConsoleLogging() { 99 | C.diagnostics_log_console_stop_logging() 100 | } 101 | 102 | // SetConsoleLogFilters sets filters for console logging 103 | func SetConsoleLogFilters(filters string) { 104 | cFilters := C.CString(filters) 105 | defer C.free(unsafe.Pointer(cFilters)) 106 | C.diagnostics_log_console_set_filters(cFilters) 107 | } 108 | -------------------------------------------------------------------------------- /speech/conversation_callback_helpers.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | // #include 7 | // #include 8 | import "C" 9 | 10 | // ConversationTranscriptionEventHandler is the callback type for conversation transcription events. 
11 | type ConversationTranscriptionEventHandler func(event ConversationTranscriptionEventArgs) 12 | 13 | // ConversationTranscriptionCanceledEventHandler is the callback type for conversation transcription canceled events. 14 | type ConversationTranscriptionCanceledEventHandler func(event ConversationTranscriptionCanceledEventArgs) 15 | 16 | var ( 17 | conversationTranscribingCallbacks = make(map[C.SPXHANDLE]ConversationTranscriptionEventHandler) 18 | conversationTranscribedCallbacks = make(map[C.SPXHANDLE]ConversationTranscriptionEventHandler) 19 | conversationCanceledCallbacks = make(map[C.SPXHANDLE]ConversationTranscriptionCanceledEventHandler) 20 | ) 21 | 22 | func registerConversationTranscribingCallback(handler ConversationTranscriptionEventHandler, handle C.SPXHANDLE) { 23 | mu.Lock() 24 | defer mu.Unlock() 25 | conversationTranscribingCallbacks[handle] = handler 26 | } 27 | 28 | func getConversationTranscribingCallback(handle C.SPXHANDLE) ConversationTranscriptionEventHandler { 29 | mu.Lock() 30 | defer mu.Unlock() 31 | return conversationTranscribingCallbacks[handle] 32 | } 33 | 34 | //export conversationTranscriberFireEventTranscribing 35 | func conversationTranscriberFireEventTranscribing(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 36 | handler := getConversationTranscribingCallback(handle) 37 | event, err := NewConversationTranscriptionEventArgsFromHandle(handle2uintptr(eventHandle)) 38 | if err != nil || handler == nil { 39 | C.recognizer_event_handle_release(eventHandle) 40 | return 41 | } 42 | handler(*event) 43 | } 44 | 45 | func registerConversationTranscribedCallback(handler ConversationTranscriptionEventHandler, handle C.SPXHANDLE) { 46 | mu.Lock() 47 | defer mu.Unlock() 48 | conversationTranscribedCallbacks[handle] = handler 49 | } 50 | 51 | func getConversationTranscribedCallback(handle C.SPXHANDLE) ConversationTranscriptionEventHandler { 52 | mu.Lock() 53 | defer mu.Unlock() 54 | return conversationTranscribedCallbacks[handle] 55 | 
} 56 | 57 | //export conversationTranscriberFireEventTranscribed 58 | func conversationTranscriberFireEventTranscribed(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 59 | handler := getConversationTranscribedCallback(handle) 60 | event, err := NewConversationTranscriptionEventArgsFromHandle(handle2uintptr(eventHandle)) 61 | if err != nil || handler == nil { 62 | C.recognizer_event_handle_release(eventHandle) 63 | return 64 | } 65 | handler(*event) 66 | } 67 | 68 | func registerConversationCanceledCallback(handler ConversationTranscriptionCanceledEventHandler, handle C.SPXHANDLE) { 69 | mu.Lock() 70 | defer mu.Unlock() 71 | conversationCanceledCallbacks[handle] = handler 72 | } 73 | 74 | func getConversationCanceledCallback(handle C.SPXHANDLE) ConversationTranscriptionCanceledEventHandler { 75 | mu.Lock() 76 | defer mu.Unlock() 77 | return conversationCanceledCallbacks[handle] 78 | } 79 | 80 | //export conversationTranscriberFireEventCanceled 81 | func conversationTranscriberFireEventCanceled(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 82 | handler := getConversationCanceledCallback(handle) 83 | event, err := NewConversationTranscriptionCanceledEventArgsFromHandle(handle2uintptr(eventHandle)) 84 | if err != nil || handler == nil { 85 | C.recognizer_event_handle_release(eventHandle) 86 | return 87 | } 88 | handler(*event) 89 | } -------------------------------------------------------------------------------- /speech/voice_info.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package speech 5 | 6 | import ( 7 | "strings" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | // #include 14 | // #include 15 | // #include 16 | // #include 17 | // 18 | import "C" 19 | 20 | // VoiceInfo contains information about result from voices list of speech synthesizers. 21 | type VoiceInfo struct { 22 | handle C.SPXHANDLE 23 | 24 | // Name specifies the voice name. 25 | Name string 26 | 27 | // Locale specifies the locale of the voice 28 | Locale string 29 | 30 | // ShortName specifies the voice name in short format 31 | ShortName string 32 | 33 | // LocalName specifies the local name of the voice 34 | LocalName string 35 | 36 | // Gender specifies the gender of the voice. 37 | Gender common.SynthesisVoiceGender 38 | 39 | // VoiceType specifies the voice type. 40 | VoiceType common.SynthesisVoiceType 41 | 42 | // StyleList specifies the styles the voice supports. 43 | StyleList []string 44 | 45 | // VoicePath specifies the voice path 46 | VoicePath string 47 | 48 | // Collection of additional properties. 
49 | Properties *common.PropertyCollection 50 | } 51 | 52 | // Close releases the underlying resources 53 | func (result VoiceInfo) Close() { 54 | result.Properties.Close() 55 | C.voice_info_handle_release(result.handle) 56 | } 57 | 58 | // NewVoiceInfoFromHandle creates a VoiceInfo from a handle (for internal use) 59 | func NewVoiceInfoFromHandle(handle common.SPXHandle) (*VoiceInfo, error) { 60 | voiceInfo := new(VoiceInfo) 61 | voiceInfo.handle = uintptr2handle(handle) 62 | /* Name */ 63 | value := C.voice_info_get_name(voiceInfo.handle) 64 | voiceInfo.Name = C.GoString(value) 65 | C.property_bag_free_string(value) 66 | /* Locale */ 67 | value = C.voice_info_get_locale(voiceInfo.handle) 68 | voiceInfo.Locale = C.GoString(value) 69 | C.property_bag_free_string(value) 70 | /* ShortName */ 71 | value = C.voice_info_get_short_name(voiceInfo.handle) 72 | voiceInfo.ShortName = C.GoString(value) 73 | C.property_bag_free_string(value) 74 | /* LocalName */ 75 | value = C.voice_info_get_local_name(voiceInfo.handle) 76 | voiceInfo.LocalName = C.GoString(value) 77 | C.property_bag_free_string(value) 78 | /* StyleList */ 79 | value = C.voice_info_get_style_list(voiceInfo.handle) 80 | voiceInfo.StyleList = strings.Split(C.GoString(value), "|") 81 | C.property_bag_free_string(value) 82 | /* VoiceType */ 83 | var cVoiceType C.Synthesis_VoiceType 84 | ret := uintptr(C.voice_info_get_voice_type(voiceInfo.handle, &cVoiceType)) 85 | if ret != C.SPX_NOERROR { 86 | return nil, common.NewCarbonError(ret) 87 | } 88 | voiceInfo.VoiceType = (common.SynthesisVoiceType)(cVoiceType) 89 | /* VoicePath */ 90 | value = C.voice_info_get_voice_path(voiceInfo.handle) 91 | voiceInfo.VoicePath = C.GoString(value) 92 | C.property_bag_free_string(value) 93 | /* Properties */ 94 | var propBagHandle C.SPXHANDLE 95 | ret = uintptr(C.voice_info_get_property_bag(uintptr2handle(handle), &propBagHandle)) 96 | if ret != C.SPX_NOERROR { 97 | return nil, common.NewCarbonError(ret) 98 | } 99 | 
voiceInfo.Properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 100 | gender := voiceInfo.Properties.GetPropertyByString("Gender", "") 101 | if gender == "Female" { 102 | voiceInfo.Gender = common.Female 103 | } else if gender == "Male" { 104 | voiceInfo.Gender = common.Male 105 | } else { 106 | voiceInfo.Gender = common.GenderUnknown 107 | } 108 | return voiceInfo, nil 109 | } 110 | -------------------------------------------------------------------------------- /common/error.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | import ( 7 | "fmt" 8 | ) 9 | 10 | // #include 11 | import "C" 12 | 13 | type CarbonError struct { 14 | Code int 15 | Message string 16 | } 17 | 18 | var errorString = map[int]string{ 19 | 0x000: "SPX_NOERROR", 20 | 0xfff: "SPXERR_NOT_IMPL", 21 | 0x001: "SPXERR_UNINITIALIZED", 22 | 0x002: "SPXERR_ALREADY_INITIALIZED", 23 | 0x003: "SPXERR_UNHANDLED_EXCEPTION", 24 | 0x004: "SPXERR_NOT_FOUND", 25 | 0x005: "SPXERR_INVALID_ARG", 26 | 0x006: "SPXERR_TIMEOUT", 27 | 0x007: "SPXERR_ALREADY_IN_PROGRESS", 28 | 0x008: "SPXERR_FILE_OPEN_FAILED", 29 | 0x009: "SPXERR_UNEXPECTED_EOF", 30 | 0x00a: "SPXERR_INVALID_HEADER", 31 | 0x00b: "SPXERR_AUDIO_IS_PUMPING", 32 | 0x00c: "SPXERR_UNSUPPORTED_FORMAT", 33 | 0x00d: "SPXERR_ABORT", 34 | 0x00e: "SPXERR_MIC_NOT_AVAILABLE", 35 | 0x00f: "SPXERR_INVALID_STATE", 36 | 0x010: "SPXERR_UUID_CREATE_FAILED", 37 | 0x011: "SPXERR_SETFORMAT_UNEXPECTED_STATE_TRANSITION", 38 | 0x012: "SPXERR_PROCESS_AUDIO_INVALID_STATE", 39 | 0x013: "SPXERR_START_RECOGNIZING_INVALID_STATE_TRANSITION", 40 | 0x014: "SPXERR_UNEXPECTED_CREATE_OBJECT_FAILURE", 41 | 0x015: "SPXERR_MIC_ERROR", 42 | 0x016: "SPXERR_NO_AUDIO_INPUT", 43 | 0x017: "SPXERR_UNEXPECTED_USP_SITE_FAILURE", 44 | 0x018: 
"SPXERR_UNEXPECTED_UNIDEC_SITE_FAILURE", 45 | 0x019: "SPXERR_BUFFER_TOO_SMALL", 46 | 0x01A: "SPXERR_OUT_OF_MEMORY", 47 | 0x01B: "SPXERR_RUNTIME_ERROR", 48 | 0x01C: "SPXERR_INVALID_URL", 49 | 0x01D: "SPXERR_INVALID_REGION", 50 | 0x01E: "SPXERR_SWITCH_MODE_NOT_ALLOWED", 51 | 0x01F: "SPXERR_CHANGE_CONNECTION_STATUS_NOT_ALLOWED", 52 | 0x020: "SPXERR_EXPLICIT_CONNECTION_NOT_SUPPORTED_BY_RECOGNIZER", 53 | 0x021: "SPXERR_INVALID_HANDLE", 54 | 0x022: "SPXERR_INVALID_RECOGNIZER", 55 | 0x023: "SPXERR_OUT_OF_RANGE", 56 | 0x024: "SPXERR_EXTENSION_LIBRARY_NOT_FOUND", 57 | 0x025: "SPXERR_UNEXPECTED_TTS_ENGINE_SITE_FAILURE", 58 | 0x026: "SPXERR_UNEXPECTED_AUDIO_OUTPUT_FAILURE", 59 | 0x027: "SPXERR_GSTREAMER_INTERNAL_ERROR", 60 | 0x028: "SPXERR_CONTAINER_FORMAT_NOT_SUPPORTED_ERROR", 61 | 0x029: "SPXERR_GSTREAMER_NOT_FOUND_ERROR", 62 | 0x02A: "SPXERR_INVALID_LANGUAGE", 63 | 0x02B: "SPXERR_UNSUPPORTED_API_ERROR", 64 | 0x02C: "SPXERR_RINGBUFFER_DATA_UNAVAILABLE", 65 | 0x030: "SPXERR_UNEXPECTED_CONVERSATION_SITE_FAILURE", 66 | 0x031: "SPXERR_UNEXPECTED_CONVERSATION_TRANSLATOR_SITE_FAILURE", 67 | 0x032: "SPXERR_CANCELED", 68 | } 69 | 70 | func NewCarbonError(errorHandle uintptr) CarbonError { 71 | var carbonError CarbonError 72 | carbonError.Code = getErrorCode(SPXHandle(errorHandle)) 73 | carbonError.Message = getErrorMessage(SPXHandle(errorHandle)) 74 | // When the message is empty, construct the error message using the errorHandle value directly. 75 | if carbonError.Message == "" { 76 | codeAsHexString := fmt.Sprintf("0x%0x", carbonError.Code) 77 | carbonError.Message = "Exception with an error code: " + codeAsHexString + " (" + errorString[carbonError.Code] + ")" 78 | } 79 | return carbonError 80 | } 81 | 82 | func (e CarbonError) Error() string { 83 | return e.Message 84 | } 85 | 86 | func getErrorCode(errorHandle SPXHandle) int { 87 | ret := int(C.error_get_error_code(uintptr2handle(errorHandle))) 88 | // A 0 means there was no corresponding event stored. 
89 | // So this must be a SPX_* error and not a stored exception. 90 | // Return the HR as the error. 91 | if ret == 0 { 92 | return int(errorHandle) 93 | } 94 | return ret 95 | } 96 | 97 | func getErrorMessage(errorHandle SPXHandle) string { 98 | message := "" 99 | ret := C.error_get_message(uintptr2handle(errorHandle)) 100 | if ret != nil { 101 | message = C.GoString(ret) 102 | } 103 | return message 104 | } 105 | -------------------------------------------------------------------------------- /speech/speech_recognition_result.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "time" 8 | "unsafe" 9 | 10 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 11 | ) 12 | 13 | // #include 14 | // #include 15 | // #include 16 | // #include 17 | // #include 18 | // 19 | import "C" 20 | 21 | // SpeechRecognitionResult contains detailed information about result of a recognition operation. 22 | type SpeechRecognitionResult struct { 23 | handle C.SPXHANDLE 24 | 25 | // ResultID specifies the result identifier. 26 | ResultID string 27 | 28 | // Reason specifies status of speech recognition result. 29 | Reason common.ResultReason 30 | 31 | // Text presents the recognized text in the result. 32 | Text string 33 | 34 | // Duration of the recognized speech. 35 | Duration time.Duration 36 | 37 | // Offset of the recognized speech in ticks. 38 | Offset time.Duration 39 | 40 | // Collection of additional RecognitionResult properties. 
41 | Properties *common.PropertyCollection 42 | } 43 | 44 | // Close releases the underlying resources 45 | func (result SpeechRecognitionResult) Close() { 46 | result.Properties.Close() 47 | C.recognizer_result_handle_release(result.handle) 48 | } 49 | 50 | // NewSpeechRecognitionResultFromHandle creates a SpeechRecognitionResult from a handle (for internal use) 51 | func NewSpeechRecognitionResultFromHandle(handle common.SPXHandle) (*SpeechRecognitionResult, error) { 52 | buffer := C.malloc(C.sizeof_char * 1024) 53 | defer C.free(unsafe.Pointer(buffer)) 54 | result := new(SpeechRecognitionResult) 55 | result.handle = uintptr2handle(handle) 56 | /* ResultID */ 57 | ret := uintptr(C.result_get_result_id(result.handle, (*C.char)(buffer), 1024)) 58 | if ret != C.SPX_NOERROR { 59 | return nil, common.NewCarbonError(ret) 60 | } 61 | result.ResultID = C.GoString((*C.char)(buffer)) 62 | /* Reason */ 63 | var cReason C.Result_Reason 64 | ret = uintptr(C.result_get_reason(result.handle, &cReason)) 65 | if ret != C.SPX_NOERROR { 66 | return nil, common.NewCarbonError(ret) 67 | } 68 | result.Reason = (common.ResultReason)(cReason) 69 | /* Text */ 70 | ret = uintptr(C.result_get_text(result.handle, (*C.char)(buffer), 1024)) 71 | if ret != C.SPX_NOERROR { 72 | return nil, common.NewCarbonError(ret) 73 | } 74 | result.Text = C.GoString((*C.char)(buffer)) 75 | /* Duration */ 76 | var cDuration C.uint64_t 77 | ret = uintptr(C.result_get_duration(result.handle, &cDuration)) 78 | if ret != C.SPX_NOERROR { 79 | return nil, common.NewCarbonError(ret) 80 | } 81 | result.Duration = time.Nanosecond * time.Duration(100*cDuration) 82 | /* Offset */ 83 | var cOffset C.uint64_t 84 | ret = uintptr(C.result_get_offset(result.handle, &cOffset)) 85 | if ret != C.SPX_NOERROR { 86 | return nil, common.NewCarbonError(ret) 87 | } 88 | result.Offset = time.Nanosecond * time.Duration(100*cOffset) 89 | /* Properties */ 90 | var propBagHandle C.SPXHANDLE 91 | ret = 
uintptr(C.result_get_property_bag(uintptr2handle(handle), &propBagHandle)) 92 | if ret != C.SPX_NOERROR { 93 | return nil, common.NewCarbonError(ret) 94 | } 95 | result.Properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 96 | return result, nil 97 | } 98 | 99 | // SpeechRecognitionOutcome is a wrapper type to be returned by operations returning SpeechRecognitionResult and error 100 | type SpeechRecognitionOutcome struct { 101 | common.OperationOutcome 102 | 103 | // Result is the result of the operation 104 | Result *SpeechRecognitionResult 105 | } 106 | 107 | // Close releases the underlying resources 108 | func (outcome SpeechRecognitionOutcome) Close() { 109 | if outcome.Result != nil { 110 | outcome.Result.Close() 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /audio/audio_stream_format.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package audio 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | // #include 13 | // #include 14 | import "C" 15 | 16 | // AudioStreamFormat represents the audio stream format used for custom audio input configurations. 17 | // Updated in version 1.5.0. 18 | type AudioStreamFormat struct { 19 | handle C.SPXHANDLE 20 | } 21 | 22 | // GetDefaultInputFormat creates an audio stream format object representing the default audio stream format 23 | // (16 kHz, 16 bit, mono PCM). 
24 | func GetDefaultInputFormat() (*AudioStreamFormat, error) { 25 | var handle C.SPXHANDLE 26 | ret := uintptr(C.audio_stream_format_create_from_default_input(&handle)) 27 | if ret != C.SPX_NOERROR { 28 | return nil, common.NewCarbonError(ret) 29 | } 30 | format := new(AudioStreamFormat) 31 | format.handle = handle 32 | return format, nil 33 | } 34 | 35 | // GetWaveFormat creates an audio stream format object with the specified waveformat characteristics. 36 | func GetWaveFormat(samplesPerSecond uint32, bitsPerSample uint8, channels uint8, waveFormat AudioStreamWaveFormat) (*AudioStreamFormat, error) { 37 | var handle C.SPXHANDLE 38 | ret := uintptr(C.audio_stream_format_create_from_waveformat( 39 | &handle, 40 | (C.uint32_t)(samplesPerSecond), 41 | (C.uint8_t)(bitsPerSample), 42 | (C.uint8_t)(channels), 43 | (C.Audio_Stream_Wave_Format)(waveFormat))) 44 | if ret != C.SPX_NOERROR { 45 | return nil, common.NewCarbonError(ret) 46 | } 47 | format := new(AudioStreamFormat) 48 | format.handle = handle 49 | return format, nil 50 | } 51 | 52 | // GetWaveFormatPCM creates an audio stream format object with the specified PCM waveformat characteristics. 53 | // Note: Currently, only WAV / PCM with 16-bit samples, 16 kHz sample rate, and a single channel (Mono) is supported. When 54 | // used with Conversation Transcription, eight channels are supported. 
55 | func GetWaveFormatPCM(samplesPerSecond uint32, bitsPerSample uint8, channels uint8) (*AudioStreamFormat, error) { 56 | var handle C.SPXHANDLE 57 | ret := uintptr(C.audio_stream_format_create_from_waveformat_pcm( 58 | &handle, 59 | (C.uint32_t)(samplesPerSecond), 60 | (C.uint8_t)(bitsPerSample), 61 | (C.uint8_t)(channels))) 62 | if ret != C.SPX_NOERROR { 63 | return nil, common.NewCarbonError(ret) 64 | } 65 | format := new(AudioStreamFormat) 66 | format.handle = handle 67 | return format, nil 68 | } 69 | 70 | // GetDefaultOutputFormat creates an audio stream format object representing the default audio stream format 71 | // (16 kHz, 16 bit, mono PCM). 72 | func GetDefaultOutputFormat() (*AudioStreamFormat, error) { 73 | var handle C.SPXHANDLE 74 | ret := uintptr(C.audio_stream_format_create_from_default_output(&handle)) 75 | if ret != C.SPX_NOERROR { 76 | return nil, common.NewCarbonError(ret) 77 | } 78 | format := new(AudioStreamFormat) 79 | format.handle = handle 80 | return format, nil 81 | } 82 | 83 | // GetCompressedFormat creates an audio stream format object with the specified compressed audio container format, to be 84 | // used as input format. 85 | func GetCompressedFormat(compressedFormat AudioStreamContainerFormat) (*AudioStreamFormat, error) { 86 | var handle C.SPXHANDLE 87 | ret := uintptr(C.audio_stream_format_create_from_compressed_format(&handle, (C.Audio_Stream_Container_Format)(compressedFormat))) 88 | if ret != C.SPX_NOERROR { 89 | return nil, common.NewCarbonError(ret) 90 | } 91 | format := new(AudioStreamFormat) 92 | format.handle = handle 93 | return format, nil 94 | } 95 | 96 | // Close disposes the associated resources. 
97 | func (format *AudioStreamFormat) Close() { 98 | C.audio_stream_format_release(format.handle) 99 | } 100 | -------------------------------------------------------------------------------- /speech/auto_detect_source_language_config.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "strings" 8 | "unsafe" 9 | 10 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 11 | ) 12 | 13 | // #include 14 | // #include 15 | import "C" 16 | 17 | // AutoDetectSourceLanguageConfig defines auto detection source configuration 18 | type AutoDetectSourceLanguageConfig struct { 19 | handle C.SPXHANDLE 20 | properties *common.PropertyCollection 21 | } 22 | 23 | func newAutoDetectSourceLanguageConfigFromHandle(handle C.SPXHANDLE) (*AutoDetectSourceLanguageConfig, error) { 24 | var propBagHandle C.SPXHANDLE 25 | ret := uintptr(C.auto_detect_source_lang_config_get_property_bag(handle, &propBagHandle)) 26 | if ret != C.SPX_NOERROR { 27 | C.auto_detect_source_lang_config_release(handle) 28 | return nil, common.NewCarbonError(ret) 29 | } 30 | config := new(AutoDetectSourceLanguageConfig) 31 | config.handle = handle 32 | config.properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 33 | return config, nil 34 | } 35 | 36 | // NewAutoDetectSourceLanguageConfigFromOpenRange creates an instance of the AutoDetectSourceLanguageConfig with open range as source languages 37 | func NewAutoDetectSourceLanguageConfigFromOpenRange() (*AutoDetectSourceLanguageConfig, error) { 38 | var handle C.SPXHANDLE 39 | ret := uintptr(C.create_auto_detect_source_lang_config_from_open_range(&handle)) 40 | if ret != C.SPX_NOERROR { 41 | return nil, common.NewCarbonError(ret) 42 | } 43 | return 
newAutoDetectSourceLanguageConfigFromHandle(handle) 44 | } 45 | 46 | // NewAutoDetectSourceLanguageConfigFromLanguages creates an instance of the AutoDetectSourceLanguageConfig with source languages 47 | func NewAutoDetectSourceLanguageConfigFromLanguages(languages []string) (*AutoDetectSourceLanguageConfig, error) { 48 | var handle C.SPXHANDLE 49 | languageStr := strings.Join(languages, ",") 50 | languageCStr := C.CString(languageStr) 51 | defer C.free(unsafe.Pointer(languageCStr)) 52 | ret := uintptr(C.create_auto_detect_source_lang_config_from_languages(&handle, languageCStr)) 53 | if ret != C.SPX_NOERROR { 54 | return nil, common.NewCarbonError(ret) 55 | } 56 | return newAutoDetectSourceLanguageConfigFromHandle(handle) 57 | } 58 | 59 | // NewAutoDetectSourceLanguageConfigFromLanguageConfigs creates an instance of the AutoDetectSourceLanguageConfig with a list of source language config 60 | func NewAutoDetectSourceLanguageConfigFromLanguageConfigs(configs []*SourceLanguageConfig) (*AutoDetectSourceLanguageConfig, error) { 61 | if len(configs) == 0 { 62 | return nil, common.NewCarbonError(C.SPXERR_INVALID_ARG) 63 | } 64 | var handle C.SPXHANDLE 65 | var ret uintptr 66 | first := true 67 | for i := 0; i < len(configs); i++ { 68 | c := configs[i] 69 | if c == nil { 70 | if !first { 71 | C.auto_detect_source_lang_config_release(handle) 72 | } 73 | return nil, common.NewCarbonError(C.SPXERR_INVALID_ARG) 74 | } 75 | if first { 76 | ret = uintptr(C.create_auto_detect_source_lang_config_from_source_lang_config(&handle, c.getHandle())) 77 | first = false 78 | if ret != C.SPX_NOERROR { 79 | return nil, common.NewCarbonError(ret) 80 | } 81 | } else { 82 | ret = uintptr(C.add_source_lang_config_to_auto_detect_source_lang_config(handle, c.getHandle())) 83 | if ret != C.SPX_NOERROR { 84 | return nil, common.NewCarbonError(ret) 85 | } 86 | } 87 | } 88 | return newAutoDetectSourceLanguageConfigFromHandle(handle) 89 | } 90 | 91 | func (config AutoDetectSourceLanguageConfig) 
getHandle() C.SPXHANDLE { 92 | return config.handle 93 | } 94 | 95 | // Close performs cleanup of resources. 96 | func (config AutoDetectSourceLanguageConfig) Close() { 97 | config.properties.Close() 98 | C.auto_detect_source_lang_config_release(config.handle) 99 | } 100 | -------------------------------------------------------------------------------- /speech/synthesis_voices_result.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "unsafe" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | // #include 14 | // #include 15 | // #include 16 | // #include 17 | // 18 | import "C" 19 | 20 | // SynthesisVoicesResult contains information about result from voices list of speech synthesizers. 21 | type SynthesisVoicesResult struct { 22 | handle C.SPXHANDLE 23 | 24 | // Voices specifies all voices retrieved 25 | Voices []*VoiceInfo 26 | 27 | // ResultID specifies the result identifier. 28 | ResultID string 29 | 30 | // Reason specifies status of speech synthesis result. 31 | Reason common.ResultReason 32 | 33 | // ErrorDetails presents error details. 34 | ErrorDetails string 35 | 36 | // Collection of additional properties. 
37 | Properties *common.PropertyCollection 38 | } 39 | 40 | // Close releases the underlying resources 41 | func (result SynthesisVoicesResult) Close() { 42 | for _, voice := range result.Voices { 43 | voice.Close() 44 | } 45 | result.Properties.Close() 46 | C.synthesizer_result_handle_release(result.handle) 47 | } 48 | 49 | // NewSynthesisVoicesResultFromHandle creates a SynthesisVoicesResult from a handle (for internal use) 50 | func NewSynthesisVoicesResultFromHandle(handle common.SPXHandle) (*SynthesisVoicesResult, error) { 51 | result := new(SynthesisVoicesResult) 52 | result.handle = uintptr2handle(handle) 53 | buffer := C.malloc(C.sizeof_char * 1024) 54 | defer C.free(unsafe.Pointer(buffer)) 55 | /* ResultID */ 56 | ret := uintptr(C.synthesis_voices_result_get_result_id(result.handle, (*C.char)(buffer), 1024)) 57 | if ret != C.SPX_NOERROR { 58 | return nil, common.NewCarbonError(ret) 59 | } 60 | result.ResultID = C.GoString((*C.char)(buffer)) 61 | /* Reason */ 62 | var cReason C.Result_Reason 63 | ret = uintptr(C.synthesis_voices_result_get_reason(result.handle, &cReason)) 64 | if ret != C.SPX_NOERROR { 65 | return nil, common.NewCarbonError(ret) 66 | } 67 | result.Reason = (common.ResultReason)(cReason) 68 | /* Properties */ 69 | var propBagHandle C.SPXHANDLE 70 | ret = uintptr(C.synthesis_voices_result_get_property_bag(result.handle, &propBagHandle)) 71 | if ret != C.SPX_NOERROR { 72 | return nil, common.NewCarbonError(ret) 73 | } 74 | result.Properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 75 | result.ErrorDetails = result.Properties.GetProperty(common.CancellationDetailsReasonDetailedText, "") 76 | /* Voices */ 77 | var voiceNum C.uint32_t 78 | ret = uintptr(C.synthesis_voices_result_get_voice_num(result.handle, &voiceNum)) 79 | if ret != C.SPX_NOERROR { 80 | return nil, common.NewCarbonError(ret) 81 | } 82 | voices := make([]*VoiceInfo, voiceNum) 83 | var voice *VoiceInfo 84 | var hVoice C.SPXRESULTHANDLE 85 | var err 
error 86 | for i := 0; i < int(voiceNum); i++ { 87 | ret = uintptr(C.synthesis_voices_result_get_voice_info(result.handle, (C.uint32_t)(i), &hVoice)) 88 | if ret != C.SPX_NOERROR { 89 | return nil, common.NewCarbonError(ret) 90 | } 91 | voice, err = NewVoiceInfoFromHandle(handle2uintptr(hVoice)) 92 | if err != nil { 93 | return nil, err 94 | } 95 | voices[i] = voice 96 | } 97 | result.Voices = voices 98 | return result, nil 99 | } 100 | 101 | // SpeechSynthesisVoicesOutcome is a wrapper type to be returned by operations returning SynthesisVoicesResult and error 102 | type SpeechSynthesisVoicesOutcome struct { 103 | common.OperationOutcome 104 | 105 | // Result is the result of the operation 106 | Result *SynthesisVoicesResult 107 | } 108 | 109 | // Close releases the underlying resources 110 | func (outcome SpeechSynthesisVoicesOutcome) Close() { 111 | if outcome.Result != nil { 112 | outcome.Result.Close() 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /speech/speech_synthesis_result.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "time" 8 | "unsafe" 9 | 10 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 11 | ) 12 | 13 | // #include 14 | // #include 15 | // #include 16 | // #include 17 | // #include 18 | // 19 | import "C" 20 | 21 | // SpeechSynthesisResult contains detailed information about result of a synthesis operation. 22 | type SpeechSynthesisResult struct { 23 | handle C.SPXHANDLE 24 | 25 | // ResultID specifies the result identifier. 26 | ResultID string 27 | 28 | // Reason specifies status of speech synthesis result. 29 | Reason common.ResultReason 30 | 31 | // AudioData presents the synthesized audio. 
32 | AudioData []byte 33 | 34 | // AudioDuration presents the time duration of synthesized audio. 35 | AudioDuration time.Duration 36 | 37 | // Collection of additional synthesisResult properties. 38 | Properties *common.PropertyCollection 39 | } 40 | 41 | // Close releases the underlying resources 42 | func (result *SpeechSynthesisResult) Close() { 43 | result.Properties.Close() 44 | if result.handle != C.SPXHANDLE_INVALID { 45 | C.synthesizer_result_handle_release(result.handle) 46 | result.handle = C.SPXHANDLE_INVALID 47 | } 48 | } 49 | 50 | // NewSpeechSynthesisResultFromHandle creates a SpeechSynthesisResult from a handle (for internal use) 51 | func NewSpeechSynthesisResultFromHandle(handle common.SPXHandle) (*SpeechSynthesisResult, error) { 52 | 53 | result := new(SpeechSynthesisResult) 54 | result.handle = uintptr2handle(handle) 55 | /* AudioData length and duration */ 56 | var cAudioLength C.uint32_t 57 | var cAudioDuration C.uint64_t 58 | ret := uintptr(C.synth_result_get_audio_length_duration(result.handle, &cAudioLength, &cAudioDuration)) 59 | if ret != C.SPX_NOERROR { 60 | return nil, common.NewCarbonError(ret) 61 | } 62 | result.AudioDuration = time.Duration(cAudioDuration) * time.Millisecond 63 | // using max(1024, cAudioLength) as buffer size 64 | if cAudioLength < 1024 { 65 | cAudioLength = 1024 66 | } 67 | buffer := C.malloc(C.sizeof_char * (C.size_t)(cAudioLength)) 68 | defer C.free(unsafe.Pointer(buffer)) 69 | /* ResultID */ 70 | ret = uintptr(C.synth_result_get_result_id(result.handle, (*C.char)(buffer), 1024)) 71 | if ret != C.SPX_NOERROR { 72 | return nil, common.NewCarbonError(ret) 73 | } 74 | result.ResultID = C.GoString((*C.char)(buffer)) 75 | /* Reason */ 76 | var cReason C.Result_Reason 77 | ret = uintptr(C.synth_result_get_reason(result.handle, &cReason)) 78 | if ret != C.SPX_NOERROR { 79 | return nil, common.NewCarbonError(ret) 80 | } 81 | result.Reason = (common.ResultReason)(cReason) 82 | /* AudioData */ 83 | var outSize C.uint32_t 
84 | ret = uintptr(C.synth_result_get_audio_data(result.handle, (*C.uint8_t)(buffer), cAudioLength, &outSize)) 85 | if ret != C.SPX_NOERROR { 86 | return nil, common.NewCarbonError(ret) 87 | } 88 | result.AudioData = C.GoBytes(buffer, (C.int)(outSize)) 89 | /* Properties */ 90 | var propBagHandle C.SPXHANDLE 91 | ret = uintptr(C.synth_result_get_property_bag(uintptr2handle(handle), &propBagHandle)) 92 | if ret != C.SPX_NOERROR { 93 | return nil, common.NewCarbonError(ret) 94 | } 95 | result.Properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 96 | return result, nil 97 | } 98 | 99 | // SpeechSynthesisOutcome is a wrapper type to be returned by operations returning SpeechSynthesisResult and error 100 | type SpeechSynthesisOutcome struct { 101 | common.OperationOutcome 102 | 103 | // Result is the result of the operation 104 | Result *SpeechSynthesisResult 105 | } 106 | 107 | // Close releases the underlying resources 108 | func (outcome SpeechSynthesisOutcome) Close() { 109 | if outcome.Result != nil { 110 | outcome.Result.Close() 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /speech/conversation_transcription_event_args.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | // #include 13 | // #include 14 | import "C" 15 | 16 | // ConversationTranscriptionEventArgs is used for conversation transcription events. 
17 | type ConversationTranscriptionEventArgs struct { 18 | RecognitionEventArgs // Inherit from RecognitionEventArgs for consistency 19 | handle C.SPXHANDLE 20 | Result ConversationTranscriptionResult // Direct field instead of pointer 21 | } 22 | 23 | // NewConversationTranscriptionEventArgsFromHandle creates a ConversationTranscriptionEventArgs from an event handle 24 | func NewConversationTranscriptionEventArgsFromHandle(handle common.SPXHandle) (*ConversationTranscriptionEventArgs, error) { 25 | // Create the base RecognitionEventArgs first 26 | base, err := NewRecognitionEventArgsFromHandle(handle) 27 | if err != nil { 28 | return nil, err 29 | } 30 | 31 | event := new(ConversationTranscriptionEventArgs) 32 | event.RecognitionEventArgs = *base 33 | event.handle = uintptr2handle(handle) 34 | 35 | // Get the result handle 36 | var resultHandle C.SPXHANDLE 37 | ret := uintptr(C.recognizer_recognition_event_get_result(event.handle, &resultHandle)) 38 | if ret != C.SPX_NOERROR { 39 | return nil, common.NewCarbonError(ret) 40 | } 41 | 42 | // Create the result 43 | result, err := NewConversationTranscriptionResultFromHandle(handle2uintptr(resultHandle)) 44 | if err != nil { 45 | return nil, err 46 | } 47 | 48 | event.Result = *result 49 | return event, nil 50 | } 51 | 52 | // Close releases the underlying resources 53 | func (event ConversationTranscriptionEventArgs) Close() { 54 | event.RecognitionEventArgs.Close() 55 | event.Result.Close() 56 | } 57 | 58 | // ConversationTranscriptionEventHandler is the type of the event handler that receives ConversationTranscriptionEventArgs 59 | // type ConversationTranscriptionEventHandler func(event ConversationTranscriptionEventArgs) 60 | 61 | // ConversationTranscriptionCanceledEventArgs is used for conversation transcription canceled events. 
62 | type ConversationTranscriptionCanceledEventArgs struct { 63 | ConversationTranscriptionEventArgs 64 | Reason common.CancellationReason // Direct field instead of nested object 65 | ErrorCode common.CancellationErrorCode // Direct field instead of nested object 66 | ErrorDetails string // Direct field instead of nested object 67 | } 68 | 69 | // NewConversationTranscriptionCanceledEventArgsFromHandle creates a ConversationTranscriptionCanceledEventArgs from an event handle 70 | func NewConversationTranscriptionCanceledEventArgsFromHandle(handle common.SPXHandle) (*ConversationTranscriptionCanceledEventArgs, error) { 71 | baseArgs, err := NewConversationTranscriptionEventArgsFromHandle(handle) 72 | if err != nil { 73 | return nil, err 74 | } 75 | 76 | event := new(ConversationTranscriptionCanceledEventArgs) 77 | event.ConversationTranscriptionEventArgs = *baseArgs 78 | 79 | /* Reason */ 80 | var cReason C.Result_CancellationReason 81 | ret := uintptr(C.result_get_reason_canceled(event.Result.handle, &cReason)) 82 | if ret != C.SPX_NOERROR { 83 | event.Close() 84 | return nil, common.NewCarbonError(ret) 85 | } 86 | event.Reason = (common.CancellationReason)(cReason) 87 | 88 | /* ErrorCode */ 89 | var cCode C.Result_CancellationErrorCode 90 | ret = uintptr(C.result_get_canceled_error_code(event.Result.handle, &cCode)) 91 | if ret != C.SPX_NOERROR { 92 | event.Close() 93 | return nil, common.NewCarbonError(ret) 94 | } 95 | event.ErrorCode = (common.CancellationErrorCode)(cCode) 96 | event.ErrorDetails = event.Result.Properties.GetProperty(common.SpeechServiceResponseJSONErrorDetails, "") 97 | 98 | return event, nil 99 | } 100 | 101 | // Close releases the associated resources. 
102 | func (event ConversationTranscriptionCanceledEventArgs) Close() { 103 | event.ConversationTranscriptionEventArgs.Close() 104 | } 105 | 106 | // ConversationTranscriptionCanceledEventHandler is the type of the event handler that receives ConversationTranscriptionCanceledEventArgs 107 | //type ConversationTranscriptionCanceledEventHandler func(event ConversationTranscriptionCanceledEventArgs) -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | on: 3 | push: 4 | branches: 5 | - master 6 | - main 7 | workflow_dispatch: 8 | 9 | env: 10 | CARBON_VERSION: "1.43.0" 11 | 12 | jobs: 13 | linux: 14 | name: Build (Linux) 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | matrix: 18 | os: [ubuntu-20.04, ubuntu-22.04] 19 | go: [1.13, 1.17, 1.18] 20 | steps: 21 | - name: Set up Go ${{ matrix.go }} 22 | uses: actions/setup-go@v3 23 | with: 24 | go-version: ${{ matrix.go }} 25 | id: go 26 | - name: Check out code into the Go module directory 27 | uses: actions/checkout@v3 28 | 29 | - name: Get dependencies 30 | run: | 31 | go get -v -t -d ./... 
32 | if [ -f Gopkg.toml ]; then 33 | curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh 34 | dep ensure 35 | fi 36 | 37 | - name: Get Speech SDK (Linux) 38 | run: | 39 | mkdir $HOME/carbon 40 | pushd $HOME/carbon 41 | wget https://csspeechstorage.blob.core.windows.net/drop/$CARBON_VERSION/SpeechSDK-Linux-$CARBON_VERSION.tar.gz 42 | tar xzf SpeechSDK-Linux-$CARBON_VERSION.tar.gz 43 | rm SpeechSDK-Linux-$CARBON_VERSION.tar.gz 44 | ln -s SpeechSDK-Linux-$CARBON_VERSION current 45 | popd 46 | - name: Get Carbon Dependencies 47 | run: | 48 | sudo apt-get update 49 | sudo apt-get install libasound2 50 | sudo apt-get install -y libgstreamer1.0-0 gstreamer1.0-plugins-good 51 | dpkg -l | grep gstreamer 52 | - name: Build 53 | run: | 54 | export CGO_CFLAGS="-I$HOME/carbon/current/include/c_api" 55 | export CGO_LDFLAGS="-L$HOME/carbon/current/lib/x64 -lMicrosoft.CognitiveServices.Speech.core" 56 | export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$HOME/carbon/current/lib/x64" 57 | go build -v ./... 58 | - name: Test 59 | env: 60 | DIALOG_SUBSCRIPTION_KEY: ${{ secrets.DIALOG_SUBSCRIPTION_KEY }} 61 | DIALOG_SUBSCRIPTION_REGION: ${{ secrets.DIALOG_SUBSCRIPTION_REGION }} 62 | SPEECH_SUBSCRIPTION_KEY: ${{ secrets.SR_SUBSCRIPTION_KEY }} 63 | SPEECH_SUBSCRIPTION_REGION: ${{ secrets.SR_SUBSCRIPTION_REGION }} 64 | run: | 65 | export CGO_CFLAGS="-I$HOME/carbon/current/include/c_api" 66 | export CGO_LDFLAGS="-L$HOME/carbon/current/lib/x64 -lMicrosoft.CognitiveServices.Speech.core" 67 | export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$HOME/carbon/current/lib/x64" 68 | go test -v ./... 
69 | - name: 'Upload Test Logs' 70 | uses: actions/upload-artifact@v4 71 | if: always() 72 | with: 73 | name: Log-${{ matrix.os }}-${{ matrix.go }} 74 | path: '**/*.log' 75 | retention-days: 3 76 | 77 | macos: 78 | name: Build (macOS) 79 | runs-on: ${{ matrix.os }} 80 | strategy: 81 | matrix: 82 | os: [macos-10.15, macos-11.0] 83 | steps: 84 | - name: Install modern bash for macOS 85 | run: | 86 | export HOMEBREW_NO_INSTALL_CLEANUP=1 87 | brew config && brew install bash 88 | - name: Check out code into the Go module directory 89 | uses: actions/checkout@v3 90 | 91 | - name: Get dependencies 92 | run: | 93 | go get -v -t -d ./... 94 | if [ -f Gopkg.toml ]; then 95 | curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh 96 | dep ensure 97 | fi 98 | 99 | - name: Get Speech SDK (macOS) 100 | run: | 101 | cd ~ 102 | wget https://csspeechstorage.blob.core.windows.net/drop/$CARBON_VERSION/MicrosoftCognitiveServicesSpeech-XCFramework-$CARBON_VERSION.zip -O SpeechSDK.zip 103 | unzip SpeechSDK.zip -d speechsdk 104 | - name: Build 105 | run: | 106 | export SDK_HOME="$HOME/speechsdk/MicrosoftCognitiveServicesSpeech.xcframework/macos-arm64_x86_64" 107 | export CGO_CFLAGS="-I$SDK_HOME/MicrosoftCognitiveServicesSpeech.framework/Headers" 108 | export CGO_LDFLAGS="-F$SDK_HOME -framework MicrosoftCognitiveServicesSpeech" 109 | export DYLD_FRAMEWORK_PATH="$DYLD_FRAMEWORK_PATH:$SDK_HOME" 110 | go build -v ./... 
111 | - name: Test 112 | env: 113 | DIALOG_SUBSCRIPTION_KEY: ${{ secrets.DIALOG_SUBSCRIPTION_KEY }} 114 | DIALOG_SUBSCRIPTION_REGION: ${{ secrets.DIALOG_SUBSCRIPTION_REGION }} 115 | SPEECH_SUBSCRIPTION_KEY: ${{ secrets.SR_SUBSCRIPTION_KEY }} 116 | SPEECH_SUBSCRIPTION_REGION: ${{ secrets.SR_SUBSCRIPTION_REGION }} 117 | run: | 118 | export SDK_HOME="$HOME/speechsdk/MicrosoftCognitiveServicesSpeech.xcframework/macos-arm64_x86_64" 119 | export CGO_CFLAGS="-I$SDK_HOME/MicrosoftCognitiveServicesSpeech.framework/Headers" 120 | export CGO_LDFLAGS="-F$SDK_HOME -framework MicrosoftCognitiveServicesSpeech" 121 | export DYLD_FRAMEWORK_PATH="$DYLD_FRAMEWORK_PATH:$SDK_HOME" 122 | go test -v ./... 123 | -------------------------------------------------------------------------------- /audio/audio_output_stream.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package audio 5 | 6 | import ( 7 | "unsafe" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | // #include 14 | // #include 15 | // #include 16 | // #include 17 | // 18 | // /* Proxy functions forward declarations */ 19 | // int cgo_audio_push_stream_write_callback_wrapper(void *context, uint8_t* buffer, uint32_t size); 20 | // void cgo_audio_push_stream_close_callback_wrapper(void *context); 21 | import "C" 22 | 23 | // AudioOutputStream represents audio output stream used for custom audio output configurations. 
24 | // Updated in version 1.7.0 25 | type AudioOutputStream interface { 26 | Close() 27 | getHandle() C.SPXHANDLE 28 | } 29 | 30 | type audioOutputStreamBase struct { 31 | handle C.SPXHANDLE 32 | } 33 | 34 | func (stream *audioOutputStreamBase) getHandle() C.SPXHANDLE { 35 | return stream.handle 36 | } 37 | 38 | func (stream *audioOutputStreamBase) Close() { 39 | C.audio_stream_release(stream.handle) 40 | } 41 | 42 | // PullAudioOutputStream represents memory backed pull audio output stream used for custom audio output configurations. 43 | type PullAudioOutputStream struct { 44 | audioOutputStreamBase 45 | } 46 | 47 | // NewPullAudioOutputStreamFromHandle creates a new PullAudioOutputStream from a handle (for internal use) 48 | func NewPullAudioOutputStreamFromHandle(handle common.SPXHandle) *PullAudioOutputStream { 49 | stream := new(PullAudioOutputStream) 50 | stream.handle = uintptr2handle(handle) 51 | return stream 52 | } 53 | 54 | // CreatePullAudioOutputStream creates a memory backed PullAudioOutputStream. 55 | func CreatePullAudioOutputStream() (*PullAudioOutputStream, error) { 56 | var handle C.SPXHANDLE 57 | ret := uintptr(C.audio_stream_create_pull_audio_output_stream(&handle)) 58 | if ret != C.SPX_NOERROR { 59 | return nil, common.NewCarbonError(ret) 60 | } 61 | return NewPullAudioOutputStreamFromHandle(handle2uintptr(handle)), nil 62 | } 63 | 64 | // Read reads audio from the stream. 65 | // The maximal number of bytes to be read is determined from the size parameter. 66 | // If there is no data immediately available, read() blocks until the next data becomes available. 
67 | func (stream PullAudioOutputStream) Read(size uint) ([]byte, error) { 68 | cBuffer := C.malloc(C.sizeof_char * (C.size_t)(size)) 69 | defer C.free(unsafe.Pointer(cBuffer)) 70 | var outSize C.uint32_t 71 | ret := uintptr(C.pull_audio_output_stream_read(stream.handle, (*C.uint8_t)(cBuffer), (C.uint32_t)(size), &outSize)) 72 | if ret != C.SPX_NOERROR { 73 | return nil, common.NewCarbonError(ret) 74 | } 75 | buffer := C.GoBytes(cBuffer, (C.int)(outSize)) 76 | return buffer, nil 77 | } 78 | 79 | // PushAudioOutputStream represents audio output stream used for custom audio output configurations. 80 | type PushAudioOutputStream struct { 81 | audioOutputStreamBase 82 | } 83 | 84 | // PushAudioOutputStreamCallback an interface that defines callback methods (Write() and CloseStream()) for custom audio output 85 | // streams). 86 | type PushAudioOutputStreamCallback interface { 87 | Write(buffer []byte) int 88 | CloseStream() 89 | } 90 | 91 | var pushStreamCallbacks = make(map[C.SPXHANDLE]PushAudioOutputStreamCallback) 92 | 93 | func registerPushStreamCallback(handle C.SPXHANDLE, callback PushAudioOutputStreamCallback) { 94 | mu.Lock() 95 | defer mu.Unlock() 96 | pushStreamCallbacks[handle] = callback 97 | } 98 | 99 | func getPushStreamCallback(handle C.SPXHANDLE) *PushAudioOutputStreamCallback { 100 | mu.Lock() 101 | defer mu.Unlock() 102 | cb, ok := pushStreamCallbacks[handle] 103 | if ok { 104 | return &cb 105 | } 106 | return nil 107 | } 108 | 109 | //nolint:deadcode 110 | func deregisterPushStreamCallback(handle C.SPXHANDLE) { 111 | mu.Lock() 112 | defer mu.Unlock() 113 | pushStreamCallbacks[handle] = nil 114 | } 115 | 116 | //export cgoAudioOutputCallWriteCallback 117 | func cgoAudioOutputCallWriteCallback(handle C.SPXHANDLE, buffer *C.uint8_t, size C.uint32_t) int { 118 | callback := getPushStreamCallback(handle) 119 | if callback != nil { 120 | goBuffer := C.GoBytes(unsafe.Pointer(buffer), (C.int)(size)) 121 | return (*callback).Write(goBuffer) 122 | } 123 | 
return 0 124 | } 125 | 126 | //export cgoAudioOutputCallCloseCallback 127 | func cgoAudioOutputCallCloseCallback(handle C.SPXHANDLE) { 128 | callback := getPushStreamCallback(handle) 129 | if callback != nil { 130 | (*callback).CloseStream() 131 | } 132 | } 133 | 134 | // CreatePushAudioOutputStream creates a PushAudioOutputStream that delegates to the specified callback interface for Write() 135 | // and CloseStream() methods. 136 | func CreatePushAudioOutputStream(callback PushAudioOutputStreamCallback) (*PushAudioOutputStream, error) { 137 | var handle C.SPXHANDLE 138 | ret := uintptr(C.audio_stream_create_push_audio_output_stream(&handle)) 139 | if ret != C.SPX_NOERROR { 140 | return nil, common.NewCarbonError(ret) 141 | } 142 | ret = uintptr(C.push_audio_output_stream_set_callbacks( 143 | handle, 144 | unsafe.Pointer(handle), 145 | (C.CUSTOM_AUDIO_PUSH_STREAM_WRITE_CALLBACK)(unsafe.Pointer(C.cgo_audio_push_stream_write_callback_wrapper)), 146 | (C.CUSTOM_AUDIO_PUSH_STREAM_CLOSE_CALLBACK)(unsafe.Pointer(C.cgo_audio_push_stream_close_callback_wrapper)))) 147 | if ret != C.SPX_NOERROR { 148 | return nil, common.NewCarbonError(ret) 149 | } 150 | registerPushStreamCallback(handle, callback) 151 | stream := new(PushAudioOutputStream) 152 | stream.handle = handle 153 | return stream, nil 154 | } 155 | -------------------------------------------------------------------------------- /dialog/callback_helpers.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package dialog 5 | 6 | import ( 7 | "sync" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 10 | ) 11 | 12 | // #include 13 | // #include 14 | // #include 15 | import "C" 16 | 17 | var mu sync.Mutex 18 | var sessionStartedCallbacks = make(map[C.SPXHANDLE]speech.SessionEventHandler) 19 | 20 | func registerSessionStartedCallback(handler speech.SessionEventHandler, handle C.SPXHANDLE) { 21 | mu.Lock() 22 | defer mu.Unlock() 23 | sessionStartedCallbacks[handle] = handler 24 | } 25 | 26 | func getSessionStartedCallback(handle C.SPXHANDLE) speech.SessionEventHandler { 27 | mu.Lock() 28 | defer mu.Unlock() 29 | return sessionStartedCallbacks[handle] 30 | } 31 | 32 | //export dialogFireEventSessionStarted 33 | func dialogFireEventSessionStarted(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 34 | handler := getSessionStartedCallback(handle) 35 | event, err := speech.NewSessionEventArgsFromHandle(handle2uintptr(eventHandle)) 36 | if err != nil || handler == nil { 37 | C.recognizer_event_handle_release(handle) 38 | return 39 | } 40 | handler(*event) 41 | } 42 | 43 | var sessionStoppedCallbacks = make(map[C.SPXHANDLE]speech.SessionEventHandler) 44 | 45 | func registerSessionStoppedCallback(handler speech.SessionEventHandler, handle C.SPXHANDLE) { 46 | mu.Lock() 47 | defer mu.Unlock() 48 | sessionStoppedCallbacks[handle] = handler 49 | } 50 | 51 | func getSessionStoppedCallback(handle C.SPXHANDLE) speech.SessionEventHandler { 52 | mu.Lock() 53 | defer mu.Unlock() 54 | return sessionStoppedCallbacks[handle] 55 | } 56 | 57 | //export dialogFireEventSessionStopped 58 | func dialogFireEventSessionStopped(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 59 | handler := getSessionStoppedCallback(handle) 60 | event, err := speech.NewSessionEventArgsFromHandle(handle2uintptr(eventHandle)) 61 | if err != nil || handler == nil { 62 | C.recognizer_event_handle_release(handle) 63 | return 64 | } 65 | handler(*event) 66 | } 67 | 68 | var 
recognizedCallbacks = make(map[C.SPXHANDLE]speech.SpeechRecognitionEventHandler) 69 | 70 | func registerRecognizedCallback(handler speech.SpeechRecognitionEventHandler, handle C.SPXHANDLE) { 71 | mu.Lock() 72 | defer mu.Unlock() 73 | recognizedCallbacks[handle] = handler 74 | } 75 | 76 | func getRecognizedCallback(handle C.SPXHANDLE) speech.SpeechRecognitionEventHandler { 77 | mu.Lock() 78 | defer mu.Unlock() 79 | return recognizedCallbacks[handle] 80 | } 81 | 82 | //export dialogFireEventRecognized 83 | func dialogFireEventRecognized(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 84 | handler := getRecognizedCallback(handle) 85 | event, err := speech.NewSpeechRecognitionEventArgsFromHandle(handle2uintptr(eventHandle)) 86 | if err != nil || handler == nil { 87 | C.recognizer_event_handle_release(handle) 88 | return 89 | } 90 | handler(*event) 91 | } 92 | 93 | var recognizingCallbacks = make(map[C.SPXHANDLE]speech.SpeechRecognitionEventHandler) 94 | 95 | func registerRecognizingCallback(handler speech.SpeechRecognitionEventHandler, handle C.SPXHANDLE) { 96 | mu.Lock() 97 | defer mu.Unlock() 98 | recognizingCallbacks[handle] = handler 99 | } 100 | 101 | func getRecognizingCallback(handle C.SPXHANDLE) speech.SpeechRecognitionEventHandler { 102 | mu.Lock() 103 | defer mu.Unlock() 104 | return recognizingCallbacks[handle] 105 | } 106 | 107 | //export dialogFireEventRecognizing 108 | func dialogFireEventRecognizing(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 109 | handler := getRecognizingCallback(handle) 110 | event, err := speech.NewSpeechRecognitionEventArgsFromHandle(handle2uintptr(eventHandle)) 111 | if err != nil || handler == nil { 112 | C.recognizer_event_handle_release(handle) 113 | return 114 | } 115 | handler(*event) 116 | } 117 | 118 | var canceledCallbacks = make(map[C.SPXHANDLE]speech.SpeechRecognitionCanceledEventHandler) 119 | 120 | func registerCanceledCallback(handler speech.SpeechRecognitionCanceledEventHandler, handle C.SPXHANDLE) 
{ 121 | mu.Lock() 122 | defer mu.Unlock() 123 | canceledCallbacks[handle] = handler 124 | } 125 | 126 | func getCanceledCallback(handle C.SPXHANDLE) speech.SpeechRecognitionCanceledEventHandler { 127 | mu.Lock() 128 | defer mu.Unlock() 129 | return canceledCallbacks[handle] 130 | } 131 | 132 | //export dialogFireEventCanceled 133 | func dialogFireEventCanceled(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 134 | handler := getCanceledCallback(handle) 135 | event, err := speech.NewSpeechRecognitionCanceledEventArgsFromHandle(handle2uintptr(eventHandle)) 136 | if err != nil || handler == nil { 137 | C.recognizer_event_handle_release(handle) 138 | return 139 | } 140 | handler(*event) 141 | } 142 | 143 | var activityReceivedCallbacks = make(map[C.SPXHANDLE]ActivityReceivedEventHandler) 144 | 145 | func registerActivityReceivedCallback(handler ActivityReceivedEventHandler, handle C.SPXHANDLE) { 146 | mu.Lock() 147 | defer mu.Unlock() 148 | activityReceivedCallbacks[handle] = handler 149 | } 150 | 151 | func getActivityReceivedCallback(handle C.SPXHANDLE) ActivityReceivedEventHandler { 152 | mu.Lock() 153 | defer mu.Unlock() 154 | return activityReceivedCallbacks[handle] 155 | } 156 | 157 | //export dialogFireEventActivityReceived 158 | func dialogFireEventActivityReceived(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 159 | handler := getActivityReceivedCallback(handle) 160 | event, err := NewActivityReceivedEventArgsFromHandle(handle2uintptr(eventHandle)) 161 | if err != nil || handler == nil { 162 | C.dialog_service_connector_activity_received_event_release(handle) 163 | return 164 | } 165 | handler(*event) 166 | } 167 | -------------------------------------------------------------------------------- /samples/recognizer/wrapper.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. 
See LICENSE.md file in the project root for full license information. 3 | 4 | package recognizer 5 | 6 | import ( 7 | "bufio" 8 | "fmt" 9 | "io" 10 | "os" 11 | "sync/atomic" 12 | "time" 13 | 14 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 15 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 16 | ) 17 | 18 | type SDKWrapperEventType int 19 | 20 | const ( 21 | Cancellation SDKWrapperEventType = iota 22 | Recognizing 23 | Recognized 24 | ) 25 | 26 | type SDKWrapperEvent struct { 27 | EventType SDKWrapperEventType 28 | Cancellation *speech.SpeechRecognitionCanceledEventArgs 29 | Recognized *speech.SpeechRecognitionEventArgs 30 | Recognizing *speech.SpeechRecognitionEventArgs 31 | } 32 | 33 | func (event *SDKWrapperEvent) Close() { 34 | if event.Cancellation != nil { 35 | event.Cancellation.Close() 36 | } 37 | if event.Recognizing != nil { 38 | event.Recognizing.Close() 39 | } 40 | if event.Recognized != nil { 41 | event.Recognized.Close() 42 | } 43 | } 44 | 45 | type SDKWrapper struct { 46 | stream *audio.PushAudioInputStream 47 | recognizer *speech.SpeechRecognizer 48 | started int32 49 | } 50 | 51 | func NewWrapper(subscription string, region string) (*SDKWrapper, error) { 52 | format, err := audio.GetDefaultInputFormat() 53 | if err != nil { 54 | return nil, err 55 | } 56 | defer format.Close() 57 | stream, err := audio.CreatePushAudioInputStreamFromFormat(format) 58 | if err != nil { 59 | return nil, err 60 | } 61 | audioConfig, err := audio.NewAudioConfigFromStreamInput(stream) 62 | if err != nil { 63 | stream.Close() 64 | return nil, err 65 | } 66 | defer audioConfig.Close() 67 | config, err := speech.NewSpeechConfigFromSubscription(subscription, region) 68 | if err != nil { 69 | stream.Close() 70 | return nil, err 71 | } 72 | defer config.Close() 73 | recognizer, err := speech.NewSpeechRecognizerFromConfig(config, audioConfig) 74 | if err != nil { 75 | stream.Close() 76 | return nil, err 77 | } 78 | wrapper := new(SDKWrapper) 79 
| wrapper.recognizer = recognizer 80 | wrapper.stream = stream 81 | return wrapper, nil 82 | } 83 | 84 | func (wrapper *SDKWrapper) Close() { 85 | wrapper.stream.CloseStream() 86 | <-wrapper.recognizer.StopContinuousRecognitionAsync() 87 | wrapper.stream.Close() 88 | wrapper.recognizer.Close() 89 | } 90 | 91 | func (wrapper *SDKWrapper) Write(buffer []byte) error { 92 | if atomic.LoadInt32(&wrapper.started) != 1 { 93 | return fmt.Errorf("Trying to write when recognizer is stopped") 94 | } 95 | return wrapper.stream.Write(buffer) 96 | } 97 | 98 | func (wrapper *SDKWrapper) StartContinuous(callback func(*SDKWrapperEvent)) error { 99 | if atomic.SwapInt32(&wrapper.started, 1) == 1 { 100 | return nil 101 | } 102 | wrapper.recognizer.Recognized(func(event speech.SpeechRecognitionEventArgs) { 103 | wrapperEvent := new(SDKWrapperEvent) 104 | wrapperEvent.EventType = Recognized 105 | wrapperEvent.Recognized = &event 106 | callback(wrapperEvent) 107 | }) 108 | wrapper.recognizer.Recognizing(func(event speech.SpeechRecognitionEventArgs) { 109 | wrapperEvent := new(SDKWrapperEvent) 110 | wrapperEvent.EventType = Recognizing 111 | wrapperEvent.Recognizing = &event 112 | callback(wrapperEvent) 113 | }) 114 | wrapper.recognizer.Canceled(func(event speech.SpeechRecognitionCanceledEventArgs) { 115 | wrapperEvent := new(SDKWrapperEvent) 116 | wrapperEvent.EventType = Cancellation 117 | wrapperEvent.Cancellation = &event 118 | callback(wrapperEvent) 119 | }) 120 | return <-wrapper.recognizer.StartContinuousRecognitionAsync() 121 | } 122 | 123 | func (wrapper *SDKWrapper) StopContinuous() error { 124 | if atomic.SwapInt32(&wrapper.started, 0) == 0 { 125 | return nil 126 | } 127 | var empty = []byte{} 128 | wrapper.stream.Write(empty) 129 | wrapper.recognizer.Recognized(nil) 130 | wrapper.recognizer.Recognizing(nil) 131 | wrapper.recognizer.Canceled(nil) 132 | return <-wrapper.recognizer.StopContinuousRecognitionAsync() 133 | } 134 | 135 | func PumpFileContinuously(stop chan int, 
filename string, wrapper *SDKWrapper) { 136 | file, err := os.Open(filename) 137 | if err != nil { 138 | fmt.Println("Error opening file: ", err) 139 | return 140 | } 141 | defer file.Close() 142 | reader := bufio.NewReader(file) 143 | buffer := make([]byte, 3200) 144 | for { 145 | select { 146 | case <-stop: 147 | fmt.Println("Stopping pump...") 148 | return 149 | case <-time.After(100 * time.Millisecond): 150 | } 151 | n, err := reader.Read(buffer) 152 | if err == io.EOF { 153 | file.Seek(44, io.SeekStart) 154 | continue 155 | } 156 | if err != nil { 157 | fmt.Println("Error reading file: ", err) 158 | break 159 | } 160 | err = wrapper.Write(buffer[0:n]) 161 | if err != nil { 162 | fmt.Println("Error writing to the stream") 163 | } 164 | } 165 | } 166 | 167 | func RecognizeContinuousUsingWrapper(subscription string, region string, file string) { 168 | /* If running this in a server, each worker thread should run something similar to this */ 169 | wrapper, err := NewWrapper(subscription, region) 170 | if err != nil { 171 | fmt.Println("Got an error: ", err) 172 | } 173 | defer wrapper.Close() 174 | stop := make(chan int) 175 | go PumpFileContinuously(stop, file, wrapper) 176 | fmt.Println("Starting Continuous...") 177 | wrapper.StartContinuous(func(event *SDKWrapperEvent) { 178 | defer event.Close() 179 | switch event.EventType { 180 | case Recognized: 181 | fmt.Println("Got a recognized event") 182 | case Recognizing: 183 | fmt.Println("Got a recognizing event") 184 | case Cancellation: 185 | fmt.Println("Got a cancellation event") 186 | } 187 | }) 188 | <-time.After(10 * time.Second) 189 | stop <- 1 190 | fmt.Println("Stopping Continuous...") 191 | wrapper.StopContinuous() 192 | fmt.Println("Exiting...") 193 | } 194 | -------------------------------------------------------------------------------- /samples/recognizer/from_file.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 
2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package recognizer 5 | 6 | import ( 7 | "fmt" 8 | "time" 9 | "strings" 10 | 11 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 12 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 13 | "github.com/Microsoft/cognitive-services-speech-sdk-go/samples/helpers" 14 | ) 15 | 16 | func RecognizeOnceFromWavFile(subscription string, region string, file string) { 17 | audioConfig, err := audio.NewAudioConfigFromWavFileInput(file) 18 | if err != nil { 19 | fmt.Println("Got an error: ", err) 20 | return 21 | } 22 | defer audioConfig.Close() 23 | config, err := speech.NewSpeechConfigFromSubscription(subscription, region) 24 | if err != nil { 25 | fmt.Println("Got an error: ", err) 26 | return 27 | } 28 | defer config.Close() 29 | speechRecognizer, err := speech.NewSpeechRecognizerFromConfig(config, audioConfig) 30 | if err != nil { 31 | fmt.Println("Got an error: ", err) 32 | return 33 | } 34 | defer speechRecognizer.Close() 35 | speechRecognizer.SessionStarted(func(event speech.SessionEventArgs) { 36 | defer event.Close() 37 | fmt.Println("Session Started (ID=", event.SessionID, ")") 38 | }) 39 | speechRecognizer.SessionStopped(func(event speech.SessionEventArgs) { 40 | defer event.Close() 41 | fmt.Println("Session Stopped (ID=", event.SessionID, ")") 42 | }) 43 | task := speechRecognizer.RecognizeOnceAsync() 44 | var outcome speech.SpeechRecognitionOutcome 45 | select { 46 | case outcome = <-task: 47 | case <-time.After(5 * time.Second): 48 | fmt.Println("Timed out") 49 | return 50 | } 51 | defer outcome.Close() 52 | if outcome.Error != nil { 53 | fmt.Println("Got an error: ", outcome.Error) 54 | } 55 | fmt.Println("Got a recognition!") 56 | fmt.Println(outcome.Result.Text) 57 | } 58 | 59 | func RecognizeOnceFromCompressedFile(subscription string, region string, file string) { 60 | var containerFormat 
audio.AudioStreamContainerFormat 61 | if strings.Contains(file, ".mulaw") { 62 | containerFormat = audio.MULAW 63 | } else if strings.Contains(file, ".alaw") { 64 | containerFormat = audio.ALAW 65 | } else if strings.Contains(file, ".mp3") { 66 | containerFormat = audio.MP3 67 | } else if strings.Contains(file, ".flac") { 68 | containerFormat = audio.FLAC 69 | } else if strings.Contains(file, ".opus") { 70 | containerFormat = audio.OGGOPUS 71 | } else { 72 | containerFormat = audio.ANY 73 | } 74 | format, err := audio.GetCompressedFormat(containerFormat) 75 | if err != nil { 76 | fmt.Println("Got an error: ", err) 77 | return 78 | } 79 | defer format.Close() 80 | stream, err := audio.CreatePushAudioInputStreamFromFormat(format) 81 | if err != nil { 82 | fmt.Println("Got an error: ", err) 83 | return 84 | } 85 | defer stream.Close() 86 | audioConfig, err := audio.NewAudioConfigFromStreamInput(stream) 87 | if err != nil { 88 | fmt.Println("Got an error: ", err) 89 | return 90 | } 91 | defer audioConfig.Close() 92 | config, err := speech.NewSpeechConfigFromSubscription(subscription, region) 93 | if err != nil { 94 | fmt.Println("Got an error: ", err) 95 | return 96 | } 97 | defer config.Close() 98 | speechRecognizer, err := speech.NewSpeechRecognizerFromConfig(config, audioConfig) 99 | if err != nil { 100 | fmt.Println("Got an error: ", err) 101 | return 102 | } 103 | defer speechRecognizer.Close() 104 | speechRecognizer.SessionStarted(func(event speech.SessionEventArgs) { 105 | defer event.Close() 106 | fmt.Println("Session Started (ID=", event.SessionID, ")") 107 | }) 108 | speechRecognizer.SessionStopped(func(event speech.SessionEventArgs) { 109 | defer event.Close() 110 | fmt.Println("Session Stopped (ID=", event.SessionID, ")") 111 | }) 112 | helpers.PumpFileIntoStream(file, stream) 113 | task := speechRecognizer.RecognizeOnceAsync() 114 | var outcome speech.SpeechRecognitionOutcome 115 | select { 116 | case outcome = <-task: 117 | case <-time.After(40 * 
time.Second): 118 | fmt.Println("Timed out") 119 | return 120 | } 121 | defer outcome.Close() 122 | if outcome.Error != nil { 123 | fmt.Println("Got an error: ", outcome.Error) 124 | } 125 | fmt.Println("Got a recognition!") 126 | fmt.Println(outcome.Result.Text) 127 | } 128 | 129 | func RecognizeOnceFromALAWFile(subscription string, region string, file string) { 130 | var waveFormat audio.AudioStreamWaveFormat 131 | waveFormat = audio.WaveALAW 132 | format, err := audio.GetWaveFormat(8000, 16, 1, waveFormat) 133 | if err != nil { 134 | fmt.Println("Got an error: ", err) 135 | return 136 | } 137 | defer format.Close() 138 | stream, err := audio.CreatePushAudioInputStreamFromFormat(format) 139 | if err != nil { 140 | fmt.Println("Got an error: ", err) 141 | return 142 | } 143 | defer stream.Close() 144 | audioConfig, err := audio.NewAudioConfigFromStreamInput(stream) 145 | if err != nil { 146 | fmt.Println("Got an error: ", err) 147 | return 148 | } 149 | defer audioConfig.Close() 150 | config, err := speech.NewSpeechConfigFromSubscription(subscription, region) 151 | if err != nil { 152 | fmt.Println("Got an error: ", err) 153 | return 154 | } 155 | defer config.Close() 156 | speechRecognizer, err := speech.NewSpeechRecognizerFromConfig(config, audioConfig) 157 | if err != nil { 158 | fmt.Println("Got an error: ", err) 159 | return 160 | } 161 | defer speechRecognizer.Close() 162 | speechRecognizer.SessionStarted(func(event speech.SessionEventArgs) { 163 | defer event.Close() 164 | fmt.Println("Session Started (ID=", event.SessionID, ")") 165 | }) 166 | speechRecognizer.SessionStopped(func(event speech.SessionEventArgs) { 167 | defer event.Close() 168 | fmt.Println("Session Stopped (ID=", event.SessionID, ")") 169 | }) 170 | helpers.PumpFileIntoStream(file, stream) 171 | task := speechRecognizer.RecognizeOnceAsync() 172 | var outcome speech.SpeechRecognitionOutcome 173 | select { 174 | case outcome = <-task: 175 | case <-time.After(40 * time.Second): 176 | 
fmt.Println("Timed out") 177 | return 178 | } 179 | defer outcome.Close() 180 | if outcome.Error != nil { 181 | fmt.Println("Got an error: ", outcome.Error) 182 | } 183 | fmt.Println("Got a recognition!") 184 | fmt.Println(outcome.Result.Text) 185 | } -------------------------------------------------------------------------------- /audio/audio_config.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package audio 5 | 6 | import ( 7 | "unsafe" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | // #include 14 | // #include 15 | // #include 16 | import "C" 17 | 18 | // AudioConfig represents specific audio configuration, such as microphone, file, or custom audio streams. 19 | type AudioConfig struct { 20 | handle C.SPXHANDLE 21 | properties *common.PropertyCollection 22 | } 23 | 24 | // GetHandle gets the handle to the resource (for internal use) 25 | func (config AudioConfig) GetHandle() common.SPXHandle { 26 | return handle2uintptr(config.handle) 27 | } 28 | 29 | // Close releases the underlying resources 30 | func (config AudioConfig) Close() { 31 | config.properties.Close() 32 | C.audio_config_release(config.handle) 33 | } 34 | 35 | func newAudioConfigFromHandle(handle C.SPXHANDLE) (*AudioConfig, error) { 36 | var propBagHandle C.SPXPROPERTYBAGHANDLE 37 | ret := uintptr(C.audio_config_get_property_bag(handle, &propBagHandle)) 38 | if ret != C.SPX_NOERROR { 39 | return nil, common.NewCarbonError(ret) 40 | } 41 | config := new(AudioConfig) 42 | config.handle = handle 43 | config.properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 44 | return config, nil 45 | } 46 | 47 | // NewAudioConfigFromDefaultMicrophoneInput creates an AudioConfig object representing the default microphone on 
the system. 48 | func NewAudioConfigFromDefaultMicrophoneInput() (*AudioConfig, error) { 49 | var handle C.SPXHANDLE 50 | ret := uintptr(C.audio_config_create_audio_input_from_default_microphone(&handle)) 51 | if ret != C.SPX_NOERROR { 52 | return nil, common.NewCarbonError(ret) 53 | } 54 | return newAudioConfigFromHandle(handle) 55 | } 56 | 57 | // NewAudioConfigFromMicrophoneInput creates an AudioConfig object representing a specific microphone on the system. 58 | func NewAudioConfigFromMicrophoneInput(deviceName string) (*AudioConfig, error) { 59 | var handle C.SPXHANDLE 60 | dn := C.CString(deviceName) 61 | defer C.free(unsafe.Pointer(dn)) 62 | ret := uintptr(C.audio_config_create_audio_input_from_a_microphone(&handle, dn)) 63 | if ret != C.SPX_NOERROR { 64 | return nil, common.NewCarbonError(ret) 65 | } 66 | return newAudioConfigFromHandle(handle) 67 | } 68 | 69 | // NewAudioConfigFromWavFileInput creates an AudioConfig object representing the specified file. 70 | func NewAudioConfigFromWavFileInput(filename string) (*AudioConfig, error) { 71 | var handle C.SPXHANDLE 72 | fn := C.CString(filename) 73 | defer C.free(unsafe.Pointer(fn)) 74 | ret := uintptr(C.audio_config_create_audio_input_from_wav_file_name(&handle, fn)) 75 | if ret != C.SPX_NOERROR { 76 | return nil, common.NewCarbonError(ret) 77 | } 78 | return newAudioConfigFromHandle(handle) 79 | } 80 | 81 | // NewAudioConfigFromStreamInput creates an AudioConfig object representing the specified stream. 82 | func NewAudioConfigFromStreamInput(stream AudioInputStream) (*AudioConfig, error) { 83 | var handle C.SPXHANDLE 84 | ret := uintptr(C.audio_config_create_audio_input_from_stream(&handle, stream.getHandle())) 85 | if ret != C.SPX_NOERROR { 86 | return nil, common.NewCarbonError(ret) 87 | } 88 | return newAudioConfigFromHandle(handle) 89 | } 90 | 91 | // NewAudioConfigFromDefaultSpeakerOutput creates an AudioConfig object representing the default audio output device 92 | // (speaker) on the system. 
93 | func NewAudioConfigFromDefaultSpeakerOutput() (*AudioConfig, error) { 94 | var handle C.SPXHANDLE 95 | ret := uintptr(C.audio_config_create_audio_output_from_default_speaker(&handle)) 96 | if ret != C.SPX_NOERROR { 97 | return nil, common.NewCarbonError(ret) 98 | } 99 | return newAudioConfigFromHandle(handle) 100 | } 101 | 102 | // NewAudioConfigFromSpeakerOutput creates an AudioConfig object representing the specific audio output device 103 | // (speaker) on the system. 104 | func NewAudioConfigFromSpeakerOutput(deviceName string) (*AudioConfig, error) { 105 | var handle C.SPXHANDLE 106 | dn := C.CString(deviceName) 107 | defer C.free(unsafe.Pointer(dn)) 108 | ret := uintptr(C.audio_config_create_audio_output_from_a_speaker(&handle, dn)) 109 | if ret != C.SPX_NOERROR { 110 | return nil, common.NewCarbonError(ret) 111 | } 112 | return newAudioConfigFromHandle(handle) 113 | } 114 | 115 | // NewAudioConfigFromWavFileOutput creates an AudioConfig object representing the specified file for audio output. 116 | func NewAudioConfigFromWavFileOutput(filename string) (*AudioConfig, error) { 117 | var handle C.SPXHANDLE 118 | fn := C.CString(filename) 119 | defer C.free(unsafe.Pointer(fn)) 120 | ret := uintptr(C.audio_config_create_audio_output_from_wav_file_name(&handle, fn)) 121 | if ret != C.SPX_NOERROR { 122 | return nil, common.NewCarbonError(ret) 123 | } 124 | return newAudioConfigFromHandle(handle) 125 | } 126 | 127 | // NewAudioConfigFromStreamOutput creates an AudioConfig object representing the specified output stream. 128 | func NewAudioConfigFromStreamOutput(stream AudioOutputStream) (*AudioConfig, error) { 129 | var handle C.SPXHANDLE 130 | streamHandle := stream.getHandle() 131 | ret := uintptr(C.audio_config_create_audio_output_from_stream(&handle, streamHandle)) 132 | if ret != C.SPX_NOERROR { 133 | return nil, common.NewCarbonError(ret) 134 | } 135 | return newAudioConfigFromHandle(handle) 136 | } 137 | 138 | // SetProperty sets a property value by ID. 
139 | func (config AudioConfig) SetProperty(id common.PropertyID, value string) error { 140 | return config.properties.SetProperty(id, value) 141 | } 142 | 143 | // GetProperty gets a property value by ID. 144 | func (config AudioConfig) GetProperty(id common.PropertyID) string { 145 | return config.properties.GetProperty(id, "") 146 | } 147 | 148 | // SetPropertyByString sets a property value by name. 149 | func (config AudioConfig) SetPropertyByString(name string, value string) error { 150 | return config.properties.SetPropertyByString(name, value) 151 | } 152 | 153 | // GetPropertyByString gets a property value by name. 154 | func (config AudioConfig) GetPropertyByString(name string) string { 155 | return config.properties.GetPropertyByString(name, "") 156 | } 157 | -------------------------------------------------------------------------------- /speech/audio_data_stream.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "io" 8 | "unsafe" 9 | 10 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 11 | ) 12 | 13 | // #include 14 | // #include 15 | // #include 16 | // #include 17 | // #include 18 | // 19 | import "C" 20 | 21 | // AudioDataStream represents audio data stream used for operating audio data as a stream. 22 | // Added in version 1.17.0 23 | type AudioDataStream struct { 24 | handle C.SPXHANDLE 25 | 26 | // Properties represents the collection of additional properties. 27 | Properties *common.PropertyCollection 28 | } 29 | 30 | // Close disposes the associated resources. 
31 | func (stream AudioDataStream) Close() { 32 | stream.Properties.Close() 33 | C.audio_data_stream_release(stream.handle) 34 | } 35 | 36 | // NewAudioDataStreamFromHandle creates a new AudioDataStream from a handle (for internal use) 37 | func NewAudioDataStreamFromHandle(handle common.SPXHandle) (*AudioDataStream, error) { 38 | stream := new(AudioDataStream) 39 | stream.handle = uintptr2handle(handle) 40 | /* Properties */ 41 | var propBagHandle C.SPXHANDLE 42 | ret := uintptr(C.audio_data_stream_get_property_bag(uintptr2handle(handle), &propBagHandle)) 43 | if ret != C.SPX_NOERROR { 44 | return nil, common.NewCarbonError(ret) 45 | } 46 | stream.Properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 47 | return stream, nil 48 | } 49 | 50 | // NewAudioDataStreamFromWavFileInput creates a memory backed AudioDataStream for the specified audio input file. 51 | func NewAudioDataStreamFromWavFileInput(filename string) (*AudioDataStream, error) { 52 | var handle C.SPXHANDLE 53 | fn := C.CString(filename) 54 | defer C.free(unsafe.Pointer(fn)) 55 | ret := uintptr(C.audio_data_stream_create_from_file(&handle, fn)) 56 | if ret != C.SPX_NOERROR { 57 | return nil, common.NewCarbonError(ret) 58 | } 59 | return NewAudioDataStreamFromHandle(handle2uintptr(handle)) 60 | } 61 | 62 | // NewAudioDataStreamFromSpeechSynthesisResult creates a memory backed AudioDataStream from given speech synthesis result. 63 | func NewAudioDataStreamFromSpeechSynthesisResult(result *SpeechSynthesisResult) (*AudioDataStream, error) { 64 | var handle C.SPXHANDLE 65 | ret := uintptr(C.audio_data_stream_create_from_result(&handle, result.handle)) 66 | if ret != C.SPX_NOERROR { 67 | return nil, common.NewCarbonError(ret) 68 | } 69 | return NewAudioDataStreamFromHandle(handle2uintptr(handle)) 70 | } 71 | 72 | // GetStatus gets the current status of the audio data stream. 
73 | func (stream AudioDataStream) GetStatus() (common.StreamStatus, error) { 74 | var cStatus C.Stream_Status 75 | ret := uintptr(C.audio_data_stream_get_status(stream.handle, &cStatus)) 76 | if ret != C.SPX_NOERROR { 77 | return common.StreamStatusUnknown, common.NewCarbonError(ret) 78 | } 79 | return (common.StreamStatus)(cStatus), nil 80 | } 81 | 82 | // CanReadData checks whether the stream has enough data to be read. 83 | func (stream AudioDataStream) CanReadData(bytesRequested uint) bool { 84 | return (bool)(C.audio_data_stream_can_read_data(stream.handle, (C.uint32_t)(bytesRequested))) 85 | } 86 | 87 | // CanReadDataAt checks whether the stream has enough data to be read, at the specified offset. 88 | func (stream AudioDataStream) CanReadDataAt(bytesRequested uint, off int64) bool { 89 | return (bool)(C.audio_data_stream_can_read_data_from_position(stream.handle, (C.uint32_t)(bytesRequested), (C.uint32_t)(off))) 90 | } 91 | 92 | // Read reads a chunk of the audio data stream and fill it to given buffer. 93 | // It returns size of data filled to the buffer and any write error encountered. 94 | func (stream AudioDataStream) Read(buffer []byte) (int, error) { 95 | if len(buffer) == 0 { 96 | return 0, common.NewCarbonError(0x005) // SPXERR_INVALID_ARG 97 | } 98 | var outSize C.uint32_t 99 | ret := uintptr(C.audio_data_stream_read(stream.handle, (*C.uint8_t)(unsafe.Pointer(&buffer[0])), (C.uint32_t)(len(buffer)), &outSize)) 100 | if ret != C.SPX_NOERROR { 101 | return 0, common.NewCarbonError(ret) 102 | } 103 | if outSize == 0 { 104 | return 0, io.EOF 105 | } 106 | return (int)(outSize), nil 107 | } 108 | 109 | // ReadAt reads a chunk of the audio data stream and fill it to given buffer, at specified offset. 110 | // It returns size of data filled to the buffer and any write error encountered. 
111 | func (stream AudioDataStream) ReadAt(buffer []byte, off int64) (int, error) { 112 | if len(buffer) == 0 { 113 | return 0, common.NewCarbonError(0x005) // SPXERR_INVALID_ARG 114 | } 115 | var outSize C.uint32_t 116 | ret := uintptr(C.audio_data_stream_read_from_position(stream.handle, (*C.uint8_t)(unsafe.Pointer(&buffer[0])), (C.uint32_t)(len(buffer)), (C.uint32_t)(off), &outSize)) 117 | if ret != C.SPX_NOERROR { 118 | return 0, common.NewCarbonError(ret) 119 | } 120 | if outSize == 0 { 121 | return 0, io.EOF 122 | } 123 | return (int)(outSize), nil 124 | } 125 | 126 | // SaveToWavFileAsync saves the audio data to a file, asynchronously. 127 | func (stream AudioDataStream) SaveToWavFileAsync(filename string) chan error { 128 | outcome := make(chan error) 129 | go func() { 130 | fn := C.CString(filename) 131 | defer C.free(unsafe.Pointer(fn)) 132 | ret := uintptr(C.audio_data_stream_save_to_wave_file(stream.handle, fn)) 133 | if ret != C.SPX_NOERROR { 134 | outcome <- common.NewCarbonError(ret) 135 | } else { 136 | outcome <- nil 137 | } 138 | }() 139 | return outcome 140 | } 141 | 142 | // GetOffset gets current offset of the audio data stream. 143 | func (stream AudioDataStream) GetOffset() (int, error) { 144 | var position C.uint32_t 145 | ret := uintptr(C.audio_data_stream_get_position(stream.handle, &position)) 146 | if ret != C.SPX_NOERROR { 147 | return 0, common.NewCarbonError(ret) 148 | } 149 | return (int)(position), nil 150 | } 151 | 152 | // SetOffset sets current offset of the audio data stream. 
153 | func (stream AudioDataStream) SetOffset(offset int) error { 154 | ret := uintptr(C.audio_data_stream_set_position(stream.handle, (C.uint32_t)(offset))) 155 | if ret != C.SPX_NOERROR { 156 | return common.NewCarbonError(ret) 157 | } 158 | return nil 159 | } 160 | -------------------------------------------------------------------------------- /common/speech_synthesis_output_format.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | // SpeechSynthesisOutputFormat defines the possible speech synthesis output audio formats. 7 | type SpeechSynthesisOutputFormat int 8 | 9 | const ( 10 | // Raw8Khz8BitMonoMULaw stands for raw-8khz-8bit-mono-mulaw 11 | Raw8Khz8BitMonoMULaw SpeechSynthesisOutputFormat = 1 12 | 13 | // Riff16Khz16KbpsMonoSiren stands for riff-16khz-16kbps-mono-siren 14 | // Unsupported by the service. Do not use this value. 15 | Riff16Khz16KbpsMonoSiren SpeechSynthesisOutputFormat = 2 16 | 17 | // Audio16Khz16KbpsMonoSiren stands for audio-16khz-16kbps-mono-siren 18 | // Unsupported by the service. Do not use this value. 
19 | Audio16Khz16KbpsMonoSiren SpeechSynthesisOutputFormat = 3 20 | 21 | // Audio16Khz32KBitRateMonoMp3 stands for audio-16khz-32kbitrate-mono-mp3 22 | Audio16Khz32KBitRateMonoMp3 SpeechSynthesisOutputFormat = 4 23 | 24 | // Audio16Khz128KBitRateMonoMp3 stands for audio-16khz-128kbitrate-mono-mp3 25 | Audio16Khz128KBitRateMonoMp3 SpeechSynthesisOutputFormat = 5 26 | 27 | // Audio16Khz64KBitRateMonoMp3 stands for audio-16khz-64kbitrate-mono-mp3 28 | Audio16Khz64KBitRateMonoMp3 SpeechSynthesisOutputFormat = 6 29 | 30 | // Audio24Khz48KBitRateMonoMp3 stands for audio-24khz-48kbitrate-mono-mp3 31 | Audio24Khz48KBitRateMonoMp3 SpeechSynthesisOutputFormat = 7 32 | 33 | // Audio24Khz96KBitRateMonoMp3 stands for audio-24khz-96kbitrate-mono-mp3 34 | Audio24Khz96KBitRateMonoMp3 SpeechSynthesisOutputFormat = 8 35 | 36 | // Audio24Khz160KBitRateMonoMp3 stands for audio-24khz-160kbitrate-mono-mp3 37 | Audio24Khz160KBitRateMonoMp3 SpeechSynthesisOutputFormat = 9 38 | 39 | // Raw16Khz16BitMonoTrueSilk stands for raw-16khz-16bit-mono-truesilk 40 | Raw16Khz16BitMonoTrueSilk SpeechSynthesisOutputFormat = 10 41 | 42 | // Riff16Khz16BitMonoPcm stands for riff-16khz-16bit-mono-pcm 43 | Riff16Khz16BitMonoPcm SpeechSynthesisOutputFormat = 11 44 | 45 | // Riff8Khz16BitMonoPcm stands for riff-8khz-16bit-mono-pcm 46 | Riff8Khz16BitMonoPcm SpeechSynthesisOutputFormat = 12 47 | 48 | // Riff24Khz16BitMonoPcm stands for riff-24khz-16bit-mono-pcm 49 | Riff24Khz16BitMonoPcm SpeechSynthesisOutputFormat = 13 50 | 51 | // Riff8Khz8BitMonoMULaw stands for riff-8khz-8bit-mono-mulaw 52 | Riff8Khz8BitMonoMULaw SpeechSynthesisOutputFormat = 14 53 | 54 | // Raw16Khz16BitMonoPcm stands for raw-16khz-16bit-mono-pcm 55 | Raw16Khz16BitMonoPcm SpeechSynthesisOutputFormat = 15 56 | 57 | // Raw24Khz16BitMonoPcm stands for raw-24khz-16bit-mono-pcm 58 | Raw24Khz16BitMonoPcm SpeechSynthesisOutputFormat = 16 59 | 60 | // Raw8Khz16BitMonoPcm stands for raw-8khz-16bit-mono-pcm 61 | Raw8Khz16BitMonoPcm 
SpeechSynthesisOutputFormat = 17 62 | 63 | // Ogg16Khz16BitMonoOpus stands for ogg-16khz-16bit-mono-opus 64 | Ogg16Khz16BitMonoOpus SpeechSynthesisOutputFormat = 18 65 | 66 | // Ogg24Khz16BitMonoOpus stands for ogg-24khz-16bit-mono-opus 67 | Ogg24Khz16BitMonoOpus SpeechSynthesisOutputFormat = 19 68 | 69 | // Raw48Khz16BitMonoPcm stands for raw-48khz-16bit-mono-pcm 70 | Raw48Khz16BitMonoPcm SpeechSynthesisOutputFormat = 20 71 | 72 | // Riff48Khz16BitMonoPcm stands for riff-48khz-16bit-mono-pcm 73 | Riff48Khz16BitMonoPcm SpeechSynthesisOutputFormat = 21 74 | 75 | // Audio48Khz96KBitRateMonoMp3 stands for audio-48khz-96kbitrate-mono-mp3 76 | Audio48Khz96KBitRateMonoMp3 SpeechSynthesisOutputFormat = 22 77 | 78 | // Audio48Khz192KBitRateMonoMp3 stands for audio-48khz-192kbitrate-mono-mp3 79 | Audio48Khz192KBitRateMonoMp3 SpeechSynthesisOutputFormat = 23 80 | 81 | // Ogg48Khz16BitMonoOpus stands for ogg-48khz-16bit-mono-opus 82 | Ogg48Khz16BitMonoOpus SpeechSynthesisOutputFormat = 24 83 | 84 | // Webm16Khz16BitMonoOpus stands for webm-16khz-16bit-mono-opus 85 | Webm16Khz16BitMonoOpus SpeechSynthesisOutputFormat = 25 86 | 87 | // Webm24Khz16BitMonoOpus stands for webm-24khz-16bit-mono-opus 88 | Webm24Khz16BitMonoOpus SpeechSynthesisOutputFormat = 26 89 | 90 | // Raw24Khz16BitMonoTrueSilk stands for raw-24khz-16bit-mono-truesilk 91 | Raw24Khz16BitMonoTrueSilk SpeechSynthesisOutputFormat = 27 92 | 93 | // Raw8Khz8BitMonoALaw stands for raw-8khz-8bit-mono-alaw 94 | Raw8Khz8BitMonoALaw SpeechSynthesisOutputFormat = 28 95 | 96 | // Riff8Khz8BitMonoALaw stands for riff-8khz-8bit-mono-alaw 97 | Riff8Khz8BitMonoALaw SpeechSynthesisOutputFormat = 29 98 | 99 | // Webm24Khz16Bit24KbpsMonoOpus stands for webm-24khz-16bit-24kbps-mono-opus 100 | // Audio compressed by OPUS codec in a WebM container, with bitrate of 24kbps, optimized for IoT scenario. 
101 | Webm24Khz16Bit24KbpsMonoOpus SpeechSynthesisOutputFormat = 30 102 | 103 | // Audio16Khz16Bit32KbpsMonoOpus stands for audio-16khz-16bit-32kbps-mono-opus 104 | // Audio compressed by OPUS codec without container, with bitrate of 32kbps. 105 | Audio16Khz16Bit32KbpsMonoOpus SpeechSynthesisOutputFormat = 31 106 | 107 | // Audio24Khz16Bit48KbpsMonoOpus stands for audio-24khz-16bit-48kbps-mono-opus 108 | // Audio compressed by OPUS codec without container, with bitrate of 48kbps. 109 | Audio24Khz16Bit48KbpsMonoOpus SpeechSynthesisOutputFormat = 32 110 | 111 | // Audio24Khz16Bit24KbpsMonoOpus stands for audio-24khz-16bit-24kbps-mono-opus 112 | // Audio compressed by OPUS codec without container, with bitrate of 24kbps. 113 | Audio24Khz16Bit24KbpsMonoOpus SpeechSynthesisOutputFormat = 33 114 | 115 | // Raw22050Hz16BitMonoPcm stands for raw-22050hz-16bit-mono-pcm 116 | // Raw PCM audio at 22050Hz sampling rate and 16-bit depth. 117 | Raw22050Hz16BitMonoPcm SpeechSynthesisOutputFormat = 34 118 | 119 | // Riff22050Hz16BitMonoPcm stands for riff-22050hz-16bit-mono-pcm 120 | // PCM audio at 22050Hz sampling rate and 16-bit depth, with RIFF header. 121 | Riff22050Hz16BitMonoPcm SpeechSynthesisOutputFormat = 35 122 | 123 | // Raw44100Hz16BitMonoPcm stands for raw-44100hz-16bit-mono-pcm 124 | // Raw PCM audio at 44100Hz sampling rate and 16-bit depth. 125 | Raw44100Hz16BitMonoPcm SpeechSynthesisOutputFormat = 36 126 | 127 | // Riff44100Hz16BitMonoPcm stands for riff-44100hz-16bit-mono-pcm 128 | // PCM audio at 44100Hz sampling rate and 16-bit depth, with RIFF header. 129 | Riff44100Hz16BitMonoPcm SpeechSynthesisOutputFormat = 37 130 | 131 | // AmrWb16000Hz stands for amr-wb-16000hz 132 | // AMR-WB audio at 16kHz sampling rate. 
133 | AmrWb16000Hz SpeechSynthesisOutputFormat = 38 134 | ) 135 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.suo 8 | *.user 9 | *.userosscache 10 | *.sln.docstates 11 | 12 | # User-specific files (MonoDevelop/Xamarin Studio) 13 | *.userprefs 14 | 15 | # Build results 16 | [Dd]ebug/ 17 | [Dd]ebugPublic/ 18 | [Rr]elease/ 19 | [Rr]eleases/ 20 | x64/ 21 | x86/ 22 | bld/ 23 | [Bb]in/ 24 | [Oo]bj/ 25 | [Ll]og/ 26 | 27 | # Visual Studio 2015/2017 cache/options directory 28 | .vs/ 29 | # Uncomment if you have tasks that create the project's static files in wwwroot 30 | #wwwroot/ 31 | 32 | # Visual Studio 2017 auto generated files 33 | Generated\ Files/ 34 | 35 | # MSTest test Results 36 | [Tt]est[Rr]esult*/ 37 | [Bb]uild[Ll]og.* 38 | 39 | # NUNIT 40 | *.VisualState.xml 41 | TestResult.xml 42 | 43 | # Build Results of an ATL Project 44 | [Dd]ebugPS/ 45 | [Rr]eleasePS/ 46 | dlldata.c 47 | 48 | # Benchmark Results 49 | BenchmarkDotNet.Artifacts/ 50 | 51 | # .NET Core 52 | project.lock.json 53 | project.fragment.lock.json 54 | artifacts/ 55 | **/Properties/launchSettings.json 56 | 57 | # StyleCop 58 | StyleCopReport.xml 59 | 60 | # Files built by Visual Studio 61 | *_i.c 62 | *_p.c 63 | *_i.h 64 | *.ilk 65 | *.meta 66 | *.obj 67 | *.iobj 68 | *.pch 69 | *.pdb 70 | *.ipdb 71 | *.pgc 72 | *.pgd 73 | *.rsp 74 | *.sbr 75 | *.tlb 76 | *.tli 77 | *.tlh 78 | *.tmp 79 | *.tmp_proj 80 | *.log 81 | *.vspscc 82 | *.vssscc 83 | .builds 84 | *.pidb 85 | *.svclog 86 | *.scc 87 | 88 | # Chutzpah Test files 89 | _Chutzpah* 90 | 91 | # Visual C++ cache files 92 | ipch/ 93 | *.aps 94 | *.ncb 95 | *.opendb 96 | 
*.opensdf 97 | *.sdf 98 | *.cachefile 99 | *.VC.db 100 | *.VC.VC.opendb 101 | 102 | # Visual Studio profiler 103 | *.psess 104 | *.vsp 105 | *.vspx 106 | *.sap 107 | 108 | # Visual Studio Trace Files 109 | *.e2e 110 | 111 | # TFS 2012 Local Workspace 112 | $tf/ 113 | 114 | # Guidance Automation Toolkit 115 | *.gpState 116 | 117 | # ReSharper is a .NET coding add-in 118 | _ReSharper*/ 119 | *.[Rr]e[Ss]harper 120 | *.DotSettings.user 121 | 122 | # JustCode is a .NET coding add-in 123 | .JustCode 124 | 125 | # TeamCity is a build add-in 126 | _TeamCity* 127 | 128 | # DotCover is a Code Coverage Tool 129 | *.dotCover 130 | 131 | # AxoCover is a Code Coverage Tool 132 | .axoCover/* 133 | !.axoCover/settings.json 134 | 135 | # Visual Studio code coverage results 136 | *.coverage 137 | *.coveragexml 138 | 139 | # NCrunch 140 | _NCrunch_* 141 | .*crunch*.local.xml 142 | nCrunchTemp_* 143 | 144 | # MightyMoose 145 | *.mm.* 146 | AutoTest.Net/ 147 | 148 | # Web workbench (sass) 149 | .sass-cache/ 150 | 151 | # Installshield output folder 152 | [Ee]xpress/ 153 | 154 | # DocProject is a documentation generator add-in 155 | DocProject/buildhelp/ 156 | DocProject/Help/*.HxT 157 | DocProject/Help/*.HxC 158 | DocProject/Help/*.hhc 159 | DocProject/Help/*.hhk 160 | DocProject/Help/*.hhp 161 | DocProject/Help/Html2 162 | DocProject/Help/html 163 | 164 | # Click-Once directory 165 | publish/ 166 | 167 | # Publish Web Output 168 | *.[Pp]ublish.xml 169 | *.azurePubxml 170 | # Note: Comment the next line if you want to checkin your web deploy settings, 171 | # but database connection strings (with potential passwords) will be unencrypted 172 | *.pubxml 173 | *.publishproj 174 | 175 | # Microsoft Azure Web App publish settings. 
Comment the next line if you want to 176 | # checkin your Azure Web App publish settings, but sensitive information contained 177 | # in these scripts will be unencrypted 178 | PublishScripts/ 179 | 180 | # NuGet Packages 181 | *.nupkg 182 | # The packages folder can be ignored because of Package Restore 183 | **/[Pp]ackages/* 184 | # except build/, which is used as an MSBuild target. 185 | !**/[Pp]ackages/build/ 186 | # Uncomment if necessary however generally it will be regenerated when needed 187 | #!**/[Pp]ackages/repositories.config 188 | # NuGet v3's project.json files produces more ignorable files 189 | *.nuget.props 190 | *.nuget.targets 191 | 192 | # Microsoft Azure Build Output 193 | csx/ 194 | *.build.csdef 195 | 196 | # Microsoft Azure Emulator 197 | ecf/ 198 | rcf/ 199 | 200 | # Windows Store app package directories and files 201 | AppPackages/ 202 | BundleArtifacts/ 203 | Package.StoreAssociation.xml 204 | _pkginfo.txt 205 | *.appx 206 | 207 | # Visual Studio cache files 208 | # files ending in .cache can be ignored 209 | *.[Cc]ache 210 | # but keep track of directories ending in .cache 211 | !*.[Cc]ache/ 212 | 213 | # Others 214 | ClientBin/ 215 | ~$* 216 | *~ 217 | *.dbmdl 218 | *.dbproj.schemaview 219 | *.jfm 220 | *.pfx 221 | *.publishsettings 222 | orleans.codegen.cs 223 | 224 | # Including strong name files can present a security risk 225 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 226 | #*.snk 227 | 228 | # Since there are multiple workflows, uncomment next line to ignore bower_components 229 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 230 | #bower_components/ 231 | 232 | # RIA/Silverlight projects 233 | Generated_Code/ 234 | 235 | # Backup & report files from converting an old project file 236 | # to a newer Visual Studio version. 
Backup files are not needed, 237 | # because we have git ;-) 238 | _UpgradeReport_Files/ 239 | Backup*/ 240 | UpgradeLog*.XML 241 | UpgradeLog*.htm 242 | ServiceFabricBackup/ 243 | *.rptproj.bak 244 | 245 | # SQL Server files 246 | *.mdf 247 | *.ldf 248 | *.ndf 249 | 250 | # Business Intelligence projects 251 | *.rdl.data 252 | *.bim.layout 253 | *.bim_*.settings 254 | *.rptproj.rsuser 255 | 256 | # Microsoft Fakes 257 | FakesAssemblies/ 258 | 259 | # GhostDoc plugin setting file 260 | *.GhostDoc.xml 261 | 262 | # Node.js Tools for Visual Studio 263 | .ntvs_analysis.dat 264 | node_modules/ 265 | 266 | # Visual Studio 6 build log 267 | *.plg 268 | 269 | # Visual Studio 6 workspace options file 270 | *.opt 271 | 272 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 273 | *.vbw 274 | 275 | # Visual Studio LightSwitch build output 276 | **/*.HTMLClient/GeneratedArtifacts 277 | **/*.DesktopClient/GeneratedArtifacts 278 | **/*.DesktopClient/ModelManifest.xml 279 | **/*.Server/GeneratedArtifacts 280 | **/*.Server/ModelManifest.xml 281 | _Pvt_Extensions 282 | 283 | # Paket dependency manager 284 | .paket/paket.exe 285 | paket-files/ 286 | 287 | # FAKE - F# Make 288 | .fake/ 289 | 290 | # JetBrains Rider 291 | .idea/ 292 | *.sln.iml 293 | 294 | # CodeRush 295 | .cr/ 296 | 297 | # Python Tools for Visual Studio (PTVS) 298 | __pycache__/ 299 | *.pyc 300 | 301 | # Cake - Uncomment if you are using it 302 | # tools/** 303 | # !tools/packages.config 304 | 305 | # Tabs Studio 306 | *.tss 307 | 308 | # Telerik's JustMock configuration file 309 | *.jmconfig 310 | 311 | # BizTalk build output 312 | *.btp.cs 313 | *.btm.cs 314 | *.odx.cs 315 | *.xsd.cs 316 | 317 | # OpenCover UI analysis results 318 | OpenCover/ 319 | 320 | # Azure Stream Analytics local run output 321 | ASALocalRun/ 322 | 323 | # MSBuild Binary and Structured Log 324 | *.binlog 325 | 326 | # NVidia Nsight GPU debugger configuration file 327 | *.nvuser 328 | 329 | # MFractors 
(Xamarin productivity tool) working folder 330 | .mfractor/ 331 | 332 | # temp files 333 | tmp_* -------------------------------------------------------------------------------- /speech/cfunctions.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | // This file defines the proxy functions required to use callbacks 7 | 8 | // #include 9 | // #include 10 | // extern void recognizerFireEventSessionStarted(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 11 | // 12 | // void cgo_recognizer_session_started(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 13 | // { 14 | // recognizerFireEventSessionStarted(handle, event); 15 | // } 16 | // 17 | // extern void recognizerFireEventSessionStopped(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 18 | // 19 | // void cgo_recognizer_session_stopped(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 20 | // { 21 | // recognizerFireEventSessionStopped(handle, event); 22 | // } 23 | // 24 | // extern void recognizerFireEventSpeechStartDetected(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 25 | // 26 | // void cgo_recognizer_speech_start_detected(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 27 | // { 28 | // recognizerFireEventSpeechStartDetected(handle, event); 29 | // } 30 | // 31 | // extern void recognizerFireEventSpeechEndDetected(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 32 | // 33 | // void cgo_recognizer_speech_end_detected(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 34 | // { 35 | // recognizerFireEventSpeechEndDetected(handle, event); 36 | // } 37 | // 38 | // extern void recognizerFireEventRecognized(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 39 | // 40 | // void cgo_recognizer_recognized(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 41 | // { 
42 | // recognizerFireEventRecognized(handle, event); 43 | // } 44 | // 45 | // extern void recognizerFireEventRecognizing(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 46 | // 47 | // void cgo_recognizer_recognizing(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 48 | // { 49 | // recognizerFireEventRecognizing(handle, event); 50 | // } 51 | // 52 | // extern void recognizerFireEventCanceled(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 53 | // 54 | // void cgo_recognizer_canceled(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 55 | // { 56 | // recognizerFireEventCanceled(handle, event); 57 | // } 58 | // 59 | // extern void synthesizerFireEventSynthesisStarted(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 60 | // 61 | // void cgo_synthesizer_synthesis_started(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 62 | // { 63 | // synthesizerFireEventSynthesisStarted(handle, event); 64 | // } 65 | // 66 | // extern void synthesizerFireEventSynthesizing(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 67 | // 68 | // void cgo_synthesizer_synthesizing(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 69 | // { 70 | // synthesizerFireEventSynthesizing(handle, event); 71 | // } 72 | // 73 | // extern void synthesizerFireEventSynthesisCompleted(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 74 | // 75 | // void cgo_synthesizer_synthesis_completed(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 76 | // { 77 | // synthesizerFireEventSynthesisCompleted(handle, event); 78 | // } 79 | // 80 | // extern void synthesizerFireEventSynthesisCanceled(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 81 | // 82 | // void cgo_synthesizer_synthesis_canceled(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 83 | // { 84 | // synthesizerFireEventSynthesisCanceled(handle, event); 85 | // } 86 | // 87 | // extern void synthesizerFireEventWordBoundary(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 88 | // 89 | // void 
cgo_synthesizer_word_boundary(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 90 | // { 91 | // synthesizerFireEventWordBoundary(handle, event); 92 | // } 93 | // 94 | // extern void synthesizerFireEventVisemeReceived(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 95 | // 96 | // void cgo_synthesizer_viseme_received(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 97 | // { 98 | // synthesizerFireEventVisemeReceived(handle, event); 99 | // } 100 | // 101 | // extern void synthesizerFireEventBookmarkReached(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 102 | // 103 | // void cgo_synthesizer_bookmark_reached(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 104 | // { 105 | // synthesizerFireEventBookmarkReached(handle, event); 106 | // } 107 | // 108 | // extern void cgoTranslationRecognized(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 109 | // 110 | // void cgo_translation_recognizer_recognized(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 111 | // { 112 | // cgoTranslationRecognized(handle, event); 113 | // } 114 | // 115 | // extern void cgoTranslationRecognizing(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 116 | // 117 | // void cgo_translation_recognizer_recognizing(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 118 | // { 119 | // cgoTranslationRecognizing(handle, event); 120 | // } 121 | // 122 | // extern void cgoTranslationSynthesis(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 123 | // 124 | // void cgo_translation_synthesis(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 125 | // { 126 | // cgoTranslationSynthesis(handle, event); 127 | // } 128 | // 129 | // extern void cgoTranslationCanceled(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 130 | // 131 | // void cgo_translation_recognizer_canceled(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 132 | // { 133 | // cgoTranslationCanceled(handle, event); 134 | // } 135 | // 136 | // extern void 
conversationTranscriberFireEventTranscribing(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 137 | // 138 | // void cgo_conversation_transcriber_transcribing(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 139 | // { 140 | // conversationTranscriberFireEventTranscribing(handle, event); 141 | // } 142 | // 143 | // extern void conversationTranscriberFireEventTranscribed(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 144 | // 145 | // void cgo_conversation_transcriber_transcribed(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 146 | // { 147 | // conversationTranscriberFireEventTranscribed(handle, event); 148 | // } 149 | // 150 | // extern void conversationTranscriberFireEventCanceled(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 151 | // 152 | // void cgo_conversation_transcriber_canceled(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 153 | // { 154 | // conversationTranscriberFireEventCanceled(handle, event); 155 | // } 156 | // 157 | // void cgo_conversation_transcriber_session_started(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 158 | // { 159 | // recognizerFireEventSessionStarted(handle, event); 160 | // } 161 | // 162 | // void cgo_conversation_transcriber_session_stopped(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 163 | // { 164 | // recognizerFireEventSessionStopped(handle, event); 165 | // } 166 | // 167 | // void cgo_conversation_transcriber_speech_start_detected(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 168 | // { 169 | // recognizerFireEventSpeechStartDetected(handle, event); 170 | // } 171 | // 172 | // void cgo_conversation_transcriber_speech_end_detected(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 173 | // { 174 | // recognizerFireEventSpeechEndDetected(handle, event); 175 | // } 176 | import "C" 177 | -------------------------------------------------------------------------------- /speech/translation_recognition_result.go: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "unsafe" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | // #include 14 | // #include 15 | // #include 16 | import "C" 17 | 18 | // TranslationRecognitionResult represents the result of a translation recognition. 19 | type TranslationRecognitionResult struct { 20 | SpeechRecognitionResult 21 | translations map[string]string 22 | } 23 | 24 | // NewTranslationRecognitionResultFromHandle creates a TranslationRecognitionResult from a handle. 25 | func NewTranslationRecognitionResultFromHandle(handle common.SPXHandle) (*TranslationRecognitionResult, error) { 26 | result := new(TranslationRecognitionResult) 27 | result.translations = make(map[string]string) 28 | 29 | // Get base recognition result 30 | baseResult, err := NewSpeechRecognitionResultFromHandle(handle) 31 | if err != nil { 32 | return nil, err 33 | } 34 | result.SpeechRecognitionResult = *baseResult 35 | 36 | // Get translation count 37 | var count C.size_t 38 | ret := uintptr(C.translation_text_result_get_translation_count(uintptr2handle(handle), &count)) 39 | if ret != C.SPX_NOERROR { 40 | return nil, common.NewCarbonError(ret) 41 | } 42 | 43 | // Get translations 44 | for i := C.size_t(0); i < count; i++ { 45 | var languageSize, textSize C.size_t 46 | ret = uintptr(C.translation_text_result_get_translation(uintptr2handle(handle), i, nil, nil, &languageSize, &textSize)) 47 | if ret != C.SPX_NOERROR { 48 | return nil, common.NewCarbonError(ret) 49 | } 50 | 51 | language := make([]byte, languageSize) 52 | text := make([]byte, textSize) 53 | ret = uintptr(C.translation_text_result_get_translation(uintptr2handle(handle), i, 54 | (*C.char)(unsafe.Pointer(&language[0])), 55 
| (*C.char)(unsafe.Pointer(&text[0])), 56 | &languageSize, &textSize)) 57 | if ret != C.SPX_NOERROR { 58 | return nil, common.NewCarbonError(ret) 59 | } 60 | 61 | result.translations[string(language[:languageSize-1])] = string(text[:textSize-1]) 62 | } 63 | 64 | return result, nil 65 | } 66 | 67 | // GetTranslations returns all available translations. 68 | func (result TranslationRecognitionResult) GetTranslations() map[string]string { 69 | return result.translations 70 | } 71 | 72 | // GetTranslation returns the translation for the specified language. 73 | func (result TranslationRecognitionResult) GetTranslation(language string) string { 74 | return result.translations[language] 75 | } 76 | 77 | // TranslationSynthesisResult represents the voice output of the translated text. 78 | type TranslationSynthesisResult struct { 79 | Reason common.ResultReason 80 | audioData []byte 81 | } 82 | 83 | // NewTranslationSynthesisResultFromHandle creates a TranslationSynthesisResult from a handle. 84 | func NewTranslationSynthesisResultFromHandle(handle common.SPXHandle) (*TranslationSynthesisResult, error) { 85 | result := new(TranslationSynthesisResult) 86 | 87 | var reason C.Result_Reason 88 | ret := uintptr(C.result_get_reason(uintptr2handle(handle), &reason)) 89 | if ret != C.SPX_NOERROR { 90 | return nil, common.NewCarbonError(ret) 91 | } 92 | result.Reason = common.ResultReason(reason) 93 | 94 | var size C.size_t 95 | ret = uintptr(C.translation_synthesis_result_get_audio_data(uintptr2handle(handle), nil, &size)) 96 | if ret == uintptr(C.SPXERR_BUFFER_TOO_SMALL) { 97 | result.audioData = make([]byte, size) 98 | ret = uintptr(C.translation_synthesis_result_get_audio_data(uintptr2handle(handle), 99 | (*C.uint8_t)(unsafe.Pointer(&result.audioData[0])), &size)) 100 | } 101 | if ret != C.SPX_NOERROR { 102 | return nil, common.NewCarbonError(ret) 103 | } 104 | 105 | return result, nil 106 | } 107 | 108 | // GetAudioData returns the voice output of the translated text. 
109 | func (result TranslationSynthesisResult) GetAudioData() []byte { 110 | return result.audioData 111 | } 112 | 113 | // TranslationRecognitionEventArgs represents the event arguments for a translation recognition event. 114 | type TranslationRecognitionEventArgs struct { 115 | RecognitionEventArgs 116 | Result *TranslationRecognitionResult 117 | } 118 | 119 | // NewTranslationRecognitionEventArgsFromHandle creates a TranslationRecognitionEventArgs from a handle. 120 | func NewTranslationRecognitionEventArgsFromHandle(handle common.SPXHandle) (*TranslationRecognitionEventArgs, error) { 121 | base, err := NewRecognitionEventArgsFromHandle(handle) 122 | if err != nil { 123 | return nil, err 124 | } 125 | 126 | event := new(TranslationRecognitionEventArgs) 127 | event.RecognitionEventArgs = *base 128 | event.handle = uintptr2handle(handle) 129 | 130 | var resultHandle C.SPXRESULTHANDLE 131 | ret := uintptr(C.recognizer_recognition_event_get_result(event.handle, &resultHandle)) 132 | if ret != C.SPX_NOERROR { 133 | return nil, common.NewCarbonError(ret) 134 | } 135 | 136 | result, err := NewTranslationRecognitionResultFromHandle(handle2uintptr(resultHandle)) 137 | if err != nil { 138 | return nil, err 139 | } 140 | 141 | event.Result = result 142 | return event, nil 143 | } 144 | 145 | // TranslationRecognitionCanceledEventArgs represents the event arguments for a translation recognition canceled event. 146 | type TranslationRecognitionCanceledEventArgs struct { 147 | TranslationRecognitionEventArgs 148 | ErrorDetails string 149 | Reason common.CancellationReason 150 | ErrorCode common.CancellationErrorCode 151 | } 152 | 153 | // NewTranslationRecognitionCanceledEventArgsFromHandle creates a TranslationRecognitionCanceledEventArgs from a handle. 
154 | func NewTranslationRecognitionCanceledEventArgsFromHandle(handle common.SPXHandle) (*TranslationRecognitionCanceledEventArgs, error) { 155 | var reason C.Result_CancellationReason 156 | var errorCode C.Result_CancellationErrorCode 157 | 158 | baseArgs, err := NewTranslationRecognitionEventArgsFromHandle(handle) 159 | if err != nil { 160 | return nil, err 161 | } 162 | event := new(TranslationRecognitionCanceledEventArgs) 163 | event.TranslationRecognitionEventArgs = *baseArgs 164 | 165 | ret := uintptr(C.result_get_reason_canceled(event.Result.handle, &reason)) 166 | if ret != C.SPX_NOERROR { 167 | event.Close() 168 | return nil, common.NewCarbonError(ret) 169 | } 170 | 171 | /* ErrorCode */ 172 | ret = uintptr(C.result_get_canceled_error_code(event.Result.handle, &errorCode)) 173 | if ret != C.SPX_NOERROR { 174 | event.Close() 175 | return nil, common.NewCarbonError(ret) 176 | } 177 | 178 | event.ErrorDetails = event.Result.Properties.GetProperty(common.SpeechServiceResponseJSONErrorDetails, "") 179 | event.ErrorCode = (common.CancellationErrorCode)(errorCode) 180 | event.Reason = (common.CancellationReason)(reason) 181 | 182 | return event, nil 183 | } 184 | 185 | // TranslationSynthesisEventArgs represents the event arguments for a translation synthesis event. 186 | type TranslationSynthesisEventArgs struct { 187 | SessionEventArgs 188 | Result *TranslationSynthesisResult 189 | } 190 | 191 | // NewTranslationSynthesisEventArgsFromHandle creates a TranslationSynthesisEventArgs from a handle. 
func NewTranslationSynthesisEventArgsFromHandle(handle common.SPXHandle) (*TranslationSynthesisEventArgs, error) {
	// Pull the native result handle out of the event.
	var resultHandle C.SPXRESULTHANDLE
	ret := uintptr(C.recognizer_recognition_event_get_result(uintptr2handle(handle), &resultHandle))
	if ret != C.SPX_NOERROR {
		return nil, common.NewCarbonError(ret)
	}

	// Wrap it: this reads the result reason and copies the audio bytes.
	result, err := NewTranslationSynthesisResultFromHandle(handle2uintptr(resultHandle))
	if err != nil {
		return nil, err
	}

	// Only Result is populated; the embedded SessionEventArgs keeps its zero value.
	// NOTE(review): confirm whether the session fields are expected to be
	// filled in from the event handle here.
	return &TranslationSynthesisEventArgs{Result: result}, nil
}

// Event handler types

// TranslationRecognitionEventHandler is the callback signature for translation recognition events.
type TranslationRecognitionEventHandler func(event TranslationRecognitionEventArgs)

// TranslationRecognitionCanceledEventHandler is the callback signature for canceled translation recognition events.
type TranslationRecognitionCanceledEventHandler func(event TranslationRecognitionCanceledEventArgs)

// TranslationSynthesisEventHandler is the callback signature for translation synthesis events.
type TranslationSynthesisEventHandler func(event TranslationSynthesisEventArgs)

// TranslationRecognitionOutcome represents the outcome of a translation recognition operation.
// It pairs the translation result with the embedded common.OperationOutcome.
type TranslationRecognitionOutcome struct {
	Result *TranslationRecognitionResult
	common.OperationOutcome
}
--------------------------------------------------------------------------------
/audio/audio_input_stream.go:
--------------------------------------------------------------------------------
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.

package audio

import (
	"sync"
	"unsafe"

	"github.com/Microsoft/cognitive-services-speech-sdk-go/common"
)

// #include 
// #include 
// #include 
// #include 
// #include 
//
// /* Proxy functions forward declarations */
// int cgo_audio_read_callback_wrapper(void *context, uint8_t *buffer, uint32_t size);
// void cgo_audio_get_property_callback_wrapper(void* context, int id, uint8_t* value, uint32_t size);
// void cgo_audio_close_callback_wrapper(void *context);
import "C"

// AudioInputStream represents audio input stream used for custom audio input configurations
//
// Close releases the stream. getHandle exposes the native handle to the rest of
// this package; because it is unexported, only types declared in this package
// can satisfy the interface.
type AudioInputStream interface {
	Close()
	getHandle() C.SPXHANDLE
}

// audioInputStreamBase holds the native stream handle and is embedded by the
// concrete push and pull stream types so they share getHandle and Close.
type audioInputStreamBase struct {
	handle C.SPXHANDLE
}

// getHandle returns the native stream handle stored in the base.
func (stream audioInputStreamBase) getHandle() C.SPXHANDLE {
	return stream.handle
}

// Close releases the native stream handle through audio_stream_release.
// The stored handle value itself is left unchanged.
func (stream audioInputStreamBase) Close() {
	C.audio_stream_release(stream.handle)
}

// PushAudioInputStream represents memory backed push audio input stream used for custom audio input configurations.
type PushAudioInputStream struct {
	audioInputStreamBase
}

// CreatePushAudioInputStreamFromFormat creates a memory backed PushAudioInputStream with the specified audio format.
// Currently, only WAV / PCM with 16-bit samples, 16 kHz sample rate, and a single channel (Mono) is supported. When used
// with Conversation Transcription, eight channels are supported.
51 | func CreatePushAudioInputStreamFromFormat(format *AudioStreamFormat) (*PushAudioInputStream, error) { 52 | var handle C.SPXHANDLE 53 | ret := uintptr(C.audio_stream_create_push_audio_input_stream(&handle, format.handle)) 54 | if ret != C.SPX_NOERROR { 55 | return nil, common.NewCarbonError(ret) 56 | } 57 | stream := new(PushAudioInputStream) 58 | stream.handle = handle 59 | return stream, nil 60 | } 61 | 62 | // CreatePushAudioInputStream creates a memory backed PushAudioInputStream using the default format (16 kHz, 16 bit, mono PCM). 63 | func CreatePushAudioInputStream() (*PushAudioInputStream, error) { 64 | format, err := GetDefaultInputFormat() 65 | if err != nil { 66 | return nil, err 67 | } 68 | return CreatePushAudioInputStreamFromFormat(format) 69 | } 70 | 71 | // Write writes the audio data specified by making an internal copy of the data. 72 | // Note: The dataBuffer should not contain any audio header. 73 | func (stream PushAudioInputStream) Write(buffer []byte) error { 74 | size := uint(len(buffer)) 75 | cBuffer := C.CBytes(buffer) 76 | defer C.free(cBuffer) 77 | ret := uintptr(C.push_audio_input_stream_write(stream.handle, (*C.uint8_t)(cBuffer), (C.uint32_t)(size))) 78 | if ret != C.SPX_NOERROR { 79 | return common.NewCarbonError(ret) 80 | } 81 | return nil 82 | } 83 | 84 | // SetProperty sets value of a property. The properties of the audio data should be set before writing the audio data. 85 | func (stream PushAudioInputStream) SetProperty(id common.PropertyID, value string) error { 86 | v := C.CString(value) 87 | defer C.free(unsafe.Pointer(v)) 88 | ret := uintptr(C.push_audio_input_stream_set_property_by_id(stream.handle, (C.int)(id), v)) 89 | if ret != C.SPX_NOERROR { 90 | return common.NewCarbonError(ret) 91 | } 92 | return nil 93 | } 94 | 95 | // SetPropertyByName sets value of a property. The properties of the audio data should be set before writing the audio data. 
96 | func (stream PushAudioInputStream) SetPropertyByName(name string, value string) error { 97 | n := C.CString(name) 98 | defer C.free(unsafe.Pointer(n)) 99 | v := C.CString(value) 100 | defer C.free(unsafe.Pointer(v)) 101 | ret := uintptr(C.push_audio_input_stream_set_property_by_name(stream.handle, n, v)) 102 | if ret != C.SPX_NOERROR { 103 | return common.NewCarbonError(ret) 104 | } 105 | return nil 106 | } 107 | 108 | // CloseStream closes the stream. 109 | func (stream PushAudioInputStream) CloseStream() { 110 | C.push_audio_input_stream_close(stream.handle) 111 | } 112 | 113 | // PullAudioInputStream represents audio input stream used for custom audio input configurations. 114 | type PullAudioInputStream struct { 115 | audioInputStreamBase 116 | } 117 | 118 | // PullAudioInputStreamCallback interface that defines callback methods (Read(), GetProperty() and CloseStream()) for custom 119 | // audio input streams). 120 | type PullAudioInputStreamCallback interface { 121 | Read(maxSize uint32) ([]byte, int) 122 | GetProperty(id common.PropertyID) string 123 | CloseStream() 124 | } 125 | 126 | var mu sync.Mutex 127 | var pullStreamCallbacks = make(map[C.SPXHANDLE]PullAudioInputStreamCallback) 128 | 129 | func registerCallback(handle C.SPXHANDLE, callback PullAudioInputStreamCallback) { 130 | mu.Lock() 131 | defer mu.Unlock() 132 | pullStreamCallbacks[handle] = callback 133 | } 134 | 135 | func getCallback(handle C.SPXHANDLE) *PullAudioInputStreamCallback { 136 | mu.Lock() 137 | defer mu.Unlock() 138 | cb, ok := pullStreamCallbacks[handle] 139 | if ok { 140 | return &cb 141 | } 142 | return nil 143 | } 144 | 145 | //nolint:deadcode 146 | func deregisterCallback(handle C.SPXHANDLE) { 147 | mu.Lock() 148 | defer mu.Unlock() 149 | pullStreamCallbacks[handle] = nil 150 | } 151 | 152 | //export cgoAudioCallReadCallback 153 | func cgoAudioCallReadCallback(handle C.SPXRECOHANDLE, dataBuffer *C.uint8_t, size C.uint32_t) int { 154 | callback := getCallback(handle) 155 | 
if callback != nil { 156 | goBuffer, readSize := (*callback).Read(uint32(size)) 157 | buffer := C.CBytes(goBuffer) 158 | defer C.free(buffer) 159 | C.memcpy(unsafe.Pointer(dataBuffer), buffer, (C.size_t)(readSize)) 160 | return readSize 161 | } 162 | return 0 163 | } 164 | 165 | //export cgoAudioCallGetPropertyCallback 166 | func cgoAudioCallGetPropertyCallback(handle C.SPXHANDLE, id int, value *C.uint8_t, size C.uint32_t) { 167 | callback := getCallback(handle) 168 | if callback != nil { 169 | propValue := (*callback).GetProperty((common.PropertyID)(id)) 170 | buffer := C.CString(propValue) 171 | defer C.free(unsafe.Pointer(buffer)) 172 | s := size 173 | if uintptr(len(propValue)) < uintptr(size) { 174 | s = (C.uint32_t)(len(propValue)) 175 | } 176 | C.memcpy(unsafe.Pointer(value), unsafe.Pointer(buffer), (C.size_t)(s)) 177 | } 178 | } 179 | 180 | //export cgoAudioCallCloseCallback 181 | func cgoAudioCallCloseCallback(handle C.SPXHANDLE) { 182 | callback := getCallback(handle) 183 | if callback != nil { 184 | (*callback).CloseStream() 185 | } 186 | } 187 | 188 | // CreatePullStreamFromFormat creates a PullAudioInputStream that delegates to the specified callback interface for Read() 189 | // and CloseStream() methods and the specified format. 190 | // Currently, only WAV / PCM with 16-bit samples, 16 kHz sample rate, and a single channel (Mono) is supported. When used with Conversation Transcription, eight channels are supported. 
191 | func CreatePullStreamFromFormat(callback PullAudioInputStreamCallback, format *AudioStreamFormat) (*PullAudioInputStream, error) { 192 | var handle C.SPXHANDLE 193 | ret := uintptr(C.audio_stream_create_pull_audio_input_stream(&handle, format.handle)) 194 | if ret != C.SPX_NOERROR { 195 | return nil, common.NewCarbonError(ret) 196 | } 197 | ret = uintptr(C.pull_audio_input_stream_set_callbacks( 198 | handle, 199 | unsafe.Pointer(handle), 200 | (C.CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK)(unsafe.Pointer(C.cgo_audio_read_callback_wrapper)), 201 | (C.CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK)(unsafe.Pointer(C.cgo_audio_close_callback_wrapper)))) 202 | if ret != C.SPX_NOERROR { 203 | return nil, common.NewCarbonError(ret) 204 | } 205 | ret = uintptr(C.pull_audio_input_stream_set_getproperty_callback( 206 | handle, 207 | unsafe.Pointer(handle), 208 | (C.CUSTOM_AUDIO_PULL_STREAM_GET_PROPERTY_CALLBACK)(unsafe.Pointer(C.cgo_audio_get_property_callback_wrapper)))) 209 | if ret != C.SPX_NOERROR { 210 | return nil, common.NewCarbonError(ret) 211 | } 212 | registerCallback(handle, callback) 213 | stream := new(PullAudioInputStream) 214 | stream.handle = handle 215 | return stream, nil 216 | } 217 | 218 | // CreatePullStream creates a PullAudioInputStream that delegates to the specified callback interface for Read() and CloseStream() 219 | // methods using the default format (16 kHz, 16 bit, mono PCM). 220 | func CreatePullStream(callback PullAudioInputStreamCallback) (*PullAudioInputStream, error) { 221 | format, err := GetDefaultInputFormat() 222 | if err != nil { 223 | return nil, err 224 | } 225 | return CreatePullStreamFromFormat(callback, format) 226 | } 227 | --------------------------------------------------------------------------------