├── .github └── workflows │ ├── go.yml │ └── lint.yml ├── .gitignore ├── .golangci.yaml ├── .vscode └── settings.json ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── SECURITY.md ├── audio ├── audio_config.go ├── audio_input_stream.go ├── audio_output_stream.go ├── audio_stream_container_format.go ├── audio_stream_format.go ├── cfunctions.go ├── doc.go ├── interop_utils.go └── interop_utils_test.go ├── ci └── azure-pipelines.yml ├── common ├── cancellation_error_code.go ├── cancellation_error_code_string.go ├── cancellation_reason.go ├── cancellation_reason_string.go ├── doc.go ├── error.go ├── interop_utils.go ├── operation_outcome.go ├── output_format.go ├── profanity_option.go ├── property_collection.go ├── property_id.go ├── result_reason.go ├── result_reason_string.go ├── service_property_channel.go ├── speech_synthesis_boundary_type.go ├── speech_synthesis_output_format.go ├── stream_status.go ├── stream_status_string.go ├── synthesis_voice_gender.go ├── synthesis_voice_type.go └── voice_profile_type.go ├── diagnostics ├── diagnostics.go └── error.go ├── dialog ├── activity_received_event_args.go ├── callback_helpers.go ├── cfunctions.go ├── dialog_service_config.go ├── dialog_service_connector.go ├── dialog_service_connector_test.go ├── doc.go ├── interop_utils.go └── interop_utils_test.go ├── go.mod ├── samples ├── .gitignore ├── dialog_service_connector │ ├── doc.go │ ├── from_push_audio_stream.go │ ├── listen_once.go │ └── start_keyword_listening.go ├── go.mod ├── helpers │ └── stream_utils.go ├── main.go ├── recognizer │ ├── doc.go │ ├── from_file.go │ ├── from_microphone.go │ └── wrapper.go ├── speaker_recognition │ ├── doc.go │ ├── independent_identification.go │ └── independent_verification.go └── synthesizer │ ├── doc.go │ ├── to_audio_data_stream.go │ └── to_speaker.go ├── speaker ├── doc.go ├── interop_utils.go ├── interop_utils_test.go ├── speaker_identification_model.go ├── speaker_recognition_result.go ├── speaker_recognizer.go ├── 
speaker_verification_model.go ├── voice_profile.go ├── voice_profile_cancellation_details.go ├── voice_profile_client.go ├── voice_profile_client_test.go ├── voice_profile_enrollment_result.go ├── voice_profile_phrase_result.go ├── voice_profile_result.go └── voice_profile_test.go ├── speech ├── audio_data_stream.go ├── auto_detect_source_language_config.go ├── callback_helpers.go ├── cancellation_details.go ├── cfunctions.go ├── conversation_callback_helpers.go ├── conversation_transcriber.go ├── conversation_transcriber_test.go ├── conversation_transcription_event_args.go ├── conversation_transcription_result.go ├── doc.go ├── interop_utils.go ├── interop_utils_test.go ├── keyword_recognition_model.go ├── recognition_event_args.go ├── session_event_args.go ├── source_language_config.go ├── speech_config.go ├── speech_config_test.go ├── speech_recognition_canceled_event_args.go ├── speech_recognition_event_args.go ├── speech_recognition_result.go ├── speech_recognizer.go ├── speech_recognizer_test.go ├── speech_synthesis_bookmark_event_args.go ├── speech_synthesis_event_args.go ├── speech_synthesis_result.go ├── speech_synthesis_viseme_event_args.go ├── speech_synthesis_word_boundary_event_args.go ├── speech_synthesizer.go ├── speech_synthesizer_test.go ├── speech_translation_config.go ├── synthesis_voices_result.go ├── translation_callback_helpers.go ├── translation_recognition_result.go ├── translation_recognizer.go ├── translation_recognizer_test.go └── voice_info.go └── test_files ├── TalkForAFewSeconds16.wav ├── katiesteve_mono.wav ├── kws.table ├── myVoiceIsMyPassportVerifyMe01.wav ├── peloozoid.wav ├── turn_on_the_lamp.wav └── whats_the_weather_like.wav /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | on: 3 | push: 4 | branches: 5 | - master 6 | - main 7 | workflow_dispatch: 8 | 9 | env: 10 | CARBON_VERSION: "1.43.0" 11 | 12 | jobs: 13 | linux: 14 | name: Build (Linux) 15 | runs-on: 
${{ matrix.os }} 16 | strategy: 17 | matrix: 18 | os: [ubuntu-20.04, ubuntu-22.04] 19 | go: [1.13, 1.17, 1.18] 20 | steps: 21 | - name: Set up Go ${{ matrix.go }} 22 | uses: actions/setup-go@v3 23 | with: 24 | go-version: ${{ matrix.go }} 25 | id: go 26 | - name: Check out code into the Go module directory 27 | uses: actions/checkout@v3 28 | 29 | - name: Get dependencies 30 | run: | 31 | go get -v -t -d ./... 32 | if [ -f Gopkg.toml ]; then 33 | curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh 34 | dep ensure 35 | fi 36 | 37 | - name: Get Speech SDK (Linux) 38 | run: | 39 | mkdir $HOME/carbon 40 | pushd $HOME/carbon 41 | wget https://csspeechstorage.blob.core.windows.net/drop/$CARBON_VERSION/SpeechSDK-Linux-$CARBON_VERSION.tar.gz 42 | tar xzf SpeechSDK-Linux-$CARBON_VERSION.tar.gz 43 | rm SpeechSDK-Linux-$CARBON_VERSION.tar.gz 44 | ln -s SpeechSDK-Linux-$CARBON_VERSION current 45 | popd 46 | - name: Get Carbon Dependencies 47 | run: | 48 | sudo apt-get update 49 | sudo apt-get install libasound2 50 | sudo apt-get install -y libgstreamer1.0-0 gstreamer1.0-plugins-good 51 | dpkg -l | grep gstreamer 52 | - name: Build 53 | run: | 54 | export CGO_CFLAGS="-I$HOME/carbon/current/include/c_api" 55 | export CGO_LDFLAGS="-L$HOME/carbon/current/lib/x64 -lMicrosoft.CognitiveServices.Speech.core" 56 | export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$HOME/carbon/current/lib/x64" 57 | go build -v ./... 
58 | - name: Test 59 | env: 60 | DIALOG_SUBSCRIPTION_KEY: ${{ secrets.DIALOG_SUBSCRIPTION_KEY }} 61 | DIALOG_SUBSCRIPTION_REGION: ${{ secrets.DIALOG_SUBSCRIPTION_REGION }} 62 | SPEECH_SUBSCRIPTION_KEY: ${{ secrets.SR_SUBSCRIPTION_KEY }} 63 | SPEECH_SUBSCRIPTION_REGION: ${{ secrets.SR_SUBSCRIPTION_REGION }} 64 | SPEAKER_RECOGNITION_SUBSCRIPTION_KEY: ${{ secrets.SPEAKER_SUBSCRIPTION_KEY }} 65 | SPEAKER_RECOGNITION_SUBSCRIPTION_REGION: ${{ secrets.SPEAKER_SUBSCRIPTION_REGION }} 66 | run: | 67 | export CGO_CFLAGS="-I$HOME/carbon/current/include/c_api" 68 | export CGO_LDFLAGS="-L$HOME/carbon/current/lib/x64 -lMicrosoft.CognitiveServices.Speech.core" 69 | export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$HOME/carbon/current/lib/x64" 70 | go test -v ./... 71 | - name: 'Upload Test Logs' 72 | uses: actions/upload-artifact@v4 73 | if: always() 74 | with: 75 | name: Log-${{ matrix.os }}-${{ matrix.go }} 76 | path: '**/*.log' 77 | retention-days: 3 78 | 79 | macos: 80 | name: Build (macOS) 81 | runs-on: ${{ matrix.os }} 82 | strategy: 83 | matrix: 84 | os: [macos-10.15, macos-11.0] 85 | steps: 86 | - name: Install modern bash for macOS 87 | run: | 88 | export HOMEBREW_NO_INSTALL_CLEANUP=1 89 | brew config && brew install bash 90 | - name: Check out code into the Go module directory 91 | uses: actions/checkout@v3 92 | 93 | - name: Get dependencies 94 | run: | 95 | go get -v -t -d ./... 
96 | if [ -f Gopkg.toml ]; then 97 | curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh 98 | dep ensure 99 | fi 100 | 101 | - name: Get Speech SDK (macOS) 102 | run: | 103 | cd ~ 104 | wget https://csspeechstorage.blob.core.windows.net/drop/$CARBON_VERSION/MicrosoftCognitiveServicesSpeech-XCFramework-$CARBON_VERSION.zip -O SpeechSDK.zip 105 | unzip SpeechSDK.zip -d speechsdk 106 | - name: Build 107 | run: | 108 | export SDK_HOME="$HOME/speechsdk/MicrosoftCognitiveServicesSpeech.xcframework/macos-arm64_x86_64" 109 | export CGO_CFLAGS="-I$SDK_HOME/MicrosoftCognitiveServicesSpeech.framework/Headers" 110 | export CGO_LDFLAGS="-F$SDK_HOME -framework MicrosoftCognitiveServicesSpeech" 111 | export DYLD_FRAMEWORK_PATH="$DYLD_FRAMEWORK_PATH:$SDK_HOME" 112 | go build -v ./... 113 | - name: Test 114 | env: 115 | DIALOG_SUBSCRIPTION_KEY: ${{ secrets.DIALOG_SUBSCRIPTION_KEY }} 116 | DIALOG_SUBSCRIPTION_REGION: ${{ secrets.DIALOG_SUBSCRIPTION_REGION }} 117 | SPEECH_SUBSCRIPTION_KEY: ${{ secrets.SR_SUBSCRIPTION_KEY }} 118 | SPEECH_SUBSCRIPTION_REGION: ${{ secrets.SR_SUBSCRIPTION_REGION }} 119 | SPEAKER_RECOGNITION_SUBSCRIPTION_KEY: ${{ secrets.SPEAKER_SUBSCRIPTION_KEY }} 120 | SPEAKER_RECOGNITION_SUBSCRIPTION_REGION: ${{ secrets.SPEAKER_SUBSCRIPTION_REGION }} 121 | run: | 122 | export SDK_HOME="$HOME/speechsdk/MicrosoftCognitiveServicesSpeech.xcframework/macos-arm64_x86_64" 123 | export CGO_CFLAGS="-I$SDK_HOME/MicrosoftCognitiveServicesSpeech.framework/Headers" 124 | export CGO_LDFLAGS="-F$SDK_HOME -framework MicrosoftCognitiveServicesSpeech" 125 | export DYLD_FRAMEWORK_PATH="$DYLD_FRAMEWORK_PATH:$SDK_HOME" 126 | go test -v ./... 
127 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint code 2 | on: 3 | push: 4 | tags: 5 | - v* 6 | branches: 7 | - master 8 | - main 9 | pull_request: 10 | env: 11 | CARBON_VERSION: "1.42.0" 12 | jobs: 13 | golangci: 14 | name: lint 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v2 18 | - name: Get Carbon 19 | run: | 20 | mkdir /tmp/carbon 21 | pushd /tmp/carbon 22 | wget https://csspeechstorage.blob.core.windows.net/drop/$CARBON_VERSION/SpeechSDK-Linux-$CARBON_VERSION.tar.gz 23 | tar xzf SpeechSDK-Linux-$CARBON_VERSION.tar.gz 24 | rm SpeechSDK-Linux-$CARBON_VERSION.tar.gz 25 | ln -s SpeechSDK-Linux-$CARBON_VERSION current 26 | popd 27 | - name: golangci-lint 28 | uses: golangci/golangci-lint-action@v5 29 | env: 30 | CGO_CFLAGS: "-I/tmp/carbon/current/include/c_api" 31 | CGO_LDFLAGS: "-L/tmp/carbon/current/lib/x64 -lMicrosoft.CognitiveServices.Speech.core" 32 | -------------------------------------------------------------------------------- /.golangci.yaml: -------------------------------------------------------------------------------- 1 | run: 2 | tests: false 3 | 4 | linters: 5 | enable: 6 | - revive 7 | 8 | issues: 9 | exclude-rules: 10 | - linters: 11 | - govet 12 | text: "OperationOutcome" 13 | - linters: 14 | - golint 15 | - revive 16 | text: "type name will be used" 17 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "cSpell.words": [ 3 | "NOERROR", 4 | "PSESSION", 5 | "PSYNTHESIS", 6 | "SPXASYNCHANDLE", 7 | "SPXERR", 8 | "SPXEVENTHANDLE", 9 | "SPXHANDLE", 10 | "SPXRECOHANDLE", 11 | "SPXRESULTHANDLE", 12 | "SPXSYNTHHANDLE", 13 | "SSML", 14 | "Visemek", 15 | "golangci", 16 | "golint", 17 | "govet", 18 | "unstaged", 19 | "untracked", 
20 | "webm" 21 | ] 22 | } -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | This project contains Golang binding for the Microsoft Cognitive Service Speech SDK. 4 | 5 | # Getting Started 6 | 7 | Check the [Speech SDK Setup documentation for Go](https://docs.microsoft.com/azure/cognitive-services/speech-service/quickstarts/setup-platform?tabs=dotnet%2Cwindows%2Cjre%2Cbrowser&pivots=programming-language-go) 8 | 9 | Get started with [speech-to-text sample for Go](https://docs.microsoft.com/azure/cognitive-services/speech-service/get-started-speech-to-text?tabs=windowsinstall&pivots=programming-language-go) 10 | 11 | Get started with [text-to-speech sample for Go](https://docs.microsoft.com/azure/cognitive-services/speech-service/get-started-text-to-speech?tabs=script%2Cwindowsinstall&pivots=programming-language-go) 12 | 13 | This project requires Go 1.13 14 | 15 | # Reference 16 | 17 | Reference documentation for these packages is available at http://aka.ms/csspeech/goref 18 | 19 | # Contributing 20 | 21 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 22 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 23 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 24 | 25 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 26 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 27 | provided by the bot. 
You will only need to do this once across all repos using our CLA. 28 | 29 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 30 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 31 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 32 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). 
If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /audio/audio_config.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. 
See LICENSE.md file in the project root for full license information. 3 | 4 | package audio 5 | 6 | import ( 7 | "unsafe" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | // #include 14 | // #include 15 | // #include 16 | import "C" 17 | 18 | // AudioConfig represents specific audio configuration, such as microphone, file, or custom audio streams. 19 | type AudioConfig struct { 20 | handle C.SPXHANDLE 21 | properties *common.PropertyCollection 22 | } 23 | 24 | // GetHandle gets the handle to the resource (for internal use) 25 | func (config AudioConfig) GetHandle() common.SPXHandle { 26 | return handle2uintptr(config.handle) 27 | } 28 | 29 | // Close releases the underlying resources 30 | func (config AudioConfig) Close() { 31 | config.properties.Close() 32 | C.audio_config_release(config.handle) 33 | } 34 | 35 | func newAudioConfigFromHandle(handle C.SPXHANDLE) (*AudioConfig, error) { 36 | var propBagHandle C.SPXPROPERTYBAGHANDLE 37 | ret := uintptr(C.audio_config_get_property_bag(handle, &propBagHandle)) 38 | if ret != C.SPX_NOERROR { 39 | return nil, common.NewCarbonError(ret) 40 | } 41 | config := new(AudioConfig) 42 | config.handle = handle 43 | config.properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 44 | return config, nil 45 | } 46 | 47 | // NewAudioConfigFromDefaultMicrophoneInput creates an AudioConfig object representing the default microphone on the system. 48 | func NewAudioConfigFromDefaultMicrophoneInput() (*AudioConfig, error) { 49 | var handle C.SPXHANDLE 50 | ret := uintptr(C.audio_config_create_audio_input_from_default_microphone(&handle)) 51 | if ret != C.SPX_NOERROR { 52 | return nil, common.NewCarbonError(ret) 53 | } 54 | return newAudioConfigFromHandle(handle) 55 | } 56 | 57 | // NewAudioConfigFromMicrophoneInput creates an AudioConfig object representing a specific microphone on the system. 
58 | func NewAudioConfigFromMicrophoneInput(deviceName string) (*AudioConfig, error) { 59 | var handle C.SPXHANDLE 60 | dn := C.CString(deviceName) 61 | defer C.free(unsafe.Pointer(dn)) 62 | ret := uintptr(C.audio_config_create_audio_input_from_a_microphone(&handle, dn)) 63 | if ret != C.SPX_NOERROR { 64 | return nil, common.NewCarbonError(ret) 65 | } 66 | return newAudioConfigFromHandle(handle) 67 | } 68 | 69 | // NewAudioConfigFromWavFileInput creates an AudioConfig object representing the specified file. 70 | func NewAudioConfigFromWavFileInput(filename string) (*AudioConfig, error) { 71 | var handle C.SPXHANDLE 72 | fn := C.CString(filename) 73 | defer C.free(unsafe.Pointer(fn)) 74 | ret := uintptr(C.audio_config_create_audio_input_from_wav_file_name(&handle, fn)) 75 | if ret != C.SPX_NOERROR { 76 | return nil, common.NewCarbonError(ret) 77 | } 78 | return newAudioConfigFromHandle(handle) 79 | } 80 | 81 | // NewAudioConfigFromStreamInput creates an AudioConfig object representing the specified stream. 82 | func NewAudioConfigFromStreamInput(stream AudioInputStream) (*AudioConfig, error) { 83 | var handle C.SPXHANDLE 84 | ret := uintptr(C.audio_config_create_audio_input_from_stream(&handle, stream.getHandle())) 85 | if ret != C.SPX_NOERROR { 86 | return nil, common.NewCarbonError(ret) 87 | } 88 | return newAudioConfigFromHandle(handle) 89 | } 90 | 91 | // NewAudioConfigFromDefaultSpeakerOutput creates an AudioConfig object representing the default audio output device 92 | // (speaker) on the system. 
93 | func NewAudioConfigFromDefaultSpeakerOutput() (*AudioConfig, error) { 94 | var handle C.SPXHANDLE 95 | ret := uintptr(C.audio_config_create_audio_output_from_default_speaker(&handle)) 96 | if ret != C.SPX_NOERROR { 97 | return nil, common.NewCarbonError(ret) 98 | } 99 | return newAudioConfigFromHandle(handle) 100 | } 101 | 102 | // NewAudioConfigFromSpeakerOutput creates an AudioConfig object representing the specific audio output device 103 | // (speaker) on the system. 104 | func NewAudioConfigFromSpeakerOutput(deviceName string) (*AudioConfig, error) { 105 | var handle C.SPXHANDLE 106 | dn := C.CString(deviceName) 107 | defer C.free(unsafe.Pointer(dn)) 108 | ret := uintptr(C.audio_config_create_audio_output_from_a_speaker(&handle, dn)) 109 | if ret != C.SPX_NOERROR { 110 | return nil, common.NewCarbonError(ret) 111 | } 112 | return newAudioConfigFromHandle(handle) 113 | } 114 | 115 | // NewAudioConfigFromWavFileOutput creates an AudioConfig object representing the specified file for audio output. 116 | func NewAudioConfigFromWavFileOutput(filename string) (*AudioConfig, error) { 117 | var handle C.SPXHANDLE 118 | fn := C.CString(filename) 119 | defer C.free(unsafe.Pointer(fn)) 120 | ret := uintptr(C.audio_config_create_audio_output_from_wav_file_name(&handle, fn)) 121 | if ret != C.SPX_NOERROR { 122 | return nil, common.NewCarbonError(ret) 123 | } 124 | return newAudioConfigFromHandle(handle) 125 | } 126 | 127 | // NewAudioConfigFromStreamOutput creates an AudioConfig object representing the specified output stream. 128 | func NewAudioConfigFromStreamOutput(stream AudioOutputStream) (*AudioConfig, error) { 129 | var handle C.SPXHANDLE 130 | streamHandle := stream.getHandle() 131 | ret := uintptr(C.audio_config_create_audio_output_from_stream(&handle, streamHandle)) 132 | if ret != C.SPX_NOERROR { 133 | return nil, common.NewCarbonError(ret) 134 | } 135 | return newAudioConfigFromHandle(handle) 136 | } 137 | 138 | // SetProperty sets a property value by ID. 
139 | func (config AudioConfig) SetProperty(id common.PropertyID, value string) error { 140 | return config.properties.SetProperty(id, value) 141 | } 142 | 143 | // GetProperty gets a property value by ID. 144 | func (config AudioConfig) GetProperty(id common.PropertyID) string { 145 | return config.properties.GetProperty(id, "") 146 | } 147 | 148 | // SetPropertyByString sets a property value by name. 149 | func (config AudioConfig) SetPropertyByString(name string, value string) error { 150 | return config.properties.SetPropertyByString(name, value) 151 | } 152 | 153 | // GetPropertyByString gets a property value by name. 154 | func (config AudioConfig) GetPropertyByString(name string) string { 155 | return config.properties.GetPropertyByString(name, "") 156 | } 157 | -------------------------------------------------------------------------------- /audio/audio_output_stream.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package audio 5 | 6 | import ( 7 | "unsafe" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | // #include 14 | // #include 15 | // #include 16 | // #include 17 | // 18 | // /* Proxy functions forward declarations */ 19 | // int cgo_audio_push_stream_write_callback_wrapper(void *context, uint8_t* buffer, uint32_t size); 20 | // void cgo_audio_push_stream_close_callback_wrapper(void *context); 21 | import "C" 22 | 23 | // AudioOutputStream represents audio output stream used for custom audio output configurations. 
24 | // Updated in version 1.7.0 25 | type AudioOutputStream interface { 26 | Close() 27 | getHandle() C.SPXHANDLE 28 | } 29 | 30 | type audioOutputStreamBase struct { 31 | handle C.SPXHANDLE 32 | } 33 | 34 | func (stream *audioOutputStreamBase) getHandle() C.SPXHANDLE { 35 | return stream.handle 36 | } 37 | 38 | func (stream *audioOutputStreamBase) Close() { 39 | C.audio_stream_release(stream.handle) 40 | } 41 | 42 | // PullAudioOutputStream represents memory backed pull audio output stream used for custom audio output configurations. 43 | type PullAudioOutputStream struct { 44 | audioOutputStreamBase 45 | } 46 | 47 | // NewPullAudioOutputStreamFromHandle creates a new PullAudioOutputStream from a handle (for internal use) 48 | func NewPullAudioOutputStreamFromHandle(handle common.SPXHandle) *PullAudioOutputStream { 49 | stream := new(PullAudioOutputStream) 50 | stream.handle = uintptr2handle(handle) 51 | return stream 52 | } 53 | 54 | // CreatePullAudioOutputStream creates a memory backed PullAudioOutputStream. 55 | func CreatePullAudioOutputStream() (*PullAudioOutputStream, error) { 56 | var handle C.SPXHANDLE 57 | ret := uintptr(C.audio_stream_create_pull_audio_output_stream(&handle)) 58 | if ret != C.SPX_NOERROR { 59 | return nil, common.NewCarbonError(ret) 60 | } 61 | return NewPullAudioOutputStreamFromHandle(handle2uintptr(handle)), nil 62 | } 63 | 64 | // Read reads audio from the stream. 65 | // The maximal number of bytes to be read is determined from the size parameter. 66 | // If there is no data immediately available, read() blocks until the next data becomes available. 
67 | func (stream PullAudioOutputStream) Read(size uint) ([]byte, error) { 68 | cBuffer := C.malloc(C.sizeof_char * (C.size_t)(size)) 69 | defer C.free(unsafe.Pointer(cBuffer)) 70 | var outSize C.uint32_t 71 | ret := uintptr(C.pull_audio_output_stream_read(stream.handle, (*C.uint8_t)(cBuffer), (C.uint32_t)(size), &outSize)) 72 | if ret != C.SPX_NOERROR { 73 | return nil, common.NewCarbonError(ret) 74 | } 75 | buffer := C.GoBytes(cBuffer, (C.int)(outSize)) 76 | return buffer, nil 77 | } 78 | 79 | // PushAudioOutputStream represents audio output stream used for custom audio output configurations. 80 | type PushAudioOutputStream struct { 81 | audioOutputStreamBase 82 | } 83 | 84 | // PushAudioOutputStreamCallback an interface that defines callback methods (Write() and CloseStream()) for custom audio output 85 | // streams). 86 | type PushAudioOutputStreamCallback interface { 87 | Write(buffer []byte) int 88 | CloseStream() 89 | } 90 | 91 | var pushStreamCallbacks = make(map[C.SPXHANDLE]PushAudioOutputStreamCallback) 92 | 93 | func registerPushStreamCallback(handle C.SPXHANDLE, callback PushAudioOutputStreamCallback) { 94 | mu.Lock() 95 | defer mu.Unlock() 96 | pushStreamCallbacks[handle] = callback 97 | } 98 | 99 | func getPushStreamCallback(handle C.SPXHANDLE) *PushAudioOutputStreamCallback { 100 | mu.Lock() 101 | defer mu.Unlock() 102 | cb, ok := pushStreamCallbacks[handle] 103 | if ok { 104 | return &cb 105 | } 106 | return nil 107 | } 108 | 109 | //nolint:deadcode 110 | func deregisterPushStreamCallback(handle C.SPXHANDLE) { 111 | mu.Lock() 112 | defer mu.Unlock() 113 | pushStreamCallbacks[handle] = nil 114 | } 115 | 116 | //export cgoAudioOutputCallWriteCallback 117 | func cgoAudioOutputCallWriteCallback(handle C.SPXHANDLE, buffer *C.uint8_t, size C.uint32_t) int { 118 | callback := getPushStreamCallback(handle) 119 | if callback != nil { 120 | goBuffer := C.GoBytes(unsafe.Pointer(buffer), (C.int)(size)) 121 | return (*callback).Write(goBuffer) 122 | } 123 | 
return 0 124 | } 125 | 126 | //export cgoAudioOutputCallCloseCallback 127 | func cgoAudioOutputCallCloseCallback(handle C.SPXHANDLE) { 128 | callback := getPushStreamCallback(handle) 129 | if callback != nil { 130 | (*callback).CloseStream() 131 | } 132 | } 133 | 134 | // CreatePushAudioOutputStream creates a PushAudioOutputStream that delegates to the specified callback interface for Write() 135 | // and CloseStream() methods. 136 | func CreatePushAudioOutputStream(callback PushAudioOutputStreamCallback) (*PushAudioOutputStream, error) { 137 | var handle C.SPXHANDLE 138 | ret := uintptr(C.audio_stream_create_push_audio_output_stream(&handle)) 139 | if ret != C.SPX_NOERROR { 140 | return nil, common.NewCarbonError(ret) 141 | } 142 | ret = uintptr(C.push_audio_output_stream_set_callbacks( 143 | handle, 144 | unsafe.Pointer(handle), 145 | (C.CUSTOM_AUDIO_PUSH_STREAM_WRITE_CALLBACK)(unsafe.Pointer(C.cgo_audio_push_stream_write_callback_wrapper)), 146 | (C.CUSTOM_AUDIO_PUSH_STREAM_CLOSE_CALLBACK)(unsafe.Pointer(C.cgo_audio_push_stream_close_callback_wrapper)))) 147 | if ret != C.SPX_NOERROR { 148 | return nil, common.NewCarbonError(ret) 149 | } 150 | registerPushStreamCallback(handle, callback) 151 | stream := new(PushAudioOutputStream) 152 | stream.handle = handle 153 | return stream, nil 154 | } 155 | -------------------------------------------------------------------------------- /audio/audio_stream_container_format.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package audio 5 | 6 | // AudioStreamContainerFormat defines supported audio stream container format. 7 | type AudioStreamContainerFormat int //nolint:revive 8 | 9 | const ( 10 | // OGGOPUS Stream ContainerFormat definition for OGG OPUS. 
11 | OGGOPUS AudioStreamContainerFormat = 0x101 12 | 13 | // MP3 Stream ContainerFormat definition for MP3. 14 | MP3 AudioStreamContainerFormat = 0x102 15 | 16 | // FLAC Stream ContainerFormat definition for FLAC. 17 | FLAC AudioStreamContainerFormat = 0x103 18 | 19 | // ALAW Stream ContainerFormat definition for ALAW. 20 | ALAW AudioStreamContainerFormat = 0x104 21 | 22 | // MULAW Stream ContainerFormat definition for MULAW. 23 | MULAW AudioStreamContainerFormat = 0x105 24 | 25 | // AMRNB Stream ContainerFormat definition for AMRNB. Currently not supported. 26 | AMRNB AudioStreamContainerFormat = 0x106 27 | 28 | // AMRWB Stream ContainerFormat definition for AMRWB. Currently not supported. 29 | AMRWB AudioStreamContainerFormat = 0x107 30 | 31 | // ANY Stream ContainerFormat definition when the actual stream format is not known. 32 | ANY AudioStreamContainerFormat = 0x108 33 | ) 34 | 35 | // AudioStreamWaveFormat represents the format specified inside WAV container which are sent directly as encoded to the speech service. 36 | type AudioStreamWaveFormat int //nolint:revive 37 | 38 | const ( 39 | // WavePCM is the AudioStreamWaveFormat definition for PCM (pulse-code modulated) data in integer format. 40 | WavePCM AudioStreamWaveFormat = 0x0001 41 | 42 | // WaveALAW is the AudioStreamWaveFormat definition for A-law-encoded format. 43 | WaveALAW AudioStreamWaveFormat = 0x0006 44 | 45 | // WaveMULAW is the AudioStreamWaveFormat definition for Mu-law-encoded format. 46 | WaveMULAW AudioStreamWaveFormat = 0x0007 47 | 48 | ) 49 | -------------------------------------------------------------------------------- /audio/audio_stream_format.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package audio 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | // #include 13 | // #include 14 | import "C" 15 | 16 | // AudioStreamFormat represents the audio stream format used for custom audio input configurations. 17 | // Updated in version 1.5.0. 18 | type AudioStreamFormat struct { 19 | handle C.SPXHANDLE 20 | } 21 | 22 | // GetDefaultInputFormat creates an audio stream format object representing the default audio stream format 23 | // (16 kHz, 16 bit, mono PCM). 24 | func GetDefaultInputFormat() (*AudioStreamFormat, error) { 25 | var handle C.SPXHANDLE 26 | ret := uintptr(C.audio_stream_format_create_from_default_input(&handle)) 27 | if ret != C.SPX_NOERROR { 28 | return nil, common.NewCarbonError(ret) 29 | } 30 | format := new(AudioStreamFormat) 31 | format.handle = handle 32 | return format, nil 33 | } 34 | 35 | // GetWaveFormat creates an audio stream format object with the specified waveformat characteristics. 36 | func GetWaveFormat(samplesPerSecond uint32, bitsPerSample uint8, channels uint8, waveFormat AudioStreamWaveFormat) (*AudioStreamFormat, error) { 37 | var handle C.SPXHANDLE 38 | ret := uintptr(C.audio_stream_format_create_from_waveformat( 39 | &handle, 40 | (C.uint32_t)(samplesPerSecond), 41 | (C.uint8_t)(bitsPerSample), 42 | (C.uint8_t)(channels), 43 | (C.Audio_Stream_Wave_Format)(waveFormat))) 44 | if ret != C.SPX_NOERROR { 45 | return nil, common.NewCarbonError(ret) 46 | } 47 | format := new(AudioStreamFormat) 48 | format.handle = handle 49 | return format, nil 50 | } 51 | 52 | // GetWaveFormatPCM creates an audio stream format object with the specified PCM waveformat characteristics. 53 | // Note: Currently, only WAV / PCM with 16-bit samples, 16 kHz sample rate, and a single channel (Mono) is supported. When 54 | // used with Conversation Transcription, eight channels are supported. 
55 | func GetWaveFormatPCM(samplesPerSecond uint32, bitsPerSample uint8, channels uint8) (*AudioStreamFormat, error) { 56 | var handle C.SPXHANDLE 57 | ret := uintptr(C.audio_stream_format_create_from_waveformat_pcm( 58 | &handle, 59 | (C.uint32_t)(samplesPerSecond), 60 | (C.uint8_t)(bitsPerSample), 61 | (C.uint8_t)(channels))) 62 | if ret != C.SPX_NOERROR { 63 | return nil, common.NewCarbonError(ret) 64 | } 65 | format := new(AudioStreamFormat) 66 | format.handle = handle 67 | return format, nil 68 | } 69 | 70 | // GetDefaultOutputFormat creates an audio stream format object representing the default audio stream format 71 | // (16 kHz, 16 bit, mono PCM). 72 | func GetDefaultOutputFormat() (*AudioStreamFormat, error) { 73 | var handle C.SPXHANDLE 74 | ret := uintptr(C.audio_stream_format_create_from_default_output(&handle)) 75 | if ret != C.SPX_NOERROR { 76 | return nil, common.NewCarbonError(ret) 77 | } 78 | format := new(AudioStreamFormat) 79 | format.handle = handle 80 | return format, nil 81 | } 82 | 83 | // GetCompressedFormat creates an audio stream format object with the specified compressed audio container format, to be 84 | // used as input format. 85 | func GetCompressedFormat(compressedFormat AudioStreamContainerFormat) (*AudioStreamFormat, error) { 86 | var handle C.SPXHANDLE 87 | ret := uintptr(C.audio_stream_format_create_from_compressed_format(&handle, (C.Audio_Stream_Container_Format)(compressedFormat))) 88 | if ret != C.SPX_NOERROR { 89 | return nil, common.NewCarbonError(ret) 90 | } 91 | format := new(AudioStreamFormat) 92 | format.handle = handle 93 | return format, nil 94 | } 95 | 96 | // Close disposes the associated resources. 
97 | func (format *AudioStreamFormat) Close() { 98 | C.audio_stream_format_release(format.handle) 99 | } 100 | -------------------------------------------------------------------------------- /audio/cfunctions.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package audio 5 | 6 | // This file defines the proxy functions required to use callbacks 7 | 8 | // #include 9 | // #include 10 | // #include 11 | // extern int cgoAudioCallReadCallback(SPXHANDLE handle, uint8_t *buffer, uint32_t size); 12 | // extern void cgoAudioCallGetPropertyCallback(SPXHANDLE handle, int id, uint8_t *value, uint32_t size); 13 | // extern void cgoAudioCallCloseCallback(SPXHANDLE handle); 14 | // 15 | // int cgo_audio_read_callback_wrapper(void *context, uint8_t *buffer, uint32_t size) 16 | // { 17 | // return cgoAudioCallReadCallback((SPXHANDLE)context, buffer, size); 18 | // } 19 | // 20 | // void cgo_audio_get_property_callback_wrapper(void* context, int id, uint8_t* value, uint32_t size) 21 | // { 22 | // cgoAudioCallGetPropertyCallback((SPXHANDLE)context, id, value, size); 23 | // } 24 | // 25 | // void cgo_audio_close_callback_wrapper(void *context) 26 | // { 27 | // cgoAudioCallCloseCallback((SPXHANDLE)context); 28 | // } 29 | // 30 | // extern int cgoAudioOutputCallWriteCallback(SPXHANDLE handle, uint8_t *buffer, uint32_t size); 31 | // extern void cgoAudioOutputCallCloseCallback(SPXHANDLE handle); 32 | // 33 | // int cgo_audio_push_stream_write_callback_wrapper(void *context, uint8_t* buffer, uint32_t size) 34 | // { 35 | // return cgoAudioOutputCallWriteCallback((SPXHANDLE)context, buffer, size); 36 | // } 37 | // 38 | // void cgo_audio_push_stream_close_callback_wrapper(void *context) 39 | // { 40 | // cgoAudioOutputCallCloseCallback((SPXHANDLE)context); 41 | // } 42 | import 
"C" 43 | -------------------------------------------------------------------------------- /audio/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | // Package audio provides the audio configuration, input/output streams, and related utilities for audio interactions 5 | package audio -------------------------------------------------------------------------------- /audio/interop_utils.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package audio 5 | 6 | import ( 7 | "unsafe" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | import "C" 14 | 15 | func uintptr2handle(h common.SPXHandle) C.SPXHANDLE { 16 | return (C.SPXHANDLE)(unsafe.Pointer(h)) //nolint:govet 17 | } 18 | 19 | func handle2uintptr(h C.SPXHANDLE) common.SPXHandle { 20 | return (common.SPXHandle)(unsafe.Pointer(h)) //nolint:govet 21 | } 22 | -------------------------------------------------------------------------------- /audio/interop_utils_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package audio 5 | 6 | import ( 7 | "testing" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | func TestHandleConversion(t *testing.T) { 13 | orig := common.SPXHandle(3) 14 | handle := uintptr2handle(orig) 15 | dest := handle2uintptr(handle) 16 | if orig != dest { 17 | t.Error("Values are not equal") 18 | } 19 | if uintptr2handle(dest) != handle { 20 | t.Error("Values are not equal") 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /ci/azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | 2 | resources: 3 | - repo: self 4 | clean: true 5 | 6 | trigger: 7 | branches: 8 | include: 9 | - master 10 | 11 | schedules: 12 | - cron: "0 18 * * 6" 13 | displayName: "Saturday Evening Schedule" 14 | branches: 15 | include: 16 | - master 17 | 18 | pool: 19 | vmImage: ubuntu-latest 20 | variables: 21 | CARBON_VERSION: "1.42.0" 22 | 23 | steps: 24 | - task: GoTool@0 25 | inputs: 26 | version: '1.13' 27 | - script: | 28 | go version 29 | go get -v -t -d ./... 
30 | if [ -f Gopkg.toml ]; then 31 | curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh 32 | dep ensure 33 | fi 34 | displayName: 'Get dependencies' 35 | - task: ComponentGovernanceComponentDetection@0 36 | inputs: 37 | scanType: 'Register' 38 | verbosity: 'Verbose' 39 | alertWarningLevel: 'High' 40 | sourceScanPath: $(Build.SourcesDirectory) 41 | - script: | 42 | mkdir $HOME/carbon 43 | pushd $HOME/carbon 44 | wget https://csspeechstorage.blob.core.windows.net/drop/$(CARBON_VERSION)/SpeechSDK-Linux-$(CARBON_VERSION).tar.gz 45 | tar xzf SpeechSDK-Linux-$(CARBON_VERSION).tar.gz 46 | rm SpeechSDK-Linux-$(CARBON_VERSION).tar.gz 47 | ln -s SpeechSDK-Linux-$(CARBON_VERSION) current 48 | popd 49 | displayName: 'Get Speech SDK' 50 | - script: | 51 | sudo apt-get update 52 | sudo apt-get install libasound2 53 | sudo apt-get install -y libgstreamer1.0-0 gstreamer1.0-plugins-good 54 | dpkg -l | grep gstreamer 55 | displayName: 'Get Speech SDK dependencies' 56 | - script: | 57 | export CGO_CFLAGS="-I$HOME/carbon/current/include/c_api" 58 | export CGO_LDFLAGS="-L$HOME/carbon/current/lib/x64 -lMicrosoft.CognitiveServices.Speech.core" 59 | export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$HOME/carbon/current/lib/x64" 60 | go build -v ./... 61 | displayName: 'Build' 62 | -------------------------------------------------------------------------------- /common/cancellation_error_code.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | // CancellationErrorCode defines error code in case that CancellationReason is Error. 7 | type CancellationErrorCode int 8 | 9 | const ( 10 | 11 | // NoError if CancellationReason is EndOfStream, CancellationErrorCode 12 | // is set to NoError. 
13 | NoError CancellationErrorCode = 0 14 | 15 | // AuthenticationFailure indicates an authentication error. 16 | // An authentication error occurs if subscription key or authorization token is invalid, expired, 17 | // or does not match the region being used. 18 | AuthenticationFailure CancellationErrorCode = 1 19 | 20 | // BadRequest indicates that one or more recognition parameters are invalid or the audio format is not supported. 21 | BadRequest CancellationErrorCode = 2 22 | 23 | // TooManyRequests indicates that the number of parallel requests exceeded the number of allowed concurrent transcriptions for the subscription. 24 | TooManyRequests CancellationErrorCode = 3 25 | 26 | // Forbidden indicates that the free subscription used by the request ran out of quota. 27 | Forbidden CancellationErrorCode = 4 28 | 29 | // ConnectionFailure indicates a connection error. 30 | ConnectionFailure CancellationErrorCode = 5 31 | 32 | // ServiceTimeout indicates a time-out error when waiting for response from service. 33 | ServiceTimeout CancellationErrorCode = 6 34 | 35 | // ServiceError indicates that an error is returned by the service. 36 | ServiceError CancellationErrorCode = 7 37 | 38 | // ServiceUnavailable indicates that the service is currently unavailable. 39 | ServiceUnavailable CancellationErrorCode = 8 40 | 41 | // RuntimeError indicates an unexpected runtime error. 42 | RuntimeError CancellationErrorCode = 9 43 | ) 44 | 45 | //go:generate stringer -type=CancellationErrorCode -output=cancellation_error_code_string.go 46 | -------------------------------------------------------------------------------- /common/cancellation_error_code_string.go: -------------------------------------------------------------------------------- 1 | // Code generated by "stringer -type=CancellationErrorCode -output=cancellation_error_code_string.go"; DO NOT EDIT. 
2 | 3 | package common 4 | 5 | import "strconv" 6 | 7 | func _() { 8 | // An "invalid array index" compiler error signifies that the constant values have changed. 9 | // Re-run the stringer command to generate them again. 10 | var x [1]struct{} 11 | _ = x[NoError-0] 12 | _ = x[AuthenticationFailure-1] 13 | _ = x[BadRequest-2] 14 | _ = x[TooManyRequests-3] 15 | _ = x[Forbidden-4] 16 | _ = x[ConnectionFailure-5] 17 | _ = x[ServiceTimeout-6] 18 | _ = x[ServiceError-7] 19 | _ = x[ServiceUnavailable-8] 20 | _ = x[RuntimeError-9] 21 | } 22 | 23 | const _CancellationErrorCode_name = "NoErrorAuthenticationFailureBadRequestTooManyRequestsForbiddenConnectionFailureServiceTimeoutServiceErrorServiceUnavailableRuntimeError" 24 | 25 | var _CancellationErrorCode_index = [...]uint8{0, 7, 28, 38, 53, 62, 79, 93, 105, 123, 135} 26 | 27 | func (i CancellationErrorCode) String() string { 28 | if i < 0 || i >= CancellationErrorCode(len(_CancellationErrorCode_index)-1) { 29 | return "CancellationErrorCode(" + strconv.FormatInt(int64(i), 10) + ")" 30 | } 31 | return _CancellationErrorCode_name[_CancellationErrorCode_index[i]:_CancellationErrorCode_index[i+1]] 32 | } 33 | -------------------------------------------------------------------------------- /common/cancellation_reason.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | // CancellationReason defines the possible reasons a recognition result might be canceled. 7 | type CancellationReason int 8 | 9 | const ( 10 | // Error indicates that an error occurred during speech recognition. 11 | Error CancellationReason = 1 12 | 13 | // EndOfStream indicates that the end of the audio stream was reached. 
14 | EndOfStream CancellationReason = 2 15 | 16 | // CancelledByUser indicates that request was cancelled by the user. 17 | // Added in version 1.17.0 18 | CancelledByUser CancellationReason = 3 19 | ) 20 | 21 | //go:generate stringer -type=CancellationReason -output=cancellation_reason_string.go 22 | -------------------------------------------------------------------------------- /common/cancellation_reason_string.go: -------------------------------------------------------------------------------- 1 | // Code generated by "stringer -type=CancellationReason -output=cancellation_reason_string.go"; DO NOT EDIT. 2 | 3 | package common 4 | 5 | import "strconv" 6 | 7 | func _() { 8 | // An "invalid array index" compiler error signifies that the constant values have changed. 9 | // Re-run the stringer command to generate them again. 10 | var x [1]struct{} 11 | _ = x[Error-1] 12 | _ = x[EndOfStream-2] 13 | _ = x[CancelledByUser-3] 14 | } 15 | 16 | const _CancellationReason_name = "ErrorEndOfStreamCancelledByUser" 17 | 18 | var _CancellationReason_index = [...]uint8{0, 5, 16, 31} 19 | 20 | func (i CancellationReason) String() string { 21 | i -= 1 22 | if i < 0 || i >= CancellationReason(len(_CancellationReason_index)-1) { 23 | return "CancellationReason(" + strconv.FormatInt(int64(i+1), 10) + ")" 24 | } 25 | return _CancellationReason_name[_CancellationReason_index[i]:_CancellationReason_index[i+1]] 26 | } 27 | -------------------------------------------------------------------------------- /common/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | // Package common contains the definitions for many of the shared objects and properties in the Speech SDK 5 | package common -------------------------------------------------------------------------------- /common/error.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | import ( 7 | "fmt" 8 | ) 9 | 10 | // #include 11 | import "C" 12 | 13 | type CarbonError struct { 14 | Code int 15 | Message string 16 | } 17 | 18 | var errorString = map[int]string{ 19 | 0x000: "SPX_NOERROR", 20 | 0xfff: "SPXERR_NOT_IMPL", 21 | 0x001: "SPXERR_UNINITIALIZED", 22 | 0x002: "SPXERR_ALREADY_INITIALIZED", 23 | 0x003: "SPXERR_UNHANDLED_EXCEPTION", 24 | 0x004: "SPXERR_NOT_FOUND", 25 | 0x005: "SPXERR_INVALID_ARG", 26 | 0x006: "SPXERR_TIMEOUT", 27 | 0x007: "SPXERR_ALREADY_IN_PROGRESS", 28 | 0x008: "SPXERR_FILE_OPEN_FAILED", 29 | 0x009: "SPXERR_UNEXPECTED_EOF", 30 | 0x00a: "SPXERR_INVALID_HEADER", 31 | 0x00b: "SPXERR_AUDIO_IS_PUMPING", 32 | 0x00c: "SPXERR_UNSUPPORTED_FORMAT", 33 | 0x00d: "SPXERR_ABORT", 34 | 0x00e: "SPXERR_MIC_NOT_AVAILABLE", 35 | 0x00f: "SPXERR_INVALID_STATE", 36 | 0x010: "SPXERR_UUID_CREATE_FAILED", 37 | 0x011: "SPXERR_SETFORMAT_UNEXPECTED_STATE_TRANSITION", 38 | 0x012: "SPXERR_PROCESS_AUDIO_INVALID_STATE", 39 | 0x013: "SPXERR_START_RECOGNIZING_INVALID_STATE_TRANSITION", 40 | 0x014: "SPXERR_UNEXPECTED_CREATE_OBJECT_FAILURE", 41 | 0x015: "SPXERR_MIC_ERROR", 42 | 0x016: "SPXERR_NO_AUDIO_INPUT", 43 | 0x017: "SPXERR_UNEXPECTED_USP_SITE_FAILURE", 44 | 0x018: "SPXERR_UNEXPECTED_UNIDEC_SITE_FAILURE", 45 | 0x019: "SPXERR_BUFFER_TOO_SMALL", 46 | 0x01A: "SPXERR_OUT_OF_MEMORY", 47 | 0x01B: "SPXERR_RUNTIME_ERROR", 48 | 0x01C: "SPXERR_INVALID_URL", 49 | 0x01D: "SPXERR_INVALID_REGION", 50 | 0x01E: "SPXERR_SWITCH_MODE_NOT_ALLOWED", 51 | 0x01F: 
"SPXERR_CHANGE_CONNECTION_STATUS_NOT_ALLOWED", 52 | 0x020: "SPXERR_EXPLICIT_CONNECTION_NOT_SUPPORTED_BY_RECOGNIZER", 53 | 0x021: "SPXERR_INVALID_HANDLE", 54 | 0x022: "SPXERR_INVALID_RECOGNIZER", 55 | 0x023: "SPXERR_OUT_OF_RANGE", 56 | 0x024: "SPXERR_EXTENSION_LIBRARY_NOT_FOUND", 57 | 0x025: "SPXERR_UNEXPECTED_TTS_ENGINE_SITE_FAILURE", 58 | 0x026: "SPXERR_UNEXPECTED_AUDIO_OUTPUT_FAILURE", 59 | 0x027: "SPXERR_GSTREAMER_INTERNAL_ERROR", 60 | 0x028: "SPXERR_CONTAINER_FORMAT_NOT_SUPPORTED_ERROR", 61 | 0x029: "SPXERR_GSTREAMER_NOT_FOUND_ERROR", 62 | 0x02A: "SPXERR_INVALID_LANGUAGE", 63 | 0x02B: "SPXERR_UNSUPPORTED_API_ERROR", 64 | 0x02C: "SPXERR_RINGBUFFER_DATA_UNAVAILABLE", 65 | 0x030: "SPXERR_UNEXPECTED_CONVERSATION_SITE_FAILURE", 66 | 0x031: "SPXERR_UNEXPECTED_CONVERSATION_TRANSLATOR_SITE_FAILURE", 67 | 0x032: "SPXERR_CANCELED", 68 | } 69 | 70 | func NewCarbonError(errorHandle uintptr) CarbonError { 71 | var carbonError CarbonError 72 | carbonError.Code = getErrorCode(SPXHandle(errorHandle)) 73 | carbonError.Message = getErrorMessage(SPXHandle(errorHandle)) 74 | // When the message is empty, construct the error message using the errorHandle value directly. 75 | if carbonError.Message == "" { 76 | codeAsHexString := fmt.Sprintf("0x%0x", carbonError.Code) 77 | carbonError.Message = "Exception with an error code: " + codeAsHexString + " (" + errorString[carbonError.Code] + ")" 78 | } 79 | return carbonError 80 | } 81 | 82 | func (e CarbonError) Error() string { 83 | return e.Message 84 | } 85 | 86 | func getErrorCode(errorHandle SPXHandle) int { 87 | ret := int(C.error_get_error_code(uintptr2handle(errorHandle))) 88 | // A 0 means there was no corresponding event stored. 89 | // So this must be a SPX_* error and not a stored exception. 90 | // Return the HR as the error. 
91 | if ret == 0 { 92 | return int(errorHandle) 93 | } 94 | return ret 95 | } 96 | 97 | func getErrorMessage(errorHandle SPXHandle) string { 98 | message := "" 99 | ret := C.error_get_message(uintptr2handle(errorHandle)) 100 | if ret != nil { 101 | message = C.GoString(ret) 102 | } 103 | return message 104 | } 105 | -------------------------------------------------------------------------------- /common/interop_utils.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | // #include 7 | import "C" 8 | import "unsafe" 9 | 10 | // SPXHandle is the internal handle type 11 | type SPXHandle uintptr 12 | 13 | func uintptr2handle(h SPXHandle) C.SPXHANDLE { 14 | return (C.SPXHANDLE)(unsafe.Pointer(h)) //nolint:govet 15 | } 16 | -------------------------------------------------------------------------------- /common/operation_outcome.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | // OperationOutcome is the base type of operation outcomes. 7 | type OperationOutcome struct { 8 | // Error is present (not nil) if the operation failed 9 | Error error 10 | } 11 | 12 | // Failed checks if the operation failed 13 | func (outcome OperationOutcome) Failed() bool { 14 | return outcome.Error != nil 15 | } 16 | -------------------------------------------------------------------------------- /common/output_format.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. 
See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | // OutputFormat Defines output formats 7 | type OutputFormat int 8 | 9 | const ( 10 | // Simple output format 11 | Simple OutputFormat = 0 12 | // Detailed output format 13 | Detailed OutputFormat = 1 14 | ) 15 | -------------------------------------------------------------------------------- /common/profanity_option.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | // ProfanityOption defines the profanity option. 7 | type ProfanityOption int 8 | 9 | const ( 10 | // Masked profanity option. 11 | Masked ProfanityOption = 0 12 | 13 | // Removed profanity option 14 | Removed ProfanityOption = 1 15 | 16 | // Raw profanity option 17 | Raw ProfanityOption = 2 18 | ) 19 | -------------------------------------------------------------------------------- /common/property_collection.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | // #include 7 | // #include 8 | // #include 9 | import "C" 10 | import "unsafe" 11 | 12 | // PropertyCollection is a class to retrieve or set a property value from a property collection. 13 | type PropertyCollection struct { 14 | handle C.SPXHANDLE 15 | } 16 | 17 | // GetProperty returns value of a property. 18 | // If the property value is not defined, the specified default value is returned. 
19 | func (properties PropertyCollection) GetProperty(id PropertyID, defaultValue string) string { 20 | defValue := C.CString(defaultValue) 21 | defer C.free(unsafe.Pointer(defValue)) 22 | value := C.property_bag_get_string(properties.handle, (C.int)(id), nil, defValue) 23 | goValue := C.GoString(value) 24 | C.property_bag_free_string(value) 25 | return goValue 26 | } 27 | 28 | // GetPropertyByString returns value of a property. 29 | // If the property value is not defined, the specified default value is returned. 30 | func (properties PropertyCollection) GetPropertyByString(name string, defaultValue string) string { 31 | defValue := C.CString(defaultValue) 32 | defer C.free(unsafe.Pointer(defValue)) 33 | n := C.CString(name) 34 | defer C.free(unsafe.Pointer(n)) 35 | value := C.property_bag_get_string(properties.handle, -1, n, defValue) 36 | goValue := C.GoString(value) 37 | C.property_bag_free_string(value) 38 | return goValue 39 | } 40 | 41 | // SetProperty sets the value of a property. 42 | func (properties PropertyCollection) SetProperty(id PropertyID, value string) error { 43 | v := C.CString(value) 44 | defer C.free(unsafe.Pointer(v)) 45 | ret := uintptr(C.property_bag_set_string(properties.handle, (C.int)(id), nil, v)) 46 | if ret != C.SPX_NOERROR { 47 | return NewCarbonError(ret) 48 | } 49 | return nil 50 | } 51 | 52 | // SetPropertyByString sets the value of a property. 53 | func (properties PropertyCollection) SetPropertyByString(name string, value string) error { 54 | n := C.CString(name) 55 | defer C.free(unsafe.Pointer(n)) 56 | v := C.CString(value) 57 | defer C.free(unsafe.Pointer(v)) 58 | ret := uintptr(C.property_bag_set_string(properties.handle, -1, n, v)) 59 | if ret != C.SPX_NOERROR { 60 | return NewCarbonError(ret) 61 | } 62 | return nil 63 | } 64 | 65 | // Close disposes the associated resources. 
66 | func (properties PropertyCollection) Close() { 67 | C.property_bag_release(properties.handle) 68 | } 69 | 70 | // NewPropertyCollectionFromHandle creates a PropertyCollection from a handle (for internal use) 71 | func NewPropertyCollectionFromHandle(handle SPXHandle) *PropertyCollection { 72 | propertyCollection := new(PropertyCollection) 73 | propertyCollection.handle = uintptr2handle(handle) 74 | return propertyCollection 75 | } 76 | -------------------------------------------------------------------------------- /common/result_reason.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | // ResultReason specifies the possible reasons a recognition result might be generated. 7 | type ResultReason int 8 | 9 | const ( 10 | // NoMatch indicates speech could not be recognized. More details can be found in the NoMatchDetails object. 11 | NoMatch ResultReason = 0 12 | 13 | // Canceled indicates that the recognition was canceled. More details can be found using the CancellationDetails object. 14 | Canceled ResultReason = 1 15 | 16 | // RecognizingSpeech indicates the speech result contains hypothesis text. 17 | RecognizingSpeech ResultReason = 2 18 | 19 | // RecognizedSpeech indicates the speech result contains final text that has been recognized. 20 | // Speech Recognition is now complete for this phrase. 21 | RecognizedSpeech ResultReason = 3 22 | 23 | // RecognizingIntent indicates the intent result contains hypothesis text and intent. 24 | RecognizingIntent ResultReason = 4 25 | 26 | // RecognizedIntent indicates the intent result contains final text and intent. 27 | // Speech Recognition and Intent determination are now complete for this phrase. 
28 | RecognizedIntent ResultReason = 5 29 | 30 | // TranslatingSpeech indicates the translation result contains hypothesis text and its translation(s). 31 | TranslatingSpeech ResultReason = 6 32 | 33 | // TranslatedSpeech indicates the translation result contains final text and corresponding translation(s). 34 | // Speech Recognition and Translation are now complete for this phrase. 35 | TranslatedSpeech ResultReason = 7 36 | 37 | // SynthesizingAudio indicates the synthesized audio result contains a non-zero amount of audio data 38 | SynthesizingAudio ResultReason = 8 39 | 40 | // SynthesizingAudioCompleted indicates the synthesized audio is now complete for this phrase. 41 | SynthesizingAudioCompleted ResultReason = 9 42 | 43 | // RecognizingKeyword indicates the speech result contains (unverified) keyword text. 44 | RecognizingKeyword ResultReason = 10 45 | 46 | // RecognizedKeyword indicates that keyword recognition completed recognizing the given keyword. 47 | RecognizedKeyword ResultReason = 11 48 | 49 | // SynthesizingAudioStarted indicates the speech synthesis is now started 50 | SynthesizingAudioStarted ResultReason = 12 51 | 52 | // EnrollingVoiceProfile indicates the voice profile is being enrolling and customers need to send more audio to create a voice profile. 53 | EnrollingVoiceProfile ResultReason = 17 54 | 55 | // EnrolledVoiceProfile indicates the voice profile has been enrolled. 56 | EnrolledVoiceProfile ResultReason = 18 57 | 58 | // RecognizedSpeakers indicates some speakers have been successfully identified. 59 | RecognizedSpeakers ResultReason = 19 60 | 61 | // RecognizedSpeaker indicates one speaker has been successfully verified. 62 | RecognizedSpeaker ResultReason = 20 63 | 64 | // ResetVoiceProfile indicates the voice profile has been reset successfully. 65 | ResetVoiceProfile ResultReason = 21 66 | 67 | // DeletedVoiceProfile indicates the voice profile has been deleted successfully. 
68 | DeletedVoiceProfile ResultReason = 22 69 | 70 | // VoicesListRetrieved indicates the voices list has been retrieved successfully. 71 | VoicesListRetrieved ResultReason = 23 72 | ) 73 | 74 | //go:generate stringer -type=ResultReason -output=result_reason_string.go 75 | -------------------------------------------------------------------------------- /common/result_reason_string.go: -------------------------------------------------------------------------------- 1 | // Code generated by "stringer -type=ResultReason -output=result_reason_string.go"; DO NOT EDIT. 2 | 3 | package common 4 | 5 | import "strconv" 6 | 7 | func _() { 8 | // An "invalid array index" compiler error signifies that the constant values have changed. 9 | // Re-run the stringer command to generate them again. 10 | var x [1]struct{} 11 | _ = x[NoMatch-0] 12 | _ = x[Canceled-1] 13 | _ = x[RecognizingSpeech-2] 14 | _ = x[RecognizedSpeech-3] 15 | _ = x[RecognizingIntent-4] 16 | _ = x[RecognizedIntent-5] 17 | _ = x[TranslatingSpeech-6] 18 | _ = x[TranslatedSpeech-7] 19 | _ = x[SynthesizingAudio-8] 20 | _ = x[SynthesizingAudioCompleted-9] 21 | _ = x[RecognizingKeyword-10] 22 | _ = x[RecognizedKeyword-11] 23 | _ = x[SynthesizingAudioStarted-12] 24 | _ = x[EnrollingVoiceProfile-17] 25 | _ = x[EnrolledVoiceProfile-18] 26 | _ = x[RecognizedSpeakers-19] 27 | _ = x[RecognizedSpeaker-20] 28 | _ = x[ResetVoiceProfile-21] 29 | _ = x[DeletedVoiceProfile-22] 30 | _ = x[VoicesListRetrieved-23] 31 | } 32 | 33 | const ( 34 | _ResultReason_name_0 = "NoMatchCanceledRecognizingSpeechRecognizedSpeechRecognizingIntentRecognizedIntentTranslatingSpeechTranslatedSpeechSynthesizingAudioSynthesizingAudioCompletedRecognizingKeywordRecognizedKeywordSynthesizingAudioStarted" 35 | _ResultReason_name_1 = "EnrollingVoiceProfileEnrolledVoiceProfileRecognizedSpeakersRecognizedSpeakerResetVoiceProfileDeletedVoiceProfileVoicesListRetrieved" 36 | ) 37 | 38 | var ( 39 | _ResultReason_index_0 = [...]uint8{0, 7, 15, 32, 48, 65, 81, 98, 
114, 131, 157, 175, 192, 216} 40 | _ResultReason_index_1 = [...]uint8{0, 21, 41, 59, 76, 93, 112, 131} 41 | ) 42 | 43 | func (i ResultReason) String() string { 44 | switch { 45 | case 0 <= i && i <= 12: 46 | return _ResultReason_name_0[_ResultReason_index_0[i]:_ResultReason_index_0[i+1]] 47 | case 17 <= i && i <= 23: 48 | i -= 17 49 | return _ResultReason_name_1[_ResultReason_index_1[i]:_ResultReason_index_1[i+1]] 50 | default: 51 | return "ResultReason(" + strconv.FormatInt(int64(i), 10) + ")" 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /common/service_property_channel.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | // ServicePropertyChannel defines channels used to pass property settings to service. 7 | type ServicePropertyChannel int 8 | 9 | const ( 10 | // URIQueryParameter uses URI query parameter to pass property settings to service. 11 | URIQueryParameter ServicePropertyChannel = 0 12 | ) 13 | -------------------------------------------------------------------------------- /common/speech_synthesis_boundary_type.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | // SpeechSynthesisBoundaryType defines the boundary type of speech synthesis boundary event. 7 | type SpeechSynthesisBoundaryType int 8 | 9 | const ( 10 | // WordBoundary indicates word boundary. 11 | WordBoundary SpeechSynthesisBoundaryType = 0 12 | 13 | // PunctuationBoundary indicates punctuation boundary. 
14 | PunctuationBoundary SpeechSynthesisBoundaryType = 1 15 | 16 | // SentenceBoundary indicates sentence boundary. 17 | SentenceBoundary SpeechSynthesisBoundaryType = 2 18 | ) 19 | -------------------------------------------------------------------------------- /common/stream_status.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | // StreamStatus defines the possible status of audio data stream. 7 | type StreamStatus int 8 | 9 | const ( 10 | // StreamStatusUnknown indicates the audio data stream status is unknown. 11 | StreamStatusUnknown StreamStatus = 0 12 | 13 | // StreamStatusNoData indicates that the audio data stream contains no data. 14 | StreamStatusNoData StreamStatus = 1 15 | 16 | // StreamStatusPartialData indicates the audio data stream contains partial data of a speak request. 17 | StreamStatusPartialData StreamStatus = 2 18 | 19 | // StreamStatusAllData indicates the audio data stream contains all data of a speak request. 20 | StreamStatusAllData StreamStatus = 3 21 | 22 | // StreamStatusCanceled indicates the audio data stream was canceled. 23 | StreamStatusCanceled StreamStatus = 4 24 | ) 25 | 26 | //go:generate stringer -type=StreamStatus -trimprefix=StreamStatus -output=stream_status_string.go 27 | -------------------------------------------------------------------------------- /common/stream_status_string.go: -------------------------------------------------------------------------------- 1 | // Code generated by "stringer -type=StreamStatus -trimprefix=StreamStatus -output=stream_status_string.go"; DO NOT EDIT. 2 | 3 | package common 4 | 5 | import "strconv" 6 | 7 | func _() { 8 | // An "invalid array index" compiler error signifies that the constant values have changed. 
9 | // Re-run the stringer command to generate them again. 10 | var x [1]struct{} 11 | _ = x[StreamStatusUnknown-0] 12 | _ = x[StreamStatusNoData-1] 13 | _ = x[StreamStatusPartialData-2] 14 | _ = x[StreamStatusAllData-3] 15 | _ = x[StreamStatusCanceled-4] 16 | } 17 | 18 | const _StreamStatus_name = "UnknownNoDataPartialDataAllDataCanceled" 19 | 20 | var _StreamStatus_index = [...]uint8{0, 7, 13, 24, 31, 39} 21 | 22 | func (i StreamStatus) String() string { 23 | if i < 0 || i >= StreamStatus(len(_StreamStatus_index)-1) { 24 | return "StreamStatus(" + strconv.FormatInt(int64(i), 10) + ")" 25 | } 26 | return _StreamStatus_name[_StreamStatus_index[i]:_StreamStatus_index[i+1]] 27 | } 28 | -------------------------------------------------------------------------------- /common/synthesis_voice_gender.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | // SynthesisVoiceGender defines the gender of a synthesis voice. 7 | type SynthesisVoiceGender int 8 | 9 | const ( 10 | // GenderUnknown means the gender is unknown. 11 | GenderUnknown SynthesisVoiceGender = 0 12 | 13 | // Female indicates female. 14 | Female SynthesisVoiceGender = 1 15 | 16 | // Male indicates male. 17 | Male SynthesisVoiceGender = 2 18 | ) 19 | -------------------------------------------------------------------------------- /common/synthesis_voice_type.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | // SynthesisVoiceType defines the type of a synthesis voice. 7 | type SynthesisVoiceType int 8 | 9 | const ( 10 | // OnlineNeural indicates online neural voice. 
11 | OnlineNeural SynthesisVoiceType = 1 12 | 13 | // OnlineStandard indicates online standard voice. 14 | OnlineStandard SynthesisVoiceType = 2 15 | 16 | // OfflineNeural indicates offline neural voice. 17 | OfflineNeural SynthesisVoiceType = 3 18 | 19 | // OfflineStandard indicates offline standard voice. 20 | OfflineStandard SynthesisVoiceType = 4 21 | ) 22 | -------------------------------------------------------------------------------- /common/voice_profile_type.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package common 5 | 6 | // VoiceProfileType defines the type of scenario a voice profile was created for. 7 | type VoiceProfileType int 8 | 9 | const ( 10 | // TextIndependentIdentification indicates text independent speaker identification. 11 | TextIndependentIdentification VoiceProfileType = 1 12 | 13 | // TextDependentVerification indicates text dependent speaker verification. 14 | TextDependentVerification VoiceProfileType = 2 15 | 16 | // TextIndependentVerification indicates text independent speaker verification. 17 | TextIndependentVerification VoiceProfileType = 3 18 | ) 19 | -------------------------------------------------------------------------------- /diagnostics/diagnostics.go: -------------------------------------------------------------------------------- 1 | package diagnostics 2 | 3 | // #include 4 | // #include 5 | import "C" 6 | import "unsafe" 7 | 8 | // StartMemoryLogging starts logging to memory 9 | func StartMemoryLogging() { 10 | C.diagnostics_log_memory_start_logging() 11 | } 12 | 13 | // StopMemoryLogging stops logging to memory 14 | func StopMemoryLogging() { 15 | C.diagnostics_log_memory_stop_logging() 16 | } 17 | 18 | // SetMemoryLogFilters sets filters for memory logging 19 | func SetMemoryLogFilters(filters string) { 20 | cFilters := C.CString(filters) 21 | defer C.free(unsafe.Pointer(cFilters)) 22 |
C.diagnostics_log_memory_set_filters(cFilters) 23 | } 24 | 25 | // GetMemoryLogLineNumOldest gets the line number of the oldest memory log entry 26 | func GetMemoryLogLineNumOldest() uint { 27 | return uint(C.diagnostics_log_memory_get_line_num_oldest()) 28 | } 29 | 30 | // GetMemoryLogLineNumNewest gets the line number of the newest memory log entry 31 | func GetMemoryLogLineNumNewest() uint { 32 | return uint(C.diagnostics_log_memory_get_line_num_newest()) 33 | } 34 | 35 | // GetMemoryLogLine gets a specific line from the memory log 36 | func GetMemoryLogLine(lineNum uint) string { 37 | cLine := C.diagnostics_log_memory_get_line(C.size_t(lineNum)) 38 | if cLine == nil { 39 | return "" 40 | } 41 | return C.GoString(cLine) 42 | 43 | } 44 | 45 | // DumpMemoryLogToStderr dumps the memory log to stderr 46 | func DumpMemoryLogToStderr() error { 47 | ret := uintptr(C.diagnostics_log_memory_dump_to_stderr()) 48 | if ret != 0 { 49 | return newDiagnosticsError("dumpMemoryLogToStderr", ret) 50 | } 51 | return nil 52 | } 53 | 54 | // DumpMemoryLog dumps the memory log to a file and/or standard output 55 | func DumpMemoryLog(filename string, linePrefix string, emitToStdOut bool, emitToStdErr bool) error { 56 | var cFilename *C.char 57 | if filename != "" { 58 | cFilename = C.CString(filename) 59 | defer C.free(unsafe.Pointer(cFilename)) 60 | } 61 | var cLinePrefix *C.char 62 | if linePrefix != "" { 63 | cLinePrefix = C.CString(linePrefix) 64 | defer C.free(unsafe.Pointer(cLinePrefix)) 65 | } 66 | ret := uintptr(C.diagnostics_log_memory_dump(cFilename, cLinePrefix, C.bool(emitToStdOut), C.bool(emitToStdErr))) 67 | if ret != 0 { 68 | return newDiagnosticsError("dumpMemoryLog", ret) 69 | } 70 | return nil 71 | } 72 | 73 | // DumpMemoryLogOnExit dumps the memory log when the program exits 74 | func DumpMemoryLogOnExit(filename string, linePrefix string, emitToStdOut bool, emitToStdErr bool) error { 75 | var cFilename *C.char 76 | if filename != "" { 77 | cFilename = 
C.CString(filename) 78 | defer C.free(unsafe.Pointer(cFilename)) 79 | } 80 | var cLinePrefix *C.char 81 | if linePrefix != "" { 82 | cLinePrefix = C.CString(linePrefix) 83 | defer C.free(unsafe.Pointer(cLinePrefix)) 84 | } 85 | ret := uintptr(C.diagnostics_log_memory_dump_on_exit(cFilename, cLinePrefix, C.bool(emitToStdOut), C.bool(emitToStdErr))) 86 | if ret != 0 { 87 | return newDiagnosticsError("dumpMemoryLogOnExit", ret) 88 | } 89 | return nil 90 | } 91 | 92 | // StartConsoleLogging starts logging to the console 93 | func StartConsoleLogging(logToStderr bool) { 94 | C.diagnostics_log_console_start_logging(C.bool(logToStderr)) 95 | } 96 | 97 | // StopConsoleLogging stops logging to the console 98 | func StopConsoleLogging() { 99 | C.diagnostics_log_console_stop_logging() 100 | } 101 | 102 | // SetConsoleLogFilters sets filters for console logging 103 | func SetConsoleLogFilters(filters string) { 104 | cFilters := C.CString(filters) 105 | defer C.free(unsafe.Pointer(cFilters)) 106 | C.diagnostics_log_console_set_filters(cFilters) 107 | } 108 | -------------------------------------------------------------------------------- /diagnostics/error.go: -------------------------------------------------------------------------------- 1 | package diagnostics 2 | 3 | // #include 4 | // #include 5 | import "C" 6 | 7 | import "fmt" 8 | 9 | type diagnosticsError struct { 10 | operation string 11 | code uintptr 12 | } 13 | 14 | func newDiagnosticsError(operation string, code uintptr) error { 15 | return &diagnosticsError{ 16 | operation: operation, 17 | code: code, 18 | } 19 | } 20 | 21 | func (e *diagnosticsError) Error() string { 22 | return fmt.Sprintf("diagnostics operation '%s' failed with error code %d", e.operation, e.code) 23 | } 24 | -------------------------------------------------------------------------------- /dialog/activity_received_event_args.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. 
All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package dialog 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 8 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 9 | ) 10 | 11 | // #include 12 | // #include 13 | // #include 14 | import "C" 15 | import "unsafe" 16 | 17 | type ActivityReceivedEventArgs struct { 18 | handle C.SPXHANDLE 19 | Activity string 20 | } 21 | 22 | // Close releases the underlying resources 23 | func (event ActivityReceivedEventArgs) Close() { 24 | C.dialog_service_connector_activity_received_event_release(event.handle) 25 | } 26 | 27 | // HasAudio checks if the event contains audio 28 | func (event ActivityReceivedEventArgs) HasAudio() bool { 29 | return bool(C.dialog_service_connector_activity_received_event_has_audio(event.handle)) 30 | } 31 | 32 | // GetAudio gets the audio associated with the event. 33 | func (event ActivityReceivedEventArgs) GetAudio() (*audio.PullAudioOutputStream, error) { 34 | var handle C.SPXHANDLE 35 | ret := uintptr(C.dialog_service_connector_activity_received_event_get_audio(event.handle, &handle)) 36 | if ret != C.SPX_NOERROR { 37 | return nil, common.NewCarbonError(ret) 38 | } 39 | return audio.NewPullAudioOutputStreamFromHandle(handle2uintptr(handle)), nil 40 | } 41 | 42 | // NewActivityReceivedEventArgsFromHandle creates the object from the handle and copies the activity text into it; on failure the event is closed and an error is returned (for internal use) 43 | func NewActivityReceivedEventArgsFromHandle(handle common.SPXHandle) (*ActivityReceivedEventArgs, error) { 44 | event := new(ActivityReceivedEventArgs) 45 | event.handle = uintptr2handle(handle) 46 | var size C.size_t 47 | ret := uintptr(C.dialog_service_connector_activity_received_event_get_activity_size(event.handle, &size)) 48 | if ret != C.SPX_NOERROR { 49 | event.Close() 50 | return nil, common.NewCarbonError(ret) 51 | } 52 | actBuffer := C.malloc(C.sizeof_char * (size + 1)) 53 | defer
C.free(unsafe.Pointer(actBuffer)) 54 | ret = uintptr(C.dialog_service_connector_activity_received_event_get_activity(event.handle, (*C.char)(actBuffer), size+1)) 55 | if ret != C.SPX_NOERROR { 56 | event.Close() 57 | return nil, common.NewCarbonError(ret) 58 | } 59 | event.Activity = C.GoString((*C.char)(actBuffer)) 60 | return event, nil 61 | } 62 | 63 | type ActivityReceivedEventHandler func(event ActivityReceivedEventArgs) 64 | -------------------------------------------------------------------------------- /dialog/callback_helpers.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package dialog 5 | 6 | import ( 7 | "sync" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 10 | ) 11 | 12 | // #include 13 | // #include 14 | // #include 15 | import "C" 16 | 17 | var mu sync.Mutex 18 | var sessionStartedCallbacks = make(map[C.SPXHANDLE]speech.SessionEventHandler) 19 | 20 | func registerSessionStartedCallback(handler speech.SessionEventHandler, handle C.SPXHANDLE) { 21 | mu.Lock() 22 | defer mu.Unlock() 23 | sessionStartedCallbacks[handle] = handler 24 | } 25 | 26 | func getSessionStartedCallback(handle C.SPXHANDLE) speech.SessionEventHandler { 27 | mu.Lock() 28 | defer mu.Unlock() 29 | return sessionStartedCallbacks[handle] 30 | } 31 | 32 | //export dialogFireEventSessionStarted 33 | func dialogFireEventSessionStarted(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 34 | handler := getSessionStartedCallback(handle) 35 | event, err := speech.NewSessionEventArgsFromHandle(handle2uintptr(eventHandle)) 36 | if err != nil || handler == nil { 37 | C.recognizer_event_handle_release(eventHandle) // release the undelivered event, not the connector handle 38 | return 39 | } 40 | handler(*event) 41 | } 42 | 43 | var sessionStoppedCallbacks = make(map[C.SPXHANDLE]speech.SessionEventHandler) 44 | 45 | func
registerSessionStoppedCallback(handler speech.SessionEventHandler, handle C.SPXHANDLE) { 46 | mu.Lock() 47 | defer mu.Unlock() 48 | sessionStoppedCallbacks[handle] = handler 49 | } 50 | 51 | func getSessionStoppedCallback(handle C.SPXHANDLE) speech.SessionEventHandler { 52 | mu.Lock() 53 | defer mu.Unlock() 54 | return sessionStoppedCallbacks[handle] 55 | } 56 | 57 | //export dialogFireEventSessionStopped 58 | func dialogFireEventSessionStopped(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 59 | handler := getSessionStoppedCallback(handle) 60 | event, err := speech.NewSessionEventArgsFromHandle(handle2uintptr(eventHandle)) 61 | if err != nil || handler == nil { 62 | C.recognizer_event_handle_release(eventHandle) // release the undelivered event, not the connector handle 63 | return 64 | } 65 | handler(*event) 66 | } 67 | 68 | var recognizedCallbacks = make(map[C.SPXHANDLE]speech.SpeechRecognitionEventHandler) 69 | 70 | func registerRecognizedCallback(handler speech.SpeechRecognitionEventHandler, handle C.SPXHANDLE) { 71 | mu.Lock() 72 | defer mu.Unlock() 73 | recognizedCallbacks[handle] = handler 74 | } 75 | 76 | func getRecognizedCallback(handle C.SPXHANDLE) speech.SpeechRecognitionEventHandler { 77 | mu.Lock() 78 | defer mu.Unlock() 79 | return recognizedCallbacks[handle] 80 | } 81 | 82 | //export dialogFireEventRecognized 83 | func dialogFireEventRecognized(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 84 | handler := getRecognizedCallback(handle) 85 | event, err := speech.NewSpeechRecognitionEventArgsFromHandle(handle2uintptr(eventHandle)) 86 | if err != nil || handler == nil { 87 | C.recognizer_event_handle_release(eventHandle) // release the undelivered event, not the connector handle 88 | return 89 | } 90 | handler(*event) 91 | } 92 | 93 | var recognizingCallbacks = make(map[C.SPXHANDLE]speech.SpeechRecognitionEventHandler) 94 | 95 | func registerRecognizingCallback(handler speech.SpeechRecognitionEventHandler, handle C.SPXHANDLE) { 96 | mu.Lock() 97 | defer mu.Unlock() 98 | recognizingCallbacks[handle] = handler 99 | } 100 | 101 | func
getRecognizingCallback(handle C.SPXHANDLE) speech.SpeechRecognitionEventHandler { 102 | mu.Lock() 103 | defer mu.Unlock() 104 | return recognizingCallbacks[handle] 105 | } 106 | 107 | //export dialogFireEventRecognizing 108 | func dialogFireEventRecognizing(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 109 | handler := getRecognizingCallback(handle) 110 | event, err := speech.NewSpeechRecognitionEventArgsFromHandle(handle2uintptr(eventHandle)) 111 | if err != nil || handler == nil { 112 | C.recognizer_event_handle_release(eventHandle) // release the undelivered event, not the connector handle 113 | return 114 | } 115 | handler(*event) 116 | } 117 | 118 | var canceledCallbacks = make(map[C.SPXHANDLE]speech.SpeechRecognitionCanceledEventHandler) 119 | 120 | func registerCanceledCallback(handler speech.SpeechRecognitionCanceledEventHandler, handle C.SPXHANDLE) { 121 | mu.Lock() 122 | defer mu.Unlock() 123 | canceledCallbacks[handle] = handler 124 | } 125 | 126 | func getCanceledCallback(handle C.SPXHANDLE) speech.SpeechRecognitionCanceledEventHandler { 127 | mu.Lock() 128 | defer mu.Unlock() 129 | return canceledCallbacks[handle] 130 | } 131 | 132 | //export dialogFireEventCanceled 133 | func dialogFireEventCanceled(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 134 | handler := getCanceledCallback(handle) 135 | event, err := speech.NewSpeechRecognitionCanceledEventArgsFromHandle(handle2uintptr(eventHandle)) 136 | if err != nil || handler == nil { 137 | C.recognizer_event_handle_release(eventHandle) // release the undelivered event, not the connector handle 138 | return 139 | } 140 | handler(*event) 141 | } 142 | 143 | var activityReceivedCallbacks = make(map[C.SPXHANDLE]ActivityReceivedEventHandler) 144 | 145 | func registerActivityReceivedCallback(handler ActivityReceivedEventHandler, handle C.SPXHANDLE) { 146 | mu.Lock() 147 | defer mu.Unlock() 148 | activityReceivedCallbacks[handle] = handler 149 | } 150 | 151 | func getActivityReceivedCallback(handle C.SPXHANDLE) ActivityReceivedEventHandler { 152 | mu.Lock() 153 | defer mu.Unlock() 154 | return
activityReceivedCallbacks[handle] 155 | } 156 | 157 | //export dialogFireEventActivityReceived 158 | func dialogFireEventActivityReceived(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 159 | handler := getActivityReceivedCallback(handle) 160 | event, err := NewActivityReceivedEventArgsFromHandle(handle2uintptr(eventHandle)) 161 | if err != nil { 162 | // NewActivityReceivedEventArgsFromHandle closes the event itself when it fails, so there is nothing left to release here. 163 | return 164 | } 165 | if handler == nil { 166 | // No handler registered: release the event (not the connector handle) so it does not leak. 167 | event.Close() 168 | return 169 | } 170 | handler(*event) 171 | } 172 | -------------------------------------------------------------------------------- /dialog/cfunctions.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package dialog 5 | 6 | // This file defines the proxy functions required to use callbacks 7 | 8 | // #include 9 | // #include 10 | // extern void dialogFireEventSessionStarted(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 11 | // 12 | // void cgo_dialog_session_started(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 13 | // { 14 | // dialogFireEventSessionStarted(handle, event); 15 | // } 16 | // 17 | // extern void dialogFireEventSessionStopped(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 18 | // 19 | // void cgo_dialog_session_stopped(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 20 | // { 21 | // dialogFireEventSessionStopped(handle, event); 22 | // } 23 | // 24 | // extern void dialogFireEventRecognized(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 25 | // 26 | // void cgo_dialog_recognized(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 27 | // { 28 | // dialogFireEventRecognized(handle, event); 29 | // } 30 | // 31 | // extern void dialogFireEventRecognizing(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 32 | // 33 | // void cgo_dialog_recognizing(SPXRECOHANDLE handle, SPXEVENTHANDLE
event, void* context) 34 | // { 35 | // dialogFireEventRecognizing(handle, event); 36 | // } 37 | // 38 | // extern void dialogFireEventCanceled(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 39 | // 40 | // void cgo_dialog_canceled(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 41 | // { 42 | // dialogFireEventCanceled(handle, event); 43 | // } 44 | // 45 | // extern void dialogFireEventActivityReceived(SPXRECOHANDLE handle, SPXEVENTHANDLE event); 46 | // 47 | // void cgo_dialog_activity_received(SPXRECOHANDLE handle, SPXEVENTHANDLE event, void* context) 48 | // { 49 | // dialogFireEventActivityReceived(handle, event); 50 | // } 51 | // 52 | import "C" 53 | -------------------------------------------------------------------------------- /dialog/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | // Package dialog provides functionality for creating custom voice assistant applications and managing the 5 | // related interaction flow 6 | package dialog -------------------------------------------------------------------------------- /dialog/interop_utils.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package dialog 5 | 6 | import ( 7 | "unsafe" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | import "C" 14 | 15 | func uintptr2handle(h common.SPXHandle) C.SPXHANDLE { 16 | return (C.SPXHANDLE)(unsafe.Pointer(h)) //nolint:govet 17 | } 18 | 19 | func handle2uintptr(h C.SPXHANDLE) common.SPXHandle { 20 | return (common.SPXHandle)(unsafe.Pointer(h)) //nolint:govet 21 | } 22 | -------------------------------------------------------------------------------- /dialog/interop_utils_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package dialog 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | "testing" 9 | ) 10 | 11 | func TestHandleConversion(t *testing.T) { 12 | orig := common.SPXHandle(3) 13 | handle := uintptr2handle(orig) 14 | dest := handle2uintptr(handle) 15 | if orig != dest { 16 | t.Error("Values are not equal") 17 | } 18 | if uintptr2handle(dest) != handle { 19 | t.Error("Values are not equal") 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/Microsoft/cognitive-services-speech-sdk-go 2 | 3 | go 1.13 4 | -------------------------------------------------------------------------------- /samples/.gitignore: -------------------------------------------------------------------------------- 1 | samples 2 | -------------------------------------------------------------------------------- /samples/dialog_service_connector/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. 
See LICENSE.md file in the project root for full license information. 3 | 4 | // Package dialog_service_connector provides samples of single-turn recognition and keyword recognition 5 | package dialog_service_connector -------------------------------------------------------------------------------- /samples/dialog_service_connector/from_push_audio_stream.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package dialog_service_connector 5 | 6 | import ( 7 | "fmt" 8 | "time" 9 | 10 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 11 | "github.com/Microsoft/cognitive-services-speech-sdk-go/dialog" 12 | "github.com/Microsoft/cognitive-services-speech-sdk-go/samples/helpers" 13 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 14 | ) 15 | 16 | func ListenOnceFromStream(subscription string, region string, file string) { 17 | stream, err := audio.CreatePushAudioInputStream() 18 | if err != nil { 19 | fmt.Println("Got an error: ", err) 20 | return 21 | } 22 | defer stream.Close() 23 | audioConfig, err := audio.NewAudioConfigFromStreamInput(stream) 24 | if err != nil { 25 | fmt.Println("Got an error: ", err) 26 | return 27 | } 28 | defer audioConfig.Close() 29 | config, err := dialog.NewBotFrameworkConfigFromSubscription(subscription, region) 30 | if err != nil { 31 | fmt.Println("Got an error: ", err) 32 | return 33 | } 34 | defer config.Close() 35 | connector, err := dialog.NewDialogServiceConnectorFromConfig(config, audioConfig) 36 | if err != nil { 37 | fmt.Println("Got an error: ", err) 38 | return 39 | } 40 | defer connector.Close() 41 | sessionStartedHandler := func(event speech.SessionEventArgs) { 42 | defer event.Close() 43 | fmt.Println("Session Started") 44 | } 45 | sessionStoppedHandler := func(event speech.SessionEventArgs) { 46 | 
defer event.Close() 47 | fmt.Println("Session Stopped") 48 | } 49 | connector.SessionStarted(sessionStartedHandler) 50 | connector.SessionStopped(sessionStoppedHandler) 51 | activityReceivedHandler := func(event dialog.ActivityReceivedEventArgs) { 52 | defer event.Close() 53 | fmt.Println("Received an activity.") 54 | } 55 | connector.ActivityReceived(activityReceivedHandler) 56 | recognizedHandle := func(event speech.SpeechRecognitionEventArgs) { 57 | defer event.Close() 58 | fmt.Println("Recognized ", event.Result.Text) 59 | } 60 | connector.Recognized(recognizedHandle) 61 | recognizingHandler := func(event speech.SpeechRecognitionEventArgs) { 62 | defer event.Close() 63 | fmt.Println("Recognizing ", event.Result.Text) 64 | } 65 | connector.Recognizing(recognizingHandler) 66 | helpers.PumpFileIntoStream(file, stream) 67 | connector.ListenOnceAsync() 68 | <-time.After(10 * time.Second) 69 | } 70 | -------------------------------------------------------------------------------- /samples/dialog_service_connector/listen_once.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package dialog_service_connector 5 | 6 | import ( 7 | "fmt" 8 | "time" 9 | 10 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 11 | "github.com/Microsoft/cognitive-services-speech-sdk-go/dialog" 12 | "github.com/Microsoft/cognitive-services-speech-sdk-go/samples/helpers" 13 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 14 | ) 15 | 16 | func ListenOnce(subscription string, region string, file string) { 17 | stream, err := audio.CreatePushAudioInputStream() 18 | if err != nil { 19 | fmt.Println("Got an error: ", err) 20 | return 21 | } 22 | defer stream.Close() 23 | audioConfig, err := audio.NewAudioConfigFromStreamInput(stream) 24 | if err != nil { 25 | fmt.Println("Got an error: ", err) 26 | return 27 | } 28 | defer audioConfig.Close() 29 | config, err := dialog.NewBotFrameworkConfigFromSubscription(subscription, region) 30 | if err != nil { 31 | fmt.Println("Got an error: ", err) 32 | return 33 | } 34 | defer config.Close() 35 | connector, err := dialog.NewDialogServiceConnectorFromConfig(config, audioConfig) 36 | if err != nil { 37 | fmt.Println("Got an error: ", err) 38 | return 39 | } 40 | defer connector.Close() 41 | sessionStartedHandler := func(event speech.SessionEventArgs) { 42 | defer event.Close() 43 | fmt.Println("Session Started") 44 | } 45 | sessionStoppedHandler := func(event speech.SessionEventArgs) { 46 | defer event.Close() 47 | fmt.Println("Session Stopped") 48 | } 49 | connector.SessionStarted(sessionStartedHandler) 50 | connector.SessionStopped(sessionStoppedHandler) 51 | activityReceivedHandler := func(event dialog.ActivityReceivedEventArgs) { 52 | defer event.Close() 53 | fmt.Println("Received an activity.") 54 | } 55 | connector.ActivityReceived(activityReceivedHandler) 56 | recognizedHandle := func(event speech.SpeechRecognitionEventArgs) { 57 | defer event.Close() 58 | fmt.Println("Recognized ", event.Result.Text) 59 | } 60 | connector.Recognized(recognizedHandle) 61 | recognizingHandler := 
func(event speech.SpeechRecognitionEventArgs) { 62 | defer event.Close() 63 | fmt.Println("Recognizing ", event.Result.Text) 64 | } 65 | connector.Recognizing(recognizingHandler) 66 | helpers.PumpFileIntoStream(file, stream) 67 | connector.ListenOnceAsync() 68 | <-time.After(10 * time.Second) 69 | } 70 | -------------------------------------------------------------------------------- /samples/dialog_service_connector/start_keyword_listening.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package dialog_service_connector 5 | 6 | import ( 7 | "fmt" 8 | "time" 9 | 10 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 11 | "github.com/Microsoft/cognitive-services-speech-sdk-go/dialog" 12 | "github.com/Microsoft/cognitive-services-speech-sdk-go/samples/helpers" 13 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 14 | ) 15 | 16 | // KWS pumps the given audio file into a push stream, starts keyword recognition on a dialog service connector using the model loaded from ../../test_files/kws.table, and prints the events received during the following ten seconds. Any setup error is printed and aborts the sample. 17 | func KWS(subscription string, region string, file string) { 18 | stream, err := audio.CreatePushAudioInputStream() 19 | if err != nil { 20 | fmt.Println("Got an error: ", err) 21 | return 22 | } 23 | defer stream.Close() 24 | model, err := speech.NewKeywordRecognitionModelFromFile("../../test_files/kws.table") 25 | if err != nil { 26 | fmt.Println("Got an error: ", err) 27 | return 28 | } 29 | defer model.Close() 30 | audioConfig, err := audio.NewAudioConfigFromStreamInput(stream) 31 | if err != nil { 32 | fmt.Println("Got an error: ", err) 33 | return 34 | } 35 | defer audioConfig.Close() 36 | config, err := dialog.NewBotFrameworkConfigFromSubscription(subscription, region) 37 | if err != nil { 38 | fmt.Println("Got an error: ", err) 39 | return 40 | } 41 | defer config.Close() 42 | connector, err := dialog.NewDialogServiceConnectorFromConfig(config, audioConfig) 43 | if err != nil { 44 | fmt.Println("Got an error: ", err)
45 | return 46 | } 47 | defer connector.Close() 48 | sessionStartedHandler := func(event speech.SessionEventArgs) { 49 | defer event.Close() 50 | fmt.Println("Session Started") 51 | } 52 | sessionStoppedHandler := func(event speech.SessionEventArgs) { 53 | defer event.Close() 54 | fmt.Println("Session Stopped") 55 | } 56 | connector.SessionStarted(sessionStartedHandler) 57 | connector.SessionStopped(sessionStoppedHandler) 58 | activityReceivedHandler := func(event dialog.ActivityReceivedEventArgs) { 59 | defer event.Close() 60 | fmt.Println("Received an activity.") 61 | } 62 | connector.ActivityReceived(activityReceivedHandler) 63 | recognizedHandle := func(event speech.SpeechRecognitionEventArgs) { 64 | defer event.Close() 65 | fmt.Println("Recognized ", event.Result.Text) 66 | } 67 | connector.Recognized(recognizedHandle) 68 | recognizingHandler := func(event speech.SpeechRecognitionEventArgs) { 69 | defer event.Close() 70 | fmt.Println("Recognizing ", event.Result.Text) 71 | } 72 | connector.Recognizing(recognizingHandler) 73 | canceledHandler := func(event speech.SpeechRecognitionCanceledEventArgs) { 74 | defer event.Close() 75 | fmt.Println("Canceled ", event.Reason) 76 | fmt.Println("Error code ", event.ErrorCode) 77 | } 78 | connector.Canceled(canceledHandler) 79 | helpers.PumpFileIntoStream(file, stream) 80 | connector.StartKeywordRecognitionAsync(model) 81 | <-time.After(10 * time.Second) 82 | } 83 | -------------------------------------------------------------------------------- /samples/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/Microsoft/cognitive-services-speech-sdk-go/samples 2 | 3 | require github.com/Microsoft/cognitive-services-speech-sdk-go v1.33.0 4 | 5 | go 1.13 6 | -------------------------------------------------------------------------------- /samples/helpers/stream_utils.go: -------------------------------------------------------------------------------- 1 | // Copyright (c)
Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package helpers 5 | 6 | import ( 7 | "bufio" 8 | "fmt" 9 | "io" 10 | "os" 11 | 12 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 13 | ) 14 | 15 | func PumpFileIntoStream(filename string, stream *audio.PushAudioInputStream) { 16 | file, err := os.Open(filename) 17 | if err != nil { 18 | fmt.Println("Error opening file: ", err) 19 | return 20 | } 21 | defer file.Close() 22 | reader := bufio.NewReader(file) 23 | buffer := make([]byte, 1000) 24 | for { 25 | n, err := reader.Read(buffer) 26 | if err == io.EOF { 27 | fmt.Println("Done reading file.") 28 | break 29 | } 30 | if err != nil { 31 | fmt.Println("Error reading file: ", err) 32 | break 33 | } 34 | err = stream.Write(buffer[0:n]) 35 | if err != nil { 36 | fmt.Println("Error writing to the stream") 37 | } 38 | } 39 | stream.CloseStream() 40 | } 41 | -------------------------------------------------------------------------------- /samples/main.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | // Package main demonstrates usages for the speech recognizer and dialog service connector 5 | package main 6 | 7 | import ( 8 | "fmt" 9 | "os" 10 | 11 | "github.com/Microsoft/cognitive-services-speech-sdk-go/samples/dialog_service_connector" 12 | "github.com/Microsoft/cognitive-services-speech-sdk-go/samples/recognizer" 13 | "github.com/Microsoft/cognitive-services-speech-sdk-go/samples/synthesizer" 14 | "github.com/Microsoft/cognitive-services-speech-sdk-go/samples/speaker_recognition" 15 | ) 16 | 17 | type functionMap = map[string]func(string, string, string) 18 | 19 | func printHelp(executableName string, samples functionMap) { 20 | fmt.Println("Input not valid") 21 | fmt.Println("Usage: ") 22 | fmt.Println(executableName, " ") 23 | fmt.Println("Where sample is of the format :") 24 | fmt.Println("Available samples:") 25 | for id, _ := range samples { 26 | fmt.Println(" -- ", id) 27 | } 28 | } 29 | 30 | func main() { 31 | samples := functionMap{ 32 | "speech_recognizer:RecognizeOnceFromWavFile": recognizer.RecognizeOnceFromWavFile, 33 | "speech_recognizer:RecognizeOnceFromCompressedFile": recognizer.RecognizeOnceFromCompressedFile, 34 | "speech_recognizer:RecognizeOnceFromALAWFile": recognizer.RecognizeOnceFromALAWFile, 35 | "speech_recognizer:ContinuousFromMicrophone": recognizer.ContinuousFromMicrophone, 36 | "speech_recognizer:RecognizeContinuousUsingWrapper": recognizer.RecognizeContinuousUsingWrapper, 37 | "dialog_service_connector:ListenOnce": dialog_service_connector.ListenOnce, 38 | "dialog_service_connector:KWS": dialog_service_connector.KWS, 39 | "dialog_service_connector:ListenOnceFromStream": dialog_service_connector.ListenOnceFromStream, 40 | "speech_synthesizer:SynthesisToSpeaker": synthesizer.SynthesisToSpeaker, 41 | "speech_synthesizer:SynthesisToAudioDataStream": synthesizer.SynthesisToAudioDataStream, 42 | "speaker_recognizer:IndependentIdentification": speaker_recognition.IndependentIdentification, 43 | 
"speaker_recognizer:IndependentVerification": speaker_recognition.IndependentVerification, 44 | } 45 | args := os.Args[1:] 46 | if len(args) != 4 { 47 | printHelp(os.Args[0], samples) 48 | return 49 | } 50 | subscription := args[0] 51 | region := args[1] 52 | file := args[2] 53 | sample := args[3] 54 | sampleFunction := samples[sample] 55 | if sampleFunction == nil { 56 | printHelp(os.Args[0], samples) 57 | return 58 | } 59 | sampleFunction(subscription, region, file) 60 | } 61 | -------------------------------------------------------------------------------- /samples/recognizer/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | // Package recognizer provides samples of recognition from microphone or from an audio file 5 | package recognizer -------------------------------------------------------------------------------- /samples/recognizer/from_file.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package recognizer 5 | 6 | import ( 7 | "fmt" 8 | "time" 9 | "strings" 10 | 11 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 12 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 13 | "github.com/Microsoft/cognitive-services-speech-sdk-go/samples/helpers" 14 | ) 15 | 16 | func RecognizeOnceFromWavFile(subscription string, region string, file string) { 17 | audioConfig, err := audio.NewAudioConfigFromWavFileInput(file) 18 | if err != nil { 19 | fmt.Println("Got an error: ", err) 20 | return 21 | } 22 | defer audioConfig.Close() 23 | config, err := speech.NewSpeechConfigFromSubscription(subscription, region) 24 | if err != nil { 25 | fmt.Println("Got an error: ", err) 26 | return 27 | } 28 | defer config.Close() 29 | speechRecognizer, err := speech.NewSpeechRecognizerFromConfig(config, audioConfig) 30 | if err != nil { 31 | fmt.Println("Got an error: ", err) 32 | return 33 | } 34 | defer speechRecognizer.Close() 35 | speechRecognizer.SessionStarted(func(event speech.SessionEventArgs) { 36 | defer event.Close() 37 | fmt.Println("Session Started (ID=", event.SessionID, ")") 38 | }) 39 | speechRecognizer.SessionStopped(func(event speech.SessionEventArgs) { 40 | defer event.Close() 41 | fmt.Println("Session Stopped (ID=", event.SessionID, ")") 42 | }) 43 | task := speechRecognizer.RecognizeOnceAsync() 44 | var outcome speech.SpeechRecognitionOutcome 45 | select { 46 | case outcome = <-task: 47 | case <-time.After(5 * time.Second): 48 | fmt.Println("Timed out") 49 | return 50 | } 51 | defer outcome.Close() 52 | if outcome.Error != nil { 53 | fmt.Println("Got an error: ", outcome.Error) 54 | } 55 | fmt.Println("Got a recognition!") 56 | fmt.Println(outcome.Result.Text) 57 | } 58 | 59 | func RecognizeOnceFromCompressedFile(subscription string, region string, file string) { 60 | var containerFormat audio.AudioStreamContainerFormat 61 | if strings.Contains(file, ".mulaw") { 62 | containerFormat = audio.MULAW 63 | } else if 
strings.Contains(file, ".alaw") { 64 | containerFormat = audio.ALAW 65 | } else if strings.Contains(file, ".mp3") { 66 | containerFormat = audio.MP3 67 | } else if strings.Contains(file, ".flac") { 68 | containerFormat = audio.FLAC 69 | } else if strings.Contains(file, ".opus") { 70 | containerFormat = audio.OGGOPUS 71 | } else { 72 | containerFormat = audio.ANY 73 | } 74 | format, err := audio.GetCompressedFormat(containerFormat) 75 | if err != nil { 76 | fmt.Println("Got an error: ", err) 77 | return 78 | } 79 | defer format.Close() 80 | stream, err := audio.CreatePushAudioInputStreamFromFormat(format) 81 | if err != nil { 82 | fmt.Println("Got an error: ", err) 83 | return 84 | } 85 | defer stream.Close() 86 | audioConfig, err := audio.NewAudioConfigFromStreamInput(stream) 87 | if err != nil { 88 | fmt.Println("Got an error: ", err) 89 | return 90 | } 91 | defer audioConfig.Close() 92 | config, err := speech.NewSpeechConfigFromSubscription(subscription, region) 93 | if err != nil { 94 | fmt.Println("Got an error: ", err) 95 | return 96 | } 97 | defer config.Close() 98 | speechRecognizer, err := speech.NewSpeechRecognizerFromConfig(config, audioConfig) 99 | if err != nil { 100 | fmt.Println("Got an error: ", err) 101 | return 102 | } 103 | defer speechRecognizer.Close() 104 | speechRecognizer.SessionStarted(func(event speech.SessionEventArgs) { 105 | defer event.Close() 106 | fmt.Println("Session Started (ID=", event.SessionID, ")") 107 | }) 108 | speechRecognizer.SessionStopped(func(event speech.SessionEventArgs) { 109 | defer event.Close() 110 | fmt.Println("Session Stopped (ID=", event.SessionID, ")") 111 | }) 112 | helpers.PumpFileIntoStream(file, stream) 113 | task := speechRecognizer.RecognizeOnceAsync() 114 | var outcome speech.SpeechRecognitionOutcome 115 | select { 116 | case outcome = <-task: 117 | case <-time.After(40 * time.Second): 118 | fmt.Println("Timed out") 119 | return 120 | } 121 | defer outcome.Close() 122 | if outcome.Error != nil { 123 | 
fmt.Println("Got an error: ", outcome.Error) 124 | } 125 | fmt.Println("Got a recognition!") 126 | fmt.Println(outcome.Result.Text) 127 | } 128 | 129 | func RecognizeOnceFromALAWFile(subscription string, region string, file string) { 130 | var waveFormat audio.AudioStreamWaveFormat 131 | waveFormat = audio.WaveALAW 132 | format, err := audio.GetWaveFormat(8000, 16, 1, waveFormat) 133 | if err != nil { 134 | fmt.Println("Got an error: ", err) 135 | return 136 | } 137 | defer format.Close() 138 | stream, err := audio.CreatePushAudioInputStreamFromFormat(format) 139 | if err != nil { 140 | fmt.Println("Got an error: ", err) 141 | return 142 | } 143 | defer stream.Close() 144 | audioConfig, err := audio.NewAudioConfigFromStreamInput(stream) 145 | if err != nil { 146 | fmt.Println("Got an error: ", err) 147 | return 148 | } 149 | defer audioConfig.Close() 150 | config, err := speech.NewSpeechConfigFromSubscription(subscription, region) 151 | if err != nil { 152 | fmt.Println("Got an error: ", err) 153 | return 154 | } 155 | defer config.Close() 156 | speechRecognizer, err := speech.NewSpeechRecognizerFromConfig(config, audioConfig) 157 | if err != nil { 158 | fmt.Println("Got an error: ", err) 159 | return 160 | } 161 | defer speechRecognizer.Close() 162 | speechRecognizer.SessionStarted(func(event speech.SessionEventArgs) { 163 | defer event.Close() 164 | fmt.Println("Session Started (ID=", event.SessionID, ")") 165 | }) 166 | speechRecognizer.SessionStopped(func(event speech.SessionEventArgs) { 167 | defer event.Close() 168 | fmt.Println("Session Stopped (ID=", event.SessionID, ")") 169 | }) 170 | helpers.PumpFileIntoStream(file, stream) 171 | task := speechRecognizer.RecognizeOnceAsync() 172 | var outcome speech.SpeechRecognitionOutcome 173 | select { 174 | case outcome = <-task: 175 | case <-time.After(40 * time.Second): 176 | fmt.Println("Timed out") 177 | return 178 | } 179 | defer outcome.Close() 180 | if outcome.Error != nil { 181 | fmt.Println("Got an error: ", 
outcome.Error) 182 | } 183 | fmt.Println("Got a recognition!") 184 | fmt.Println(outcome.Result.Text) 185 | } -------------------------------------------------------------------------------- /samples/recognizer/from_microphone.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package recognizer 5 | 6 | import ( 7 | "bufio" 8 | "fmt" 9 | "os" 10 | 11 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 12 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 13 | ) 14 | 15 | func sessionStartedHandler(event speech.SessionEventArgs) { 16 | defer event.Close() 17 | fmt.Println("Session Started (ID=", event.SessionID, ")") 18 | } 19 | 20 | func sessionStoppedHandler(event speech.SessionEventArgs) { 21 | defer event.Close() 22 | fmt.Println("Session Stopped (ID=", event.SessionID, ")") 23 | } 24 | 25 | func recognizingHandler(event speech.SpeechRecognitionEventArgs) { 26 | defer event.Close() 27 | fmt.Println("Recognizing:", event.Result.Text) 28 | } 29 | 30 | func recognizedHandler(event speech.SpeechRecognitionEventArgs) { 31 | defer event.Close() 32 | fmt.Println("Recognized:", event.Result.Text) 33 | } 34 | 35 | func cancelledHandler(event speech.SpeechRecognitionCanceledEventArgs) { 36 | defer event.Close() 37 | fmt.Println("Received a cancellation: ", event.ErrorDetails) 38 | } 39 | 40 | func ContinuousFromMicrophone(subscription string, region string, file string) { 41 | audioConfig, err := audio.NewAudioConfigFromDefaultMicrophoneInput() 42 | if err != nil { 43 | fmt.Println("Got an error: ", err) 44 | return 45 | } 46 | defer audioConfig.Close() 47 | config, err := speech.NewSpeechConfigFromSubscription(subscription, region) 48 | if err != nil { 49 | fmt.Println("Got an error: ", err) 50 | return 51 | } 52 | defer config.Close() 53 | 
speechRecognizer, err := speech.NewSpeechRecognizerFromConfig(config, audioConfig) 54 | if err != nil { 55 | fmt.Println("Got an error: ", err) 56 | return 57 | } 58 | defer speechRecognizer.Close() 59 | speechRecognizer.SessionStarted(sessionStartedHandler) 60 | speechRecognizer.SessionStopped(sessionStoppedHandler) 61 | speechRecognizer.Recognizing(recognizingHandler) 62 | speechRecognizer.Recognized(recognizedHandler) 63 | speechRecognizer.Canceled(cancelledHandler) 64 | speechRecognizer.StartContinuousRecognitionAsync() 65 | defer speechRecognizer.StopContinuousRecognitionAsync() 66 | bufio.NewReader(os.Stdin).ReadBytes('\n') 67 | } 68 | -------------------------------------------------------------------------------- /samples/recognizer/wrapper.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package recognizer 5 | 6 | import ( 7 | "bufio" 8 | "fmt" 9 | "io" 10 | "os" 11 | "sync/atomic" 12 | "time" 13 | 14 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 15 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 16 | ) 17 | 18 | type SDKWrapperEventType int 19 | 20 | const ( 21 | Cancellation SDKWrapperEventType = iota 22 | Recognizing 23 | Recognized 24 | ) 25 | 26 | type SDKWrapperEvent struct { 27 | EventType SDKWrapperEventType 28 | Cancellation *speech.SpeechRecognitionCanceledEventArgs 29 | Recognized *speech.SpeechRecognitionEventArgs 30 | Recognizing *speech.SpeechRecognitionEventArgs 31 | } 32 | 33 | func (event *SDKWrapperEvent) Close() { 34 | if event.Cancellation != nil { 35 | event.Cancellation.Close() 36 | } 37 | if event.Recognizing != nil { 38 | event.Recognizing.Close() 39 | } 40 | if event.Recognized != nil { 41 | event.Recognized.Close() 42 | } 43 | } 44 | 45 | type SDKWrapper struct { 46 | stream *audio.PushAudioInputStream 47 | recognizer *speech.SpeechRecognizer 48 | started int32 49 | } 50 | 51 | func NewWrapper(subscription string, region string) (*SDKWrapper, error) { 52 | format, err := audio.GetDefaultInputFormat() 53 | if err != nil { 54 | return nil, err 55 | } 56 | defer format.Close() 57 | stream, err := audio.CreatePushAudioInputStreamFromFormat(format) 58 | if err != nil { 59 | return nil, err 60 | } 61 | audioConfig, err := audio.NewAudioConfigFromStreamInput(stream) 62 | if err != nil { 63 | stream.Close() 64 | return nil, err 65 | } 66 | defer audioConfig.Close() 67 | config, err := speech.NewSpeechConfigFromSubscription(subscription, region) 68 | if err != nil { 69 | stream.Close() 70 | return nil, err 71 | } 72 | defer config.Close() 73 | recognizer, err := speech.NewSpeechRecognizerFromConfig(config, audioConfig) 74 | if err != nil { 75 | stream.Close() 76 | return nil, err 77 | } 78 | wrapper := new(SDKWrapper) 79 | wrapper.recognizer = recognizer 80 | wrapper.stream = stream 81 | 
return wrapper, nil 82 | } 83 | 84 | func (wrapper *SDKWrapper) Close() { 85 | wrapper.stream.CloseStream() 86 | <-wrapper.recognizer.StopContinuousRecognitionAsync() 87 | wrapper.stream.Close() 88 | wrapper.recognizer.Close() 89 | } 90 | 91 | func (wrapper *SDKWrapper) Write(buffer []byte) error { 92 | if atomic.LoadInt32(&wrapper.started) != 1 { 93 | return fmt.Errorf("Trying to write when recognizer is stopped") 94 | } 95 | return wrapper.stream.Write(buffer) 96 | } 97 | 98 | func (wrapper *SDKWrapper) StartContinuous(callback func(*SDKWrapperEvent)) error { 99 | if atomic.SwapInt32(&wrapper.started, 1) == 1 { 100 | return nil 101 | } 102 | wrapper.recognizer.Recognized(func(event speech.SpeechRecognitionEventArgs) { 103 | wrapperEvent := new(SDKWrapperEvent) 104 | wrapperEvent.EventType = Recognized 105 | wrapperEvent.Recognized = &event 106 | callback(wrapperEvent) 107 | }) 108 | wrapper.recognizer.Recognizing(func(event speech.SpeechRecognitionEventArgs) { 109 | wrapperEvent := new(SDKWrapperEvent) 110 | wrapperEvent.EventType = Recognizing 111 | wrapperEvent.Recognizing = &event 112 | callback(wrapperEvent) 113 | }) 114 | wrapper.recognizer.Canceled(func(event speech.SpeechRecognitionCanceledEventArgs) { 115 | wrapperEvent := new(SDKWrapperEvent) 116 | wrapperEvent.EventType = Cancellation 117 | wrapperEvent.Cancellation = &event 118 | callback(wrapperEvent) 119 | }) 120 | return <-wrapper.recognizer.StartContinuousRecognitionAsync() 121 | } 122 | 123 | func (wrapper *SDKWrapper) StopContinuous() error { 124 | if atomic.SwapInt32(&wrapper.started, 0) == 0 { 125 | return nil 126 | } 127 | var empty = []byte{} 128 | wrapper.stream.Write(empty) 129 | wrapper.recognizer.Recognized(nil) 130 | wrapper.recognizer.Recognizing(nil) 131 | wrapper.recognizer.Canceled(nil) 132 | return <-wrapper.recognizer.StopContinuousRecognitionAsync() 133 | } 134 | 135 | func PumpFileContinuously(stop chan int, filename string, wrapper *SDKWrapper) { 136 | file, err := 
os.Open(filename) 137 | if err != nil { 138 | fmt.Println("Error opening file: ", err) 139 | return 140 | } 141 | defer file.Close() 142 | reader := bufio.NewReader(file) 143 | buffer := make([]byte, 3200) 144 | for { 145 | select { 146 | case <-stop: 147 | fmt.Println("Stopping pump...") 148 | return 149 | case <-time.After(100 * time.Millisecond): 150 | } 151 | n, err := reader.Read(buffer) 152 | if err == io.EOF { 153 | file.Seek(44, io.SeekStart) 154 | continue 155 | } 156 | if err != nil { 157 | fmt.Println("Error reading file: ", err) 158 | break 159 | } 160 | err = wrapper.Write(buffer[0:n]) 161 | if err != nil { 162 | fmt.Println("Error writing to the stream") 163 | } 164 | } 165 | } 166 | 167 | func RecognizeContinuousUsingWrapper(subscription string, region string, file string) { 168 | /* If running this in a server, each worker thread should run something similar to this */ 169 | wrapper, err := NewWrapper(subscription, region) 170 | if err != nil { 171 | fmt.Println("Got an error: ", err) 172 | } 173 | defer wrapper.Close() 174 | stop := make(chan int) 175 | go PumpFileContinuously(stop, file, wrapper) 176 | fmt.Println("Starting Continuous...") 177 | wrapper.StartContinuous(func(event *SDKWrapperEvent) { 178 | defer event.Close() 179 | switch event.EventType { 180 | case Recognized: 181 | fmt.Println("Got a recognized event") 182 | case Recognizing: 183 | fmt.Println("Got a recognizing event") 184 | case Cancellation: 185 | fmt.Println("Got a cancellation event") 186 | } 187 | }) 188 | <-time.After(10 * time.Second) 189 | stop <- 1 190 | fmt.Println("Stopping Continuous...") 191 | wrapper.StopContinuous() 192 | fmt.Println("Exiting...") 193 | } 194 | -------------------------------------------------------------------------------- /samples/speaker_recognition/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. 
See LICENSE.md file in the project root for full license information. 3 | 4 | // Package speaker_recognition provides samples of speaker recognition using voice profiles 5 | package speaker_recognition -------------------------------------------------------------------------------- /samples/speaker_recognition/independent_identification.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speaker_recognition 5 | 6 | import ( 7 | "fmt" 8 | "time" 9 | 10 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 11 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 12 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speaker" 13 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 14 | ) 15 | 16 | func GetNewVoiceProfileFromClient(client *speaker.VoiceProfileClient, expectedType common.VoiceProfileType) *speaker.VoiceProfile { 17 | future := client.CreateProfileAsync(expectedType, "en-US") 18 | outcome := <-future 19 | if outcome.Failed() { 20 | fmt.Println("Got an error creating profile: ", outcome.Error.Error()) 21 | return nil 22 | } 23 | profile := outcome.Profile 24 | _, err := profile.Id() 25 | if err != nil { 26 | fmt.Println("Unexpected error creating profile id: ", err) 27 | return nil 28 | } 29 | profileType, err := profile.Type(); 30 | if err != nil { 31 | fmt.Println("Unexpected error getting profile type: ", err) 32 | return nil 33 | } 34 | if profileType != expectedType { 35 | fmt.Println("Profile type does not match expected type") 36 | return nil 37 | } 38 | return profile 39 | } 40 | 41 | func EnrollProfile(client *speaker.VoiceProfileClient, profile *speaker.VoiceProfile, audioConfig *audio.AudioConfig) { 42 | enrollmentReason, currentReason := common.EnrollingVoiceProfile, common.EnrollingVoiceProfile 43 
| var currentResult *speaker.VoiceProfileEnrollmentResult 44 | expectedEnrollmentCount := 1 45 | for currentReason == enrollmentReason { 46 | enrollFuture := client.EnrollProfileAsync(profile, audioConfig) 47 | enrollOutcome := <-enrollFuture 48 | if enrollOutcome.Failed() { 49 | fmt.Println("Got an error enrolling profile: ", enrollOutcome.Error.Error()) 50 | return 51 | } 52 | currentResult = enrollOutcome.Result 53 | currentReason = currentResult.Reason 54 | if currentResult.EnrollmentsCount != expectedEnrollmentCount { 55 | fmt.Println("Unexpected enrollments for profile: ", currentResult.RemainingEnrollmentsCount) 56 | } 57 | expectedEnrollmentCount += 1 58 | } 59 | if currentReason != common.EnrolledVoiceProfile { 60 | fmt.Println("Unexpected result enrolling profile: ", currentResult) 61 | } 62 | } 63 | 64 | func DeleteProfile(client *speaker.VoiceProfileClient, profile *speaker.VoiceProfile) { 65 | deleteFuture := client.DeleteProfileAsync(profile) 66 | deleteOutcome := <-deleteFuture 67 | if deleteOutcome.Failed() { 68 | fmt.Println("Got an error deleting profile: ", deleteOutcome.Error.Error()) 69 | return 70 | } 71 | result := deleteOutcome.Result 72 | if result.Reason != common.DeletedVoiceProfile { 73 | fmt.Println("Unexpected result deleting profile: ", result) 74 | } 75 | } 76 | 77 | func IndependentIdentification(subscription string, region string, file string) { 78 | config, err := speech.NewSpeechConfigFromSubscription(subscription, region) 79 | if err != nil { 80 | fmt.Println("Got an error: ", err) 81 | return 82 | } 83 | defer config.Close() 84 | client, err := speaker.NewVoiceProfileClientFromConfig(config) 85 | if err != nil { 86 | fmt.Println("Got an error: ", err) 87 | return 88 | } 89 | defer client.Close() 90 | audioConfig, err := audio.NewAudioConfigFromWavFileInput(file) 91 | if err != nil { 92 | fmt.Println("Got an error: ", err) 93 | return 94 | } 95 | defer audioConfig.Close() 96 | <-time.After(10 * time.Second) 97 | expectedType := 
common.VoiceProfileType(1) 98 | 99 | profile := GetNewVoiceProfileFromClient(client, expectedType) 100 | if profile == nil { 101 | fmt.Println("Error creating profile") 102 | return 103 | } 104 | defer profile.Close() 105 | 106 | EnrollProfile(client, profile, audioConfig) 107 | 108 | profiles := []*speaker.VoiceProfile{profile} 109 | model, err := speaker.NewSpeakerIdentificationModelFromProfiles(profiles) 110 | if err != nil { 111 | fmt.Println("Error creating Identification model: ", err) 112 | } 113 | if model == nil { 114 | fmt.Println("Error creating Identification model: nil model") 115 | return 116 | } 117 | identifyAudioConfig, err := audio.NewAudioConfigFromWavFileInput(file) 118 | if err != nil { 119 | fmt.Println("Got an error: ", err) 120 | return 121 | } 122 | defer identifyAudioConfig.Close() 123 | speakerRecognizer, err := speaker.NewSpeakerRecognizerFromConfig(config, identifyAudioConfig) 124 | if err != nil { 125 | fmt.Println("Got an error: ", err) 126 | return 127 | } 128 | identifyFuture := speakerRecognizer.IdentifyOnceAsync(model) 129 | identifyOutcome := <-identifyFuture 130 | if identifyOutcome.Failed() { 131 | fmt.Println("Got an error identifying profile: ", identifyOutcome.Error.Error()) 132 | return 133 | } 134 | identifyResult := identifyOutcome.Result 135 | if identifyResult.Reason != common.RecognizedSpeakers { 136 | fmt.Println("Got an unexpected result identifying profile: ", identifyResult) 137 | } 138 | expectedID, _ := profile.Id() 139 | if identifyResult.ProfileID != expectedID { 140 | fmt.Println("Got an unexpected profile id identifying profile: ", identifyResult.ProfileID) 141 | } 142 | if identifyResult.Score < 1.0 { 143 | fmt.Println("Got an unexpected score identifying profile: ", identifyResult.Score) 144 | } 145 | 146 | DeleteProfile(client, profile) 147 | } 148 | -------------------------------------------------------------------------------- /samples/speaker_recognition/independent_verification.go: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speaker_recognition 5 | 6 | import ( 7 | "fmt" 8 | "time" 9 | 10 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 11 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speaker" 12 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 13 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 14 | ) 15 | 16 | func IndependentVerification(subscription string, region string, file string) { 17 | config, err := speech.NewSpeechConfigFromSubscription(subscription, region) 18 | if err != nil { 19 | fmt.Println("Got an error: ", err) 20 | return 21 | } 22 | defer config.Close() 23 | client, err := speaker.NewVoiceProfileClientFromConfig(config) 24 | if err != nil { 25 | fmt.Println("Got an error: ", err) 26 | return 27 | } 28 | defer client.Close() 29 | audioConfig, err := audio.NewAudioConfigFromWavFileInput(file) 30 | if err != nil { 31 | fmt.Println("Got an error: ", err) 32 | return 33 | } 34 | defer audioConfig.Close() 35 | <-time.After(10 * time.Second) 36 | expectedType := common.VoiceProfileType(3) 37 | 38 | profile := GetNewVoiceProfileFromClient(client, expectedType) 39 | if profile == nil { 40 | fmt.Println("Error creating profile") 41 | return 42 | } 43 | defer profile.Close() 44 | 45 | EnrollProfile(client, profile, audioConfig) 46 | 47 | model, err := speaker.NewSpeakerVerificationModelFromProfile(profile) 48 | if err != nil { 49 | fmt.Println("Error creating Verification model: ", err) 50 | } 51 | if model == nil { 52 | fmt.Println("Error creating Verification model: nil model") 53 | return 54 | } 55 | verifyAudioConfig, err := audio.NewAudioConfigFromWavFileInput(file) 56 | if err != nil { 57 | fmt.Println("Got an error: ", err) 58 | return 59 | } 60 | defer 
verifyAudioConfig.Close() 61 | speakerRecognizer, err := speaker.NewSpeakerRecognizerFromConfig(config, verifyAudioConfig) 62 | if err != nil { 63 | fmt.Println("Got an error: ", err) 64 | return 65 | } 66 | verifyFuture := speakerRecognizer.VerifyOnceAsync(model) 67 | verifyOutcome := <-verifyFuture 68 | if verifyOutcome.Failed() { 69 | fmt.Println("Got an error verifying profile: ", verifyOutcome.Error.Error()) 70 | return 71 | } 72 | verifyResult := verifyOutcome.Result 73 | if verifyResult.Reason != common.RecognizedSpeaker { 74 | fmt.Println("Got an unexpected result verifying profile: ", verifyResult) 75 | } 76 | expectedID, _ := profile.Id() 77 | if verifyResult.ProfileID != expectedID { 78 | fmt.Println("Got an unexpected profile id verifying profile: ", verifyResult.ProfileID) 79 | } 80 | if verifyResult.Score < 1.0 { 81 | fmt.Println("Got an unexpected score verifying profile: ", verifyResult.Score) 82 | } 83 | 84 | DeleteProfile(client, profile) 85 | } 86 | -------------------------------------------------------------------------------- /samples/synthesizer/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | // Package synthesizer provides samples of text-to-speech 5 | package synthesizer 6 | -------------------------------------------------------------------------------- /samples/synthesizer/to_audio_data_stream.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package synthesizer 5 | 6 | import ( 7 | "bufio" 8 | "fmt" 9 | "io" 10 | "os" 11 | "strings" 12 | "time" 13 | 14 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 15 | ) 16 | 17 | func SynthesisToAudioDataStream(subscription string, region string, file string) { 18 | config, err := speech.NewSpeechConfigFromSubscription(subscription, region) 19 | if err != nil { 20 | fmt.Println("Got an error: ", err) 21 | return 22 | } 23 | defer config.Close() 24 | speechSynthesizer, err := speech.NewSpeechSynthesizerFromConfig(config, nil) 25 | if err != nil { 26 | fmt.Println("Got an error: ", err) 27 | return 28 | } 29 | defer speechSynthesizer.Close() 30 | 31 | speechSynthesizer.SynthesisStarted(synthesizeStartedHandler) 32 | speechSynthesizer.Synthesizing(synthesizingHandler) 33 | speechSynthesizer.SynthesisCompleted(synthesizedHandler) 34 | speechSynthesizer.SynthesisCanceled(cancelledHandler) 35 | 36 | for { 37 | fmt.Printf("Enter some text that you want to speak, or enter empty text to exit.\n> ") 38 | text, _ := bufio.NewReader(os.Stdin).ReadString('\n') 39 | text = strings.TrimSuffix(text, "\n") 40 | if len(text) == 0 { 41 | break 42 | } 43 | 44 | // StartSpeakingTextAsync sends the result to channel when the synthesis starts. 45 | task := speechSynthesizer.StartSpeakingTextAsync(text) 46 | var outcome speech.SpeechSynthesisOutcome 47 | select { 48 | case outcome = <-task: 49 | case <-time.After(60 * time.Second): 50 | fmt.Println("Timed out") 51 | return 52 | } 53 | defer outcome.Close() 54 | if outcome.Error != nil { 55 | fmt.Println("Got an error: ", outcome.Error) 56 | return 57 | } 58 | 59 | // in most case we want to streaming receive the audio to lower the latency, 60 | // we can use AudioDataStream to do so. 
61 | stream, err := speech.NewAudioDataStreamFromSpeechSynthesisResult(outcome.Result) 62 | defer stream.Close() 63 | if err != nil { 64 | fmt.Println("Got an error: ", err) 65 | return 66 | } 67 | 68 | var all_audio []byte 69 | audio_chunk := make([]byte, 2048) 70 | for { 71 | n, err := stream.Read(audio_chunk) 72 | 73 | if err == io.EOF { 74 | break 75 | } 76 | 77 | all_audio = append(all_audio, audio_chunk[:n]...) 78 | } 79 | 80 | fmt.Printf("Read [%d] bytes from audio data stream.\n", len(all_audio)) 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /samples/synthesizer/to_speaker.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package synthesizer 5 | 6 | import ( 7 | "bufio" 8 | "fmt" 9 | "os" 10 | "strings" 11 | "time" 12 | 13 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 14 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 15 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 16 | ) 17 | 18 | func synthesizeStartedHandler(event speech.SpeechSynthesisEventArgs) { 19 | defer event.Close() 20 | fmt.Println("Synthesis started.") 21 | } 22 | 23 | func synthesizingHandler(event speech.SpeechSynthesisEventArgs) { 24 | defer event.Close() 25 | fmt.Printf("Synthesizing, audio chunk size %d.\n", len(event.Result.AudioData)) 26 | } 27 | 28 | func synthesizedHandler(event speech.SpeechSynthesisEventArgs) { 29 | defer event.Close() 30 | fmt.Printf("Synthesized, audio length %d.\n", len(event.Result.AudioData)) 31 | } 32 | 33 | func cancelledHandler(event speech.SpeechSynthesisEventArgs) { 34 | defer event.Close() 35 | fmt.Println("Received a cancellation.") 36 | } 37 | 38 | func SynthesisToSpeaker(subscription string, region string, file string) { 39 | audioConfig, err := 
audio.NewAudioConfigFromDefaultSpeakerOutput() 40 | if err != nil { 41 | fmt.Println("Got an error: ", err) 42 | return 43 | } 44 | defer audioConfig.Close() 45 | config, err := speech.NewSpeechConfigFromSubscription(subscription, region) 46 | if err != nil { 47 | fmt.Println("Got an error: ", err) 48 | return 49 | } 50 | defer config.Close() 51 | speechSynthesizer, err := speech.NewSpeechSynthesizerFromConfig(config, audioConfig) 52 | if err != nil { 53 | fmt.Println("Got an error: ", err) 54 | return 55 | } 56 | defer speechSynthesizer.Close() 57 | 58 | speechSynthesizer.SynthesisStarted(synthesizeStartedHandler) 59 | speechSynthesizer.Synthesizing(synthesizingHandler) 60 | speechSynthesizer.SynthesisCompleted(synthesizedHandler) 61 | speechSynthesizer.SynthesisCanceled(cancelledHandler) 62 | 63 | for { 64 | fmt.Printf("Enter some text that you want to speak, or enter empty text to exit.\n> ") 65 | text, _ := bufio.NewReader(os.Stdin).ReadString('\n') 66 | text = strings.TrimSuffix(text, "\n") 67 | if len(text) == 0 { 68 | break 69 | } 70 | 71 | task := speechSynthesizer.SpeakTextAsync(text) 72 | var outcome speech.SpeechSynthesisOutcome 73 | select { 74 | case outcome = <-task: 75 | case <-time.After(60 * time.Second): 76 | fmt.Println("Timed out") 77 | return 78 | } 79 | defer outcome.Close() 80 | if outcome.Error != nil { 81 | fmt.Println("Got an error: ", outcome.Error) 82 | return 83 | } 84 | 85 | if outcome.Result.Reason == common.SynthesizingAudioCompleted { 86 | fmt.Printf("Speech synthesized to speaker for text [%s].\n", text) 87 | } else { 88 | cancellation, _ := speech.NewCancellationDetailsFromSpeechSynthesisResult(outcome.Result) 89 | fmt.Printf("CANCELED: Reason=%v.\n", cancellation.Reason) 90 | 91 | if cancellation.Reason == common.Error { 92 | fmt.Printf("CANCELED: ErrorCode=%v\nCANCELED: ErrorDetails=[%s]\nCANCELED: Did you update the subscription info?\n", 93 | cancellation.ErrorCode, 94 | cancellation.ErrorDetails) 95 | } 96 | } 97 | } 98 | } 
99 | -------------------------------------------------------------------------------- /speaker/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | // Package speaker provides functionality for creating speaker recognition applications and managing the 5 | // related voice profiles 6 | package speaker -------------------------------------------------------------------------------- /speaker/interop_utils.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speaker 5 | 6 | import ( 7 | "unsafe" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | import "C" 14 | 15 | func uintptr2handle(h common.SPXHandle) C.SPXHANDLE { 16 | return (C.SPXHANDLE)(unsafe.Pointer(h)) //nolint:govet 17 | } 18 | 19 | func handle2uintptr(h C.SPXHANDLE) common.SPXHandle { 20 | return (common.SPXHandle)(unsafe.Pointer(h)) //nolint:govet 21 | } 22 | -------------------------------------------------------------------------------- /speaker/interop_utils_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package speaker 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | "testing" 9 | ) 10 | 11 | func TestHandleConversion(t *testing.T) { 12 | orig := common.SPXHandle(3) 13 | handle := uintptr2handle(orig) 14 | dest := handle2uintptr(handle) 15 | if orig != dest { 16 | t.Error("Values are not equal") 17 | } 18 | if uintptr2handle(dest) != handle { 19 | t.Error("Values are not equal") 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /speaker/speaker_identification_model.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speaker 5 | 6 | import ( 7 | 8 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 9 | ) 10 | 11 | // #include 12 | // #include 13 | import "C" 14 | 15 | // SpeakerIdentificationModel is the class that defines a identification model to be used in speaker identification scenarios. 16 | type SpeakerIdentificationModel struct { 17 | handle C.SPXHANDLE 18 | } 19 | 20 | // newSpeakerIdentificationModelFromHandle creates a SpeakerIdentificationModel instance from a valid handle. This is for internal use only. 21 | func newSpeakerIdentificationModelFromHandle(handle common.SPXHandle) (*SpeakerIdentificationModel, error) { 22 | model := new(SpeakerIdentificationModel) 23 | model.handle = uintptr2handle(handle) 24 | return model, nil 25 | } 26 | 27 | // NewSpeakerIdentificationModelFromProfile creates an instance of the identification model using the given voice profiles. 
28 | func NewSpeakerIdentificationModelFromProfiles(profiles []*VoiceProfile) (*SpeakerIdentificationModel, error) { 29 | var handle C.SPXHANDLE 30 | ret := uintptr(C.speaker_identification_model_create(&handle)) 31 | if ret != C.SPX_NOERROR { 32 | return nil, common.NewCarbonError(ret) 33 | } 34 | 35 | for _, profile := range profiles { 36 | profileHandle := profile.GetHandle() 37 | ret := uintptr(C.speaker_identification_model_add_profile(handle, uintptr2handle(profileHandle))) 38 | if ret != C.SPX_NOERROR { 39 | C.speaker_identification_model_release_handle(handle) 40 | return nil, common.NewCarbonError(ret) 41 | } 42 | } 43 | 44 | return newSpeakerIdentificationModelFromHandle(handle2uintptr(handle)) 45 | } 46 | 47 | // Close disposes the associated resources. 48 | func (model *SpeakerIdentificationModel) Close() { 49 | C.speaker_identification_model_release_handle(model.handle) 50 | } 51 | 52 | func (model *SpeakerIdentificationModel) GetHandle() common.SPXHandle { 53 | return handle2uintptr(model.handle) 54 | } 55 | -------------------------------------------------------------------------------- /speaker/speaker_recognition_result.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speaker 5 | 6 | import ( 7 | "strconv" 8 | "unsafe" 9 | 10 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 11 | ) 12 | 13 | // #include 14 | // #include 15 | // #include 16 | // #include 17 | // #include 18 | // 19 | import "C" 20 | 21 | // SpeakerRecognitionResult contains information about result from voice profile operations. 22 | type SpeakerRecognitionResult struct { 23 | handle C.SPXHANDLE 24 | 25 | // ResultID specifies the result identifier. 26 | ResultID string 27 | 28 | // Reason specifies status of speech synthesis result. 
29 | Reason common.ResultReason 30 | 31 | // ProfileID specifies the recognized profile id. 32 | ProfileID string 33 | 34 | // Score specifies the confidence score for the recognized profile id. 35 | Score float64 36 | 37 | // ErrorDetails presents error details. 38 | ErrorDetails string 39 | 40 | // Collection of additional properties. 41 | Properties *common.PropertyCollection 42 | } 43 | 44 | // Close releases the underlying resources 45 | func (result SpeakerRecognitionResult) Close() { 46 | result.Properties.Close() 47 | C.recognizer_result_handle_release(result.handle) 48 | } 49 | 50 | // NewSpeakerRecognitionResultFromHandle creates a SpeakerRecognitionResult from a handle (for internal use) 51 | func NewSpeakerRecognitionResultFromHandle (handle common.SPXHandle) (*SpeakerRecognitionResult, error) { 52 | result := new(SpeakerRecognitionResult) 53 | result.handle = uintptr2handle(handle) 54 | buffer := C.malloc(C.sizeof_char * 1024) 55 | defer C.free(unsafe.Pointer(buffer)) 56 | /* ResultID */ 57 | ret := uintptr(C.result_get_result_id(result.handle, (*C.char)(buffer), 1024)) 58 | if ret != C.SPX_NOERROR { 59 | return nil, common.NewCarbonError(ret) 60 | } 61 | result.ResultID = C.GoString((*C.char)(buffer)) 62 | /* Reason */ 63 | var cReason C.Result_Reason 64 | ret = uintptr(C.result_get_reason(result.handle, &cReason)) 65 | if ret != C.SPX_NOERROR { 66 | return nil, common.NewCarbonError(ret) 67 | } 68 | result.Reason = (common.ResultReason)(cReason) 69 | /* Properties */ 70 | var propBagHandle C.SPXHANDLE 71 | ret = uintptr(C.result_get_property_bag(result.handle, &propBagHandle)) 72 | if ret != C.SPX_NOERROR { 73 | return nil, common.NewCarbonError(ret) 74 | } 75 | result.Properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 76 | result.ErrorDetails = result.Properties.GetProperty(common.CancellationDetailsReasonDetailedText, "") 77 | 78 | /* ProfileID */ 79 | result.ProfileID = 
result.Properties.GetPropertyByString("speakerrecognition.profileid", "") 80 | 81 | /* Score */ 82 | value := result.Properties.GetPropertyByString("speakerrecognition.score", "0.0") 83 | if value != "" { 84 | if floatVal, err := strconv.ParseFloat(value, 64); err == nil { 85 | result.Score = floatVal 86 | } 87 | } 88 | 89 | return result, nil 90 | } 91 | 92 | // SpeakerRecognitionOutcome is a wrapper type to be returned by operations returning SpeakerRecognitionResult and error 93 | type SpeakerRecognitionOutcome struct { 94 | common.OperationOutcome 95 | 96 | // Result is the result of the operation 97 | Result *SpeakerRecognitionResult 98 | } 99 | 100 | // Close releases the underlying resources 101 | func (outcome SpeakerRecognitionOutcome) Close() { 102 | if outcome.Result != nil { 103 | outcome.Result.Close() 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /speaker/speaker_recognizer.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speaker 5 | 6 | import ( 7 | 8 | "github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | "github.com/Microsoft/cognitive-services-speech-sdk-go/speech" 11 | ) 12 | 13 | // #include 14 | // #include 15 | // #include 16 | // 17 | import "C" 18 | 19 | // SpeakerRecognizer is the class for speaker recognizers. 
20 | type SpeakerRecognizer struct { 21 | Properties *common.PropertyCollection 22 | handle C.SPXHANDLE 23 | } 24 | 25 | func newSpeakerRecognizerFromHandle(handle C.SPXHANDLE) (*SpeakerRecognizer, error) { 26 | var propBagHandle C.SPXHANDLE 27 | ret := uintptr(C.speaker_recognizer_get_property_bag(handle, &propBagHandle)) 28 | if ret != C.SPX_NOERROR { 29 | return nil, common.NewCarbonError(ret) 30 | } 31 | recognizer := new(SpeakerRecognizer) 32 | recognizer.handle = handle 33 | recognizer.Properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 34 | return recognizer, nil 35 | } 36 | 37 | // NewSpeakerRecognizerFromConfig creates a speaker recognizer from a speech config and audio config. 38 | func NewSpeakerRecognizerFromConfig(config *speech.SpeechConfig, audioConfig *audio.AudioConfig) (*SpeakerRecognizer, error) { 39 | var handle C.SPXHANDLE 40 | if config == nil { 41 | return nil, common.NewCarbonError(uintptr(C.SPXERR_INVALID_ARG)) 42 | } 43 | configHandle := config.GetHandle() 44 | var audioHandle C.SPXHANDLE 45 | if audioConfig == nil { 46 | audioHandle = nil 47 | } else { 48 | audioHandle = uintptr2handle(audioConfig.GetHandle()) 49 | } 50 | ret := uintptr(C.recognizer_create_speaker_recognizer_from_config(&handle, uintptr2handle(configHandle), audioHandle)) 51 | if ret != C.SPX_NOERROR { 52 | return nil, common.NewCarbonError(ret) 53 | } 54 | return newSpeakerRecognizerFromHandle(handle) 55 | } 56 | 57 | // VerifyOnceAsync starts speaker verification, and returns a score indicates whether the profile in the model is verified or not 58 | func (recognizer SpeakerRecognizer) VerifyOnceAsync(model *SpeakerVerificationModel) chan SpeakerRecognitionOutcome { 59 | outcome := make(chan SpeakerRecognitionOutcome) 60 | go func() { 61 | var handle C.SPXRESULTHANDLE 62 | modelHandle := uintptr2handle(model.GetHandle()) 63 | ret := uintptr(C.speaker_recognizer_verify(recognizer.handle, modelHandle, &handle)) 64 | if ret != C.SPX_NOERROR { 
65 | outcome <- SpeakerRecognitionOutcome{Result: nil, OperationOutcome: common.OperationOutcome{common.NewCarbonError(ret)}} 66 | } else { 67 | result, err := NewSpeakerRecognitionResultFromHandle(handle2uintptr(handle)) 68 | outcome <- SpeakerRecognitionOutcome{Result: result, OperationOutcome: common.OperationOutcome{err}} 69 | } 70 | }() 71 | return outcome 72 | } 73 | 74 | // IdentifyOnceAsync starts speaker verification, and returns a score indicates whether the profile in the model is verified or not 75 | func (recognizer SpeakerRecognizer) IdentifyOnceAsync(model *SpeakerIdentificationModel) chan SpeakerRecognitionOutcome { 76 | outcome := make(chan SpeakerRecognitionOutcome) 77 | go func() { 78 | var handle C.SPXRESULTHANDLE 79 | modelHandle := uintptr2handle(model.GetHandle()) 80 | ret := uintptr(C.speaker_recognizer_identify(recognizer.handle, modelHandle, &handle)) 81 | if ret != C.SPX_NOERROR { 82 | outcome <- SpeakerRecognitionOutcome{Result: nil, OperationOutcome: common.OperationOutcome{common.NewCarbonError(ret)}} 83 | } else { 84 | result, err := NewSpeakerRecognitionResultFromHandle(handle2uintptr(handle)) 85 | outcome <- SpeakerRecognitionOutcome{Result: result, OperationOutcome: common.OperationOutcome{err}} 86 | } 87 | }() 88 | return outcome 89 | } 90 | 91 | // SetAuthorizationToken sets the authorization token that will be used for connecting to the service. 92 | // Note: The caller needs to ensure that the authorization token is valid. Before the authorization token 93 | // expires, the caller needs to refresh it by calling this setter with a new valid token. 94 | // Otherwise, the recognizer will encounter errors during recognition. 95 | func (recognizer SpeakerRecognizer) SetAuthorizationToken(token string) error { 96 | return recognizer.Properties.SetProperty(common.SpeechServiceAuthorizationToken, token) 97 | } 98 | 99 | // AuthorizationToken is the authorization token. 
100 | func (recognizer SpeakerRecognizer) AuthorizationToken() string { 101 | return recognizer.Properties.GetProperty(common.SpeechServiceAuthorizationToken, "") 102 | } 103 | 104 | // Close disposes the associated resources. 105 | func (recognizer SpeakerRecognizer) Close() { 106 | recognizer.Properties.Close() 107 | C.speaker_recognizer_release_handle(recognizer.handle) 108 | } 109 | -------------------------------------------------------------------------------- /speaker/speaker_verification_model.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speaker 5 | 6 | import ( 7 | 8 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 9 | ) 10 | 11 | // #include 12 | // #include 13 | import "C" 14 | 15 | // SpeakerVerificationModel is the class that defines a verification model to be used in speaker verification scenarios. 16 | type SpeakerVerificationModel struct { 17 | handle C.SPXHANDLE 18 | } 19 | 20 | // newSpeakerVerificationModelFromHandle creates a SpeakerVerificationModel instance from a valid handle. This is for internal use only. 21 | func newSpeakerVerificationModelFromHandle(handle common.SPXHandle) (*SpeakerVerificationModel, error) { 22 | model := new(SpeakerVerificationModel) 23 | model.handle = uintptr2handle(handle) 24 | return model, nil 25 | } 26 | 27 | // NewSpeakerVerificationModelFromProfile creates an instance of the verification model using the given voice profile. 
28 | func NewSpeakerVerificationModelFromProfile(profile *VoiceProfile) (*SpeakerVerificationModel, error) { 29 | var handle C.SPXHANDLE 30 | profileHandle := profile.GetHandle() 31 | ret := uintptr(C.speaker_verification_model_create(&handle, uintptr2handle(profileHandle))) 32 | if ret != C.SPX_NOERROR { 33 | return nil, common.NewCarbonError(ret) 34 | } 35 | 36 | return newSpeakerVerificationModelFromHandle(handle2uintptr(handle)) 37 | } 38 | 39 | // Close disposes the associated resources. 40 | func (model *SpeakerVerificationModel) Close() { 41 | C.speaker_verification_model_release_handle(model.handle) 42 | } 43 | 44 | func (model *SpeakerVerificationModel) GetHandle() common.SPXHandle { 45 | return handle2uintptr(model.handle) 46 | } 47 | -------------------------------------------------------------------------------- /speaker/voice_profile.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speaker 5 | 6 | import ( 7 | "unsafe" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | // #include 14 | import "C" 15 | 16 | // VoiceProfile is the class that defines voice profiles used in speaker recognition scenarios. 17 | type VoiceProfile struct { 18 | handle C.SPXHANDLE 19 | } 20 | 21 | // newVoiceProfileFromHandle creates a VoiceProfile instance from a valid handle. This is for internal use only. 22 | func newVoiceProfileFromHandle(handle common.SPXHandle) (*VoiceProfile, error) { 23 | profile := new(VoiceProfile) 24 | profile.handle = uintptr2handle(handle) 25 | return profile, nil 26 | } 27 | 28 | // NewVoiceProfileFromIdAndType creates an instance of the voice profile with specified id and type. 
29 | //nolint:revive 30 | func NewVoiceProfileFromIdAndType(id string, profileType common.VoiceProfileType) (*VoiceProfile, error) { 31 | var handle C.SPXHANDLE 32 | profileID := C.CString(id) 33 | defer C.free(unsafe.Pointer(profileID)) 34 | ret := uintptr(C.create_voice_profile_from_id_and_type(&handle, profileID, (C.int)(profileType))) 35 | if ret != C.SPX_NOERROR { 36 | return nil, common.NewCarbonError(ret) 37 | } 38 | 39 | return newVoiceProfileFromHandle(handle2uintptr(handle)) 40 | } 41 | 42 | // Return the id of the given voice profile 43 | //nolint:revive 44 | func (profile *VoiceProfile) Id() (string, error) { 45 | var sz C.uint32_t 46 | ret := uintptr(C.voice_profile_get_id(profile.handle, nil, &sz)) 47 | if ret != C.SPX_NOERROR { 48 | return "", common.NewCarbonError(ret) 49 | } 50 | buffer := C.malloc(C.sizeof_char * (C.size_t)(sz)) 51 | defer C.free(unsafe.Pointer(buffer)) 52 | ret = uintptr(C.voice_profile_get_id(profile.handle, (*C.char)(buffer), &sz)) 53 | if ret != C.SPX_NOERROR { 54 | return "", common.NewCarbonError(ret) 55 | } 56 | id := C.GoString((*C.char)(buffer)) 57 | return id, nil 58 | } 59 | 60 | // Return the type of the given voice profile 61 | func (profile *VoiceProfile) Type() (common.VoiceProfileType, error) { 62 | var profileType C.int 63 | ret := uintptr(C.voice_profile_get_type(profile.handle, &profileType)) 64 | if ret != C.SPX_NOERROR { 65 | return common.VoiceProfileType(1), common.NewCarbonError(ret) 66 | } 67 | return common.VoiceProfileType(profileType), nil 68 | } 69 | 70 | // Close disposes the associated resources. 
71 | func (profile *VoiceProfile) Close() { 72 | C.voice_profile_release_handle(profile.handle) 73 | } 74 | 75 | func (profile *VoiceProfile) GetHandle() common.SPXHandle { 76 | return handle2uintptr(profile.handle) 77 | } 78 | -------------------------------------------------------------------------------- /speaker/voice_profile_cancellation_details.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speaker 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | // 13 | import "C" 14 | 15 | // VoiceProfileCancellationDetails contains detailed information about why a result was canceled. 16 | // Added in version 1.21.0 17 | type VoiceProfileCancellationDetails struct { 18 | Reason common.CancellationReason 19 | ErrorCode common.CancellationErrorCode 20 | ErrorDetails string 21 | } 22 | 23 | // NewCancellationDetailsFromVoiceProfileResult creates the object from the speech synthesis result. 
24 | func NewCancellationDetailsFromVoiceProfileResult(result *VoiceProfileResult) (*VoiceProfileCancellationDetails, error) { 25 | cancellationDetails := new(VoiceProfileCancellationDetails) 26 | /* Reason */ 27 | var cReason C.Result_CancellationReason 28 | ret := uintptr(C.result_get_reason_canceled(result.handle, &cReason)) 29 | if ret != C.SPX_NOERROR { 30 | return nil, common.NewCarbonError(ret) 31 | } 32 | cancellationDetails.Reason = (common.CancellationReason)(cReason) 33 | /* ErrorCode */ 34 | var cCode C.Result_CancellationErrorCode 35 | ret = uintptr(C.result_get_canceled_error_code(result.handle, &cCode)) 36 | if ret != C.SPX_NOERROR { 37 | return nil, common.NewCarbonError(ret) 38 | } 39 | cancellationDetails.ErrorCode = (common.CancellationErrorCode)(cCode) 40 | cancellationDetails.ErrorDetails = result.Properties.GetProperty(common.CancellationDetailsReasonDetailedText, "") 41 | return cancellationDetails, nil 42 | } 43 | -------------------------------------------------------------------------------- /speaker/voice_profile_enrollment_result.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speaker 5 | 6 | import ( 7 | "math/big" 8 | "unsafe" 9 | "strconv" 10 | 11 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 12 | ) 13 | 14 | // #include 15 | // #include 16 | // #include 17 | // #include 18 | // #include 19 | // 20 | import "C" 21 | 22 | // VoiceProfileEnrollmentResult contains information about result from voice profile operations. 23 | type VoiceProfileEnrollmentResult struct { 24 | handle C.SPXHANDLE 25 | 26 | // ResultID specifies the result identifier. 27 | ResultID string 28 | 29 | // ProfileID specifies the profile ID of the profile being enrolled. 
30 | ProfileID string 31 | 32 | // EnrollmentsCount specifies the number of successful enrollments for the profile 33 | EnrollmentsCount int 34 | 35 | // RemainingEnrollmentsCount specifies the number of successful enrollments remaining until profile is enrolled 36 | RemainingEnrollmentsCount int 37 | 38 | // EnrollmentsLength specifies in hundreds of nanoseconds the audio length registered enrolling the profile 39 | EnrollmentsLength big.Int 40 | 41 | // RemainingEnrollmentsLength specifies the amount of pure speech (which is the amount of audio after removing silence and non-speech segments) needed to complete profile enrollment in hundred nanoseconds. 42 | RemainingEnrollmentsLength big.Int 43 | 44 | // CreatedTime specifies the created time of the voice profile. 45 | CreatedTime string 46 | 47 | // LastUpdatedDateTime specifies the last updated time of the voice profile. 48 | LastUpdatedTime string 49 | 50 | // Reason specifies status of speech synthesis result. 51 | Reason common.ResultReason 52 | 53 | // ErrorDetails presents error details. 54 | ErrorDetails string 55 | 56 | // Collection of additional properties. 
57 | Properties *common.PropertyCollection 58 | } 59 | 60 | // Close releases the underlying resources 61 | func (result VoiceProfileEnrollmentResult) Close() { 62 | result.Properties.Close() 63 | C.recognizer_result_handle_release(result.handle) 64 | } 65 | 66 | // newVoiceProfileEnrollmentResultFromHandle creates a VoiceProfileEnrollmentResult from a handle (for internal use) 67 | func newVoiceProfileEnrollmentResultFromHandle (handle common.SPXHandle) (*VoiceProfileEnrollmentResult, error) { 68 | result := new(VoiceProfileEnrollmentResult) 69 | result.handle = uintptr2handle(handle) 70 | buffer := C.malloc(C.sizeof_char * 1024) 71 | defer C.free(unsafe.Pointer(buffer)) 72 | /* ResultID */ 73 | ret := uintptr(C.result_get_result_id(result.handle, (*C.char)(buffer), 1024)) 74 | if ret != C.SPX_NOERROR { 75 | return nil, common.NewCarbonError(ret) 76 | } 77 | result.ResultID = C.GoString((*C.char)(buffer)) 78 | /* Reason */ 79 | var cReason C.Result_Reason 80 | ret = uintptr(C.result_get_reason(result.handle, &cReason)) 81 | if ret != C.SPX_NOERROR { 82 | return nil, common.NewCarbonError(ret) 83 | } 84 | result.Reason = (common.ResultReason)(cReason) 85 | /* Properties */ 86 | var propBagHandle C.SPXHANDLE 87 | ret = uintptr(C.result_get_property_bag(result.handle, &propBagHandle)) 88 | if ret != C.SPX_NOERROR { 89 | return nil, common.NewCarbonError(ret) 90 | } 91 | result.Properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 92 | result.ErrorDetails = result.Properties.GetProperty(common.CancellationDetailsReasonDetailedText, "") 93 | 94 | /* ProfileID */ 95 | result.ProfileID = result.Properties.GetPropertyByString("enrollment.profileId", "") 96 | 97 | /* EnrollmentsCount */ 98 | value := result.Properties.GetPropertyByString("enrollment.enrollmentsCount", "") 99 | if value != "" { 100 | if intVal, err := strconv.Atoi(value); err == nil { 101 | result.EnrollmentsCount = intVal 102 | } 103 | } 104 | 105 | /* RemainingEnrollmentsCount 
*/ 106 | value = result.Properties.GetPropertyByString("enrollment.remainingEnrollmentsCount", "") 107 | if value != "" { 108 | if intVal, err := strconv.Atoi(value); err == nil { 109 | result.RemainingEnrollmentsCount = intVal 110 | } 111 | } 112 | 113 | /* EnrollmentsLength */ 114 | value = result.Properties.GetPropertyByString("enrollment.enrollmentsLengthInSec", "") 115 | if value != "" { 116 | bigIntVal := new(big.Int) 117 | if bigIntVal, ok := bigIntVal.SetString(value, 10); ok { 118 | result.EnrollmentsLength = *bigIntVal 119 | } 120 | } 121 | 122 | /* RemainingEnrollmentsLength */ 123 | value = result.Properties.GetPropertyByString("enrollment.remainingEnrollmentsLengthInSec", "") 124 | if value != "" { 125 | bigIntVal := new(big.Int) 126 | if bigIntVal, ok := bigIntVal.SetString(value, 10); ok { 127 | result.RemainingEnrollmentsLength = *bigIntVal 128 | } 129 | } 130 | 131 | /* CreatedTime */ 132 | result.CreatedTime = result.Properties.GetPropertyByString("enrollment.createdDateTime", "") 133 | 134 | /* LastUpdatedTime */ 135 | result.LastUpdatedTime = result.Properties.GetPropertyByString("enrollment.lastUpdatedDateTime", "") 136 | 137 | return result, nil 138 | } 139 | 140 | // VoiceProfileEnrollmentOutcome is a wrapper type to be returned by operations returning VoiceProfileEnrollmentResult and error 141 | type VoiceProfileEnrollmentOutcome struct { 142 | common.OperationOutcome 143 | 144 | // Result is the result of the operation 145 | Result *VoiceProfileEnrollmentResult 146 | } 147 | 148 | // Close releases the underlying resources 149 | func (outcome VoiceProfileEnrollmentOutcome) Close() { 150 | if outcome.Result != nil { 151 | outcome.Result.Close() 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /speaker/voice_profile_phrase_result.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 
2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speaker 5 | 6 | import ( 7 | "strings" 8 | "unsafe" 9 | 10 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 11 | ) 12 | 13 | // #include 14 | // #include 15 | // #include 16 | // #include 17 | // #include 18 | // 19 | import "C" 20 | 21 | // VoiceProfilePhraseResult contains activation phrases needed to successfully enroll a voice profile. 22 | type VoiceProfilePhraseResult struct { 23 | handle C.SPXHANDLE 24 | 25 | // Activation phrases for voice profile enrollment 26 | Phrases []string 27 | 28 | // ResultID specifies the result identifier. 29 | ResultID string 30 | 31 | // Reason specifies status of speech synthesis result. 32 | Reason common.ResultReason 33 | 34 | // ErrorDetails presents error details. 35 | ErrorDetails string 36 | 37 | // Collection of additional properties. 38 | Properties *common.PropertyCollection 39 | } 40 | 41 | // Close releases the underlying resources 42 | func (result VoiceProfilePhraseResult) Close() { 43 | result.Properties.Close() 44 | C.recognizer_result_handle_release(result.handle) 45 | } 46 | 47 | // newVoiceProfilePhraseResultFromHandle creates a VoiceProfilePhraseResult from a handle (for internal use) 48 | func newVoiceProfilePhraseResultFromHandle (handle common.SPXHandle) (*VoiceProfilePhraseResult, error) { 49 | result := new(VoiceProfilePhraseResult) 50 | result.handle = uintptr2handle(handle) 51 | buffer := C.malloc(C.sizeof_char * 1024) 52 | defer C.free(unsafe.Pointer(buffer)) 53 | /* ResultID */ 54 | ret := uintptr(C.result_get_result_id(result.handle, (*C.char)(buffer), 1024)) 55 | if ret != C.SPX_NOERROR { 56 | return nil, common.NewCarbonError(ret) 57 | } 58 | result.ResultID = C.GoString((*C.char)(buffer)) 59 | /* Reason */ 60 | var cReason C.Result_Reason 61 | ret = uintptr(C.result_get_reason(result.handle, &cReason)) 62 | if ret != C.SPX_NOERROR { 63 | return nil, 
common.NewCarbonError(ret) 64 | } 65 | result.Reason = (common.ResultReason)(cReason) 66 | /* Properties */ 67 | var propBagHandle C.SPXHANDLE 68 | ret = uintptr(C.result_get_property_bag(result.handle, &propBagHandle)) 69 | if ret != C.SPX_NOERROR { 70 | return nil, common.NewCarbonError(ret) 71 | } 72 | result.Properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 73 | result.ErrorDetails = result.Properties.GetProperty(common.CancellationDetailsReasonDetailedText, "") 74 | 75 | /* Phrases */ 76 | phrasesString := result.Properties.GetPropertyByString("speakerrecognition.phrases", "") 77 | if len(phrasesString) > 0 { 78 | result.Phrases = strings.Split(phrasesString, "|") 79 | } 80 | return result, nil 81 | } 82 | 83 | // VoiceProfilePhraseOutcome is a wrapper type to be returned by operations returning VoiceProfilePhraseResult and error 84 | type VoiceProfilePhraseOutcome struct { 85 | common.OperationOutcome 86 | 87 | // Result is the result of the operation 88 | Result *VoiceProfilePhraseResult 89 | } 90 | 91 | // Close releases the underlying resources 92 | func (outcome VoiceProfilePhraseOutcome) Close() { 93 | if outcome.Result != nil { 94 | outcome.Result.Close() 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /speaker/voice_profile_result.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speaker 5 | 6 | import ( 7 | "unsafe" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | // #include 14 | // #include 15 | // #include 16 | // #include 17 | // 18 | import "C" 19 | 20 | // VoiceProfileResult contains information about result from voice profile operations. 
21 | type VoiceProfileResult struct { 22 | handle C.SPXHANDLE 23 | 24 | // ResultID specifies the result identifier. 25 | ResultID string 26 | 27 | // Reason specifies status of speech synthesis result. 28 | Reason common.ResultReason 29 | 30 | // ErrorDetails presents error details. 31 | ErrorDetails string 32 | 33 | // Collection of additional properties. 34 | Properties *common.PropertyCollection 35 | } 36 | 37 | // Close releases the underlying resources 38 | func (result VoiceProfileResult) Close() { 39 | result.Properties.Close() 40 | C.recognizer_result_handle_release(result.handle) 41 | } 42 | 43 | // newVoiceProfileResultFromHandle creates a VoiceProfileResult from a handle (for internal use) 44 | func newVoiceProfileResultFromHandle (handle common.SPXHandle) (*VoiceProfileResult, error) { 45 | result := new(VoiceProfileResult) 46 | result.handle = uintptr2handle(handle) 47 | buffer := C.malloc(C.sizeof_char * 1024) 48 | defer C.free(unsafe.Pointer(buffer)) 49 | /* ResultID */ 50 | ret := uintptr(C.result_get_result_id(result.handle, (*C.char)(buffer), 1024)) 51 | if ret != C.SPX_NOERROR { 52 | return nil, common.NewCarbonError(ret) 53 | } 54 | result.ResultID = C.GoString((*C.char)(buffer)) 55 | /* Reason */ 56 | var cReason C.Result_Reason 57 | ret = uintptr(C.result_get_reason(result.handle, &cReason)) 58 | if ret != C.SPX_NOERROR { 59 | return nil, common.NewCarbonError(ret) 60 | } 61 | result.Reason = (common.ResultReason)(cReason) 62 | /* Properties */ 63 | var propBagHandle C.SPXHANDLE 64 | ret = uintptr(C.result_get_property_bag(result.handle, &propBagHandle)) 65 | if ret != C.SPX_NOERROR { 66 | return nil, common.NewCarbonError(ret) 67 | } 68 | result.Properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 69 | result.ErrorDetails = result.Properties.GetProperty(common.CancellationDetailsReasonDetailedText, "") 70 | return result, nil 71 | } 72 | 73 | // VoiceProfileOutcome is a wrapper type to be returned by operations 
returning VoiceProfileResult and error 74 | type VoiceProfileOutcome struct { 75 | common.OperationOutcome 76 | 77 | // Result is the result of the operation 78 | Result *VoiceProfileResult 79 | } 80 | 81 | // Close releases the underlying resources 82 | func (outcome VoiceProfileOutcome) Close() { 83 | if outcome.Result != nil { 84 | outcome.Result.Close() 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /speaker/voice_profile_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speaker 5 | 6 | import ( 7 | // "fmt" 8 | "testing" 9 | //"github.com/Microsoft/cognitive-services-speech-sdk-go/audio" 10 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 11 | ) 12 | 13 | func TestNewVoiceProfile(t *testing.T) { 14 | id := "12345678-abcd-abcd-abcd-12345678abcd" 15 | profileType := common.VoiceProfileType(3) 16 | profile, err := NewVoiceProfileFromIdAndType(id, profileType) 17 | if err != nil { 18 | t.Error("Unexpected error") 19 | } 20 | defer profile.Close() 21 | profileId, err := profile.Id() 22 | if err != nil { 23 | t.Error("id not properly set") 24 | } else if profileId != id { 25 | t.Error("id does not match original") 26 | } 27 | profType, err := profile.Type() 28 | if err != nil { 29 | t.Error("type not properly set") 30 | } else if profType != profileType { 31 | t.Error("Voice Profile Type not properly set") 32 | } 33 | } -------------------------------------------------------------------------------- /speech/audio_data_stream.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package speech 5 | 6 | import ( 7 | "io" 8 | "unsafe" 9 | 10 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 11 | ) 12 | 13 | // #include 14 | // #include 15 | // #include 16 | // #include 17 | // #include 18 | // 19 | import "C" 20 | 21 | // AudioDataStream represents audio data stream used for operating audio data as a stream. 22 | // Added in version 1.17.0 23 | type AudioDataStream struct { 24 | handle C.SPXHANDLE 25 | 26 | // Properties represents the collection of additional properties. 27 | Properties *common.PropertyCollection 28 | } 29 | 30 | // Close disposes the associated resources. 31 | func (stream AudioDataStream) Close() { 32 | stream.Properties.Close() 33 | C.audio_data_stream_release(stream.handle) 34 | } 35 | 36 | // NewAudioDataStreamFromHandle creates a new AudioDataStream from a handle (for internal use) 37 | func NewAudioDataStreamFromHandle(handle common.SPXHandle) (*AudioDataStream, error) { 38 | stream := new(AudioDataStream) 39 | stream.handle = uintptr2handle(handle) 40 | /* Properties */ 41 | var propBagHandle C.SPXHANDLE 42 | ret := uintptr(C.audio_data_stream_get_property_bag(uintptr2handle(handle), &propBagHandle)) 43 | if ret != C.SPX_NOERROR { 44 | return nil, common.NewCarbonError(ret) 45 | } 46 | stream.Properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 47 | return stream, nil 48 | } 49 | 50 | // NewAudioDataStreamFromWavFileInput creates a memory backed AudioDataStream for the specified audio input file. 
51 | func NewAudioDataStreamFromWavFileInput(filename string) (*AudioDataStream, error) { 52 | var handle C.SPXHANDLE 53 | fn := C.CString(filename) 54 | defer C.free(unsafe.Pointer(fn)) 55 | ret := uintptr(C.audio_data_stream_create_from_file(&handle, fn)) 56 | if ret != C.SPX_NOERROR { 57 | return nil, common.NewCarbonError(ret) 58 | } 59 | return NewAudioDataStreamFromHandle(handle2uintptr(handle)) 60 | } 61 | 62 | // NewAudioDataStreamFromSpeechSynthesisResult creates a memory backed AudioDataStream from given speech synthesis result. 63 | func NewAudioDataStreamFromSpeechSynthesisResult(result *SpeechSynthesisResult) (*AudioDataStream, error) { 64 | var handle C.SPXHANDLE 65 | ret := uintptr(C.audio_data_stream_create_from_result(&handle, result.handle)) 66 | if ret != C.SPX_NOERROR { 67 | return nil, common.NewCarbonError(ret) 68 | } 69 | return NewAudioDataStreamFromHandle(handle2uintptr(handle)) 70 | } 71 | 72 | // GetStatus gets the current status of the audio data stream. 73 | func (stream AudioDataStream) GetStatus() (common.StreamStatus, error) { 74 | var cStatus C.Stream_Status 75 | ret := uintptr(C.audio_data_stream_get_status(stream.handle, &cStatus)) 76 | if ret != C.SPX_NOERROR { 77 | return common.StreamStatusUnknown, common.NewCarbonError(ret) 78 | } 79 | return (common.StreamStatus)(cStatus), nil 80 | } 81 | 82 | // CanReadData checks whether the stream has enough data to be read. 83 | func (stream AudioDataStream) CanReadData(bytesRequested uint) bool { 84 | return (bool)(C.audio_data_stream_can_read_data(stream.handle, (C.uint32_t)(bytesRequested))) 85 | } 86 | 87 | // CanReadDataAt checks whether the stream has enough data to be read, at the specified offset. 
88 | func (stream AudioDataStream) CanReadDataAt(bytesRequested uint, off int64) bool { 89 | return (bool)(C.audio_data_stream_can_read_data_from_position(stream.handle, (C.uint32_t)(bytesRequested), (C.uint32_t)(off))) 90 | } 91 | 92 | // Read reads a chunk of the audio data stream and fill it to given buffer. 93 | // It returns size of data filled to the buffer and any write error encountered. 94 | func (stream AudioDataStream) Read(buffer []byte) (int, error) { 95 | if len(buffer) == 0 { 96 | return 0, common.NewCarbonError(0x005) // SPXERR_INVALID_ARG 97 | } 98 | var outSize C.uint32_t 99 | ret := uintptr(C.audio_data_stream_read(stream.handle, (*C.uint8_t)(unsafe.Pointer(&buffer[0])), (C.uint32_t)(len(buffer)), &outSize)) 100 | if ret != C.SPX_NOERROR { 101 | return 0, common.NewCarbonError(ret) 102 | } 103 | if outSize == 0 { 104 | return 0, io.EOF 105 | } 106 | return (int)(outSize), nil 107 | } 108 | 109 | // ReadAt reads a chunk of the audio data stream and fill it to given buffer, at specified offset. 110 | // It returns size of data filled to the buffer and any write error encountered. 111 | func (stream AudioDataStream) ReadAt(buffer []byte, off int64) (int, error) { 112 | if len(buffer) == 0 { 113 | return 0, common.NewCarbonError(0x005) // SPXERR_INVALID_ARG 114 | } 115 | var outSize C.uint32_t 116 | ret := uintptr(C.audio_data_stream_read_from_position(stream.handle, (*C.uint8_t)(unsafe.Pointer(&buffer[0])), (C.uint32_t)(len(buffer)), (C.uint32_t)(off), &outSize)) 117 | if ret != C.SPX_NOERROR { 118 | return 0, common.NewCarbonError(ret) 119 | } 120 | if outSize == 0 { 121 | return 0, io.EOF 122 | } 123 | return (int)(outSize), nil 124 | } 125 | 126 | // SaveToWavFileAsync saves the audio data to a file, asynchronously. 
127 | func (stream AudioDataStream) SaveToWavFileAsync(filename string) chan error { 128 | outcome := make(chan error) 129 | go func() { 130 | fn := C.CString(filename) 131 | defer C.free(unsafe.Pointer(fn)) 132 | ret := uintptr(C.audio_data_stream_save_to_wave_file(stream.handle, fn)) 133 | if ret != C.SPX_NOERROR { 134 | outcome <- common.NewCarbonError(ret) 135 | } else { 136 | outcome <- nil 137 | } 138 | }() 139 | return outcome 140 | } 141 | 142 | // GetOffset gets current offset of the audio data stream. 143 | func (stream AudioDataStream) GetOffset() (int, error) { 144 | var position C.uint32_t 145 | ret := uintptr(C.audio_data_stream_get_position(stream.handle, &position)) 146 | if ret != C.SPX_NOERROR { 147 | return 0, common.NewCarbonError(ret) 148 | } 149 | return (int)(position), nil 150 | } 151 | 152 | // SetOffset sets current offset of the audio data stream. 153 | func (stream AudioDataStream) SetOffset(offset int) error { 154 | ret := uintptr(C.audio_data_stream_set_position(stream.handle, (C.uint32_t)(offset))) 155 | if ret != C.SPX_NOERROR { 156 | return common.NewCarbonError(ret) 157 | } 158 | return nil 159 | } 160 | -------------------------------------------------------------------------------- /speech/auto_detect_source_language_config.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package speech 5 | 6 | import ( 7 | "strings" 8 | "unsafe" 9 | 10 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 11 | ) 12 | 13 | // #include 14 | // #include 15 | import "C" 16 | 17 | // AutoDetectSourceLanguageConfig defines auto detection source configuration 18 | type AutoDetectSourceLanguageConfig struct { 19 | handle C.SPXHANDLE 20 | properties *common.PropertyCollection 21 | } 22 | 23 | func newAutoDetectSourceLanguageConfigFromHandle(handle C.SPXHANDLE) (*AutoDetectSourceLanguageConfig, error) { 24 | var propBagHandle C.SPXHANDLE 25 | ret := uintptr(C.auto_detect_source_lang_config_get_property_bag(handle, &propBagHandle)) 26 | if ret != C.SPX_NOERROR { 27 | C.auto_detect_source_lang_config_release(handle) 28 | return nil, common.NewCarbonError(ret) 29 | } 30 | config := new(AutoDetectSourceLanguageConfig) 31 | config.handle = handle 32 | config.properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 33 | return config, nil 34 | } 35 | 36 | // NewAutoDetectSourceLanguageConfigFromOpenRange creates an instance of the AutoDetectSourceLanguageConfig with open range as source languages 37 | func NewAutoDetectSourceLanguageConfigFromOpenRange() (*AutoDetectSourceLanguageConfig, error) { 38 | var handle C.SPXHANDLE 39 | ret := uintptr(C.create_auto_detect_source_lang_config_from_open_range(&handle)) 40 | if ret != C.SPX_NOERROR { 41 | return nil, common.NewCarbonError(ret) 42 | } 43 | return newAutoDetectSourceLanguageConfigFromHandle(handle) 44 | } 45 | 46 | // NewAutoDetectSourceLanguageConfigFromLanguages creates an instance of the AutoDetectSourceLanguageConfig with source languages 47 | func NewAutoDetectSourceLanguageConfigFromLanguages(languages []string) (*AutoDetectSourceLanguageConfig, error) { 48 | var handle C.SPXHANDLE 49 | languageStr := strings.Join(languages, ",") 50 | languageCStr := C.CString(languageStr) 51 | defer C.free(unsafe.Pointer(languageCStr)) 52 | ret := 
uintptr(C.create_auto_detect_source_lang_config_from_languages(&handle, languageCStr)) 53 | if ret != C.SPX_NOERROR { 54 | return nil, common.NewCarbonError(ret) 55 | } 56 | return newAutoDetectSourceLanguageConfigFromHandle(handle) 57 | } 58 | 59 | // NewAutoDetectSourceLanguageConfigFromLanguageConfigs creates an instance of the AutoDetectSourceLanguageConfig with a list of source language config 60 | func NewAutoDetectSourceLanguageConfigFromLanguageConfigs(configs []*SourceLanguageConfig) (*AutoDetectSourceLanguageConfig, error) { 61 | if len(configs) == 0 { 62 | return nil, common.NewCarbonError(C.SPXERR_INVALID_ARG) 63 | } 64 | var handle C.SPXHANDLE 65 | var ret uintptr 66 | first := true 67 | for i := 0; i < len(configs); i++ { 68 | c := configs[i] 69 | if c == nil { 70 | if !first { 71 | C.auto_detect_source_lang_config_release(handle) 72 | } 73 | return nil, common.NewCarbonError(C.SPXERR_INVALID_ARG) 74 | } 75 | if first { 76 | ret = uintptr(C.create_auto_detect_source_lang_config_from_source_lang_config(&handle, c.getHandle())) 77 | first = false 78 | if ret != C.SPX_NOERROR { 79 | return nil, common.NewCarbonError(ret) 80 | } 81 | } else { 82 | ret = uintptr(C.add_source_lang_config_to_auto_detect_source_lang_config(handle, c.getHandle())) 83 | if ret != C.SPX_NOERROR { 84 | return nil, common.NewCarbonError(ret) 85 | } 86 | } 87 | } 88 | return newAutoDetectSourceLanguageConfigFromHandle(handle) 89 | } 90 | 91 | func (config AutoDetectSourceLanguageConfig) getHandle() C.SPXHANDLE { 92 | return config.handle 93 | } 94 | 95 | // Close performs cleanup of resources. 96 | func (config AutoDetectSourceLanguageConfig) Close() { 97 | config.properties.Close() 98 | C.auto_detect_source_lang_config_release(config.handle) 99 | } 100 | -------------------------------------------------------------------------------- /speech/cancellation_details.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. 
All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | // 13 | import "C" 14 | 15 | // CancellationDetails contains detailed information about why a result was canceled. 16 | // Added in version 1.17.0 17 | type CancellationDetails struct { 18 | Reason common.CancellationReason 19 | ErrorCode common.CancellationErrorCode 20 | ErrorDetails string 21 | } 22 | 23 | // NewCancellationDetailsFromSpeechSynthesisResult creates the object from the speech synthesis result. 24 | func NewCancellationDetailsFromSpeechSynthesisResult(result *SpeechSynthesisResult) (*CancellationDetails, error) { 25 | cancellationDetails := new(CancellationDetails) 26 | /* Reason */ 27 | var cReason C.Result_CancellationReason 28 | ret := uintptr(C.synth_result_get_reason_canceled(result.handle, &cReason)) 29 | if ret != C.SPX_NOERROR { 30 | return nil, common.NewCarbonError(ret) 31 | } 32 | cancellationDetails.Reason = (common.CancellationReason)(cReason) 33 | /* ErrorCode */ 34 | var cCode C.Result_CancellationErrorCode 35 | ret = uintptr(C.synth_result_get_canceled_error_code(result.handle, &cCode)) 36 | if ret != C.SPX_NOERROR { 37 | return nil, common.NewCarbonError(ret) 38 | } 39 | cancellationDetails.ErrorCode = (common.CancellationErrorCode)(cCode) 40 | cancellationDetails.ErrorDetails = result.Properties.GetProperty(common.CancellationDetailsReasonDetailedText, "") 41 | return cancellationDetails, nil 42 | } 43 | -------------------------------------------------------------------------------- /speech/conversation_callback_helpers.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. 
See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | // #include 7 | // #include 8 | import "C" 9 | 10 | // ConversationTranscriptionEventHandler is the callback type for conversation transcription events. 11 | type ConversationTranscriptionEventHandler func(event ConversationTranscriptionEventArgs) 12 | 13 | // ConversationTranscriptionCanceledEventHandler is the callback type for conversation transcription canceled events. 14 | type ConversationTranscriptionCanceledEventHandler func(event ConversationTranscriptionCanceledEventArgs) 15 | 16 | var ( 17 | conversationTranscribingCallbacks = make(map[C.SPXHANDLE]ConversationTranscriptionEventHandler) 18 | conversationTranscribedCallbacks = make(map[C.SPXHANDLE]ConversationTranscriptionEventHandler) 19 | conversationCanceledCallbacks = make(map[C.SPXHANDLE]ConversationTranscriptionCanceledEventHandler) 20 | ) 21 | 22 | func registerConversationTranscribingCallback(handler ConversationTranscriptionEventHandler, handle C.SPXHANDLE) { 23 | mu.Lock() 24 | defer mu.Unlock() 25 | conversationTranscribingCallbacks[handle] = handler 26 | } 27 | 28 | func getConversationTranscribingCallback(handle C.SPXHANDLE) ConversationTranscriptionEventHandler { 29 | mu.Lock() 30 | defer mu.Unlock() 31 | return conversationTranscribingCallbacks[handle] 32 | } 33 | 34 | //export conversationTranscriberFireEventTranscribing 35 | func conversationTranscriberFireEventTranscribing(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 36 | handler := getConversationTranscribingCallback(handle) 37 | event, err := NewConversationTranscriptionEventArgsFromHandle(handle2uintptr(eventHandle)) 38 | if err != nil || handler == nil { 39 | C.recognizer_event_handle_release(eventHandle) 40 | return 41 | } 42 | handler(*event) 43 | } 44 | 45 | func registerConversationTranscribedCallback(handler ConversationTranscriptionEventHandler, handle C.SPXHANDLE) { 46 | mu.Lock() 47 | defer mu.Unlock() 48 | 
conversationTranscribedCallbacks[handle] = handler 49 | } 50 | 51 | func getConversationTranscribedCallback(handle C.SPXHANDLE) ConversationTranscriptionEventHandler { 52 | mu.Lock() 53 | defer mu.Unlock() 54 | return conversationTranscribedCallbacks[handle] 55 | } 56 | 57 | //export conversationTranscriberFireEventTranscribed 58 | func conversationTranscriberFireEventTranscribed(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 59 | handler := getConversationTranscribedCallback(handle) 60 | event, err := NewConversationTranscriptionEventArgsFromHandle(handle2uintptr(eventHandle)) 61 | if err != nil || handler == nil { 62 | C.recognizer_event_handle_release(eventHandle) 63 | return 64 | } 65 | handler(*event) 66 | } 67 | 68 | func registerConversationCanceledCallback(handler ConversationTranscriptionCanceledEventHandler, handle C.SPXHANDLE) { 69 | mu.Lock() 70 | defer mu.Unlock() 71 | conversationCanceledCallbacks[handle] = handler 72 | } 73 | 74 | func getConversationCanceledCallback(handle C.SPXHANDLE) ConversationTranscriptionCanceledEventHandler { 75 | mu.Lock() 76 | defer mu.Unlock() 77 | return conversationCanceledCallbacks[handle] 78 | } 79 | 80 | //export conversationTranscriberFireEventCanceled 81 | func conversationTranscriberFireEventCanceled(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 82 | handler := getConversationCanceledCallback(handle) 83 | event, err := NewConversationTranscriptionCanceledEventArgsFromHandle(handle2uintptr(eventHandle)) 84 | if err != nil || handler == nil { 85 | C.recognizer_event_handle_release(eventHandle) 86 | return 87 | } 88 | handler(*event) 89 | } -------------------------------------------------------------------------------- /speech/conversation_transcription_event_args.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. 
See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | // #include 13 | // #include 14 | import "C" 15 | 16 | // ConversationTranscriptionEventArgs is used for conversation transcription events. 17 | type ConversationTranscriptionEventArgs struct { 18 | RecognitionEventArgs // Inherit from RecognitionEventArgs for consistency 19 | handle C.SPXHANDLE 20 | Result ConversationTranscriptionResult // Direct field instead of pointer 21 | } 22 | 23 | // NewConversationTranscriptionEventArgsFromHandle creates a ConversationTranscriptionEventArgs from an event handle 24 | func NewConversationTranscriptionEventArgsFromHandle(handle common.SPXHandle) (*ConversationTranscriptionEventArgs, error) { 25 | // Create the base RecognitionEventArgs first 26 | base, err := NewRecognitionEventArgsFromHandle(handle) 27 | if err != nil { 28 | return nil, err 29 | } 30 | 31 | event := new(ConversationTranscriptionEventArgs) 32 | event.RecognitionEventArgs = *base 33 | event.handle = uintptr2handle(handle) 34 | 35 | // Get the result handle 36 | var resultHandle C.SPXHANDLE 37 | ret := uintptr(C.recognizer_recognition_event_get_result(event.handle, &resultHandle)) 38 | if ret != C.SPX_NOERROR { 39 | return nil, common.NewCarbonError(ret) 40 | } 41 | 42 | // Create the result 43 | result, err := NewConversationTranscriptionResultFromHandle(handle2uintptr(resultHandle)) 44 | if err != nil { 45 | return nil, err 46 | } 47 | 48 | event.Result = *result 49 | return event, nil 50 | } 51 | 52 | // Close releases the underlying resources 53 | func (event ConversationTranscriptionEventArgs) Close() { 54 | event.RecognitionEventArgs.Close() 55 | event.Result.Close() 56 | } 57 | 58 | // ConversationTranscriptionEventHandler is the type of the event handler that receives ConversationTranscriptionEventArgs 59 | // type 
ConversationTranscriptionEventHandler func(event ConversationTranscriptionEventArgs) 60 | 61 | // ConversationTranscriptionCanceledEventArgs is used for conversation transcription canceled events. 62 | type ConversationTranscriptionCanceledEventArgs struct { 63 | ConversationTranscriptionEventArgs 64 | Reason common.CancellationReason // Direct field instead of nested object 65 | ErrorCode common.CancellationErrorCode // Direct field instead of nested object 66 | ErrorDetails string // Direct field instead of nested object 67 | } 68 | 69 | // NewConversationTranscriptionCanceledEventArgsFromHandle creates a ConversationTranscriptionCanceledEventArgs from an event handle 70 | func NewConversationTranscriptionCanceledEventArgsFromHandle(handle common.SPXHandle) (*ConversationTranscriptionCanceledEventArgs, error) { 71 | baseArgs, err := NewConversationTranscriptionEventArgsFromHandle(handle) 72 | if err != nil { 73 | return nil, err 74 | } 75 | 76 | event := new(ConversationTranscriptionCanceledEventArgs) 77 | event.ConversationTranscriptionEventArgs = *baseArgs 78 | 79 | /* Reason */ 80 | var cReason C.Result_CancellationReason 81 | ret := uintptr(C.result_get_reason_canceled(event.Result.handle, &cReason)) 82 | if ret != C.SPX_NOERROR { 83 | event.Close() 84 | return nil, common.NewCarbonError(ret) 85 | } 86 | event.Reason = (common.CancellationReason)(cReason) 87 | 88 | /* ErrorCode */ 89 | var cCode C.Result_CancellationErrorCode 90 | ret = uintptr(C.result_get_canceled_error_code(event.Result.handle, &cCode)) 91 | if ret != C.SPX_NOERROR { 92 | event.Close() 93 | return nil, common.NewCarbonError(ret) 94 | } 95 | event.ErrorCode = (common.CancellationErrorCode)(cCode) 96 | event.ErrorDetails = event.Result.Properties.GetProperty(common.SpeechServiceResponseJSONErrorDetails, "") 97 | 98 | return event, nil 99 | } 100 | 101 | // Close releases the associated resources. 
102 | func (event ConversationTranscriptionCanceledEventArgs) Close() { 103 | event.ConversationTranscriptionEventArgs.Close() 104 | } 105 | 106 | // ConversationTranscriptionCanceledEventHandler is the type of the event handler that receives ConversationTranscriptionCanceledEventArgs 107 | //type ConversationTranscriptionCanceledEventHandler func(event ConversationTranscriptionCanceledEventArgs) -------------------------------------------------------------------------------- /speech/conversation_transcription_result.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "unsafe" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | // #include 14 | // #include 15 | // #include 16 | // #include 17 | import "C" 18 | 19 | // ConversationTranscriptionResult contains detailed information about result of a conversation transcription operation. 
20 | type ConversationTranscriptionResult struct { 21 | SpeechRecognitionResult // Embedded for common fields 22 | SpeakerID string 23 | } 24 | 25 | // NewConversationTranscriptionResultFromHandle creates a ConversationTranscriptionResult from a handle (for internal use) 26 | func NewConversationTranscriptionResultFromHandle(handle common.SPXHandle) (*ConversationTranscriptionResult, error) { 27 | // Create base result first 28 | baseResult, err := NewSpeechRecognitionResultFromHandle(handle) 29 | if err != nil { 30 | return nil, err 31 | } 32 | 33 | result := &ConversationTranscriptionResult{ 34 | SpeechRecognitionResult: *baseResult, 35 | } 36 | 37 | // Get speaker ID 38 | buffer := C.malloc(C.sizeof_char * 1024) 39 | defer C.free(unsafe.Pointer(buffer)) 40 | 41 | ret := uintptr(C.conversation_transcription_result_get_speaker_id(result.handle, (*C.char)(buffer), 1024)) 42 | if ret != C.SPX_NOERROR { 43 | return nil, common.NewCarbonError(ret) 44 | } 45 | result.SpeakerID = C.GoString((*C.char)(buffer)) 46 | 47 | return result, nil 48 | } 49 | 50 | // Close releases the underlying resources 51 | func (result ConversationTranscriptionResult) Close() { 52 | // Only call the base Close since we don't have additional resources to clean up 53 | result.SpeechRecognitionResult.Close() 54 | } -------------------------------------------------------------------------------- /speech/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
// uintptr2handle converts the SDK's Go-side handle value into the C handle type
// expected by the native API.
func uintptr2handle(h common.SPXHandle) C.SPXHANDLE {
	return (C.SPXHANDLE)(unsafe.Pointer(h)) //nolint:govet
}

// handle2uintptr converts a C handle into the SDK's Go-side handle value.
// It is the inverse of uintptr2handle.
func handle2uintptr(h C.SPXHANDLE) common.SPXHandle {
	return (common.SPXHandle)(unsafe.Pointer(h)) //nolint:govet
}

// TestHandleConversion checks that converting a handle value to the C type and back
// yields the original value, and that converting again yields the same C handle.
func TestHandleConversion(t *testing.T) {
	orig := common.SPXHandle(3)
	handle := uintptr2handle(orig)
	dest := handle2uintptr(handle)
	if orig != dest {
		t.Error("Values are not equal")
	}
	if uintptr2handle(dest) != handle {
		t.Error("Values are not equal")
	}
}
2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | import "C" 13 | import "unsafe" 14 | 15 | // KeywordRecognitionModel represents the keyword recognition model used with StartKeywordRecognitionAsync methods. 16 | type KeywordRecognitionModel struct { 17 | handle C.SPXHANDLE 18 | } 19 | 20 | // Close disposes the associated resources. 21 | func (model KeywordRecognitionModel) Close() { 22 | C.keyword_recognition_model_handle_release(model.handle) 23 | } 24 | 25 | // GetHandle gets the handle to the resource (for internal use) 26 | func (model KeywordRecognitionModel) GetHandle() common.SPXHandle { 27 | return handle2uintptr(model.handle) 28 | } 29 | 30 | /// NewKeywordRecognitionModelFromFile creates a keyword recognition model using the specified file. 31 | func NewKeywordRecognitionModelFromFile(filename string) (*KeywordRecognitionModel, error) { 32 | var handle C.SPXHANDLE 33 | f := C.CString(filename) 34 | defer C.free(unsafe.Pointer(f)) 35 | ret := uintptr(C.keyword_recognition_model_create_from_file(f, &handle)) 36 | if ret != C.SPX_NOERROR { 37 | return nil, common.NewCarbonError(ret) 38 | } 39 | model := new(KeywordRecognitionModel) 40 | model.handle = handle 41 | return model, nil 42 | } 43 | -------------------------------------------------------------------------------- /speech/recognition_event_args.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package speech 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | import "C" 13 | 14 | // RecognitionEventArgs represents the recognition event arguments. 15 | type RecognitionEventArgs struct { 16 | SessionEventArgs 17 | Offset uint64 18 | } 19 | 20 | // NewRecognitionEventArgsFromHandle creates the object from the handle (for internal use) 21 | func NewRecognitionEventArgsFromHandle(handle common.SPXHandle) (*RecognitionEventArgs, error) { 22 | base, err := NewSessionEventArgsFromHandle(handle) 23 | if err != nil { 24 | return nil, err 25 | } 26 | var offset C.uint64_t 27 | ret := uintptr(C.recognizer_recognition_event_get_offset(uintptr2handle(handle), &offset)) 28 | if ret != C.SPX_NOERROR { 29 | return nil, common.NewCarbonError(ret) 30 | } 31 | event := new(RecognitionEventArgs) 32 | event.SessionEventArgs = *base 33 | event.Offset = uint64(offset) 34 | return event, nil 35 | } 36 | 37 | // RecognitionEventHandler is the type of the event handler that receives RecognitionEventArgs 38 | type RecognitionEventHandler func(event RecognitionEventArgs) 39 | -------------------------------------------------------------------------------- /speech/session_event_args.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "unsafe" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | // #include 14 | import "C" 15 | 16 | // SessionEventArgs represents the session event arguments. 17 | type SessionEventArgs struct { 18 | handle C.SPXHANDLE 19 | // SessionID Session identifier (a GUID in string format). 20 | SessionID string 21 | } 22 | 23 | // Close releases the underlying resources. 
24 | func (event SessionEventArgs) Close() { 25 | C.recognizer_event_handle_release(event.handle) 26 | } 27 | 28 | // NewSessionEventArgsFromHandle creates the object from the handle (for internal use) 29 | func NewSessionEventArgsFromHandle(handle common.SPXHandle) (*SessionEventArgs, error) { 30 | buffer := C.malloc(C.sizeof_char * 37) 31 | defer C.free(unsafe.Pointer(buffer)) 32 | ret := uintptr(C.recognizer_session_event_get_session_id(uintptr2handle(handle), (*C.char)(buffer), 37)) 33 | if ret != C.SPX_NOERROR { 34 | return nil, common.NewCarbonError(ret) 35 | } 36 | event := new(SessionEventArgs) 37 | event.handle = uintptr2handle(handle) 38 | event.SessionID = C.GoString((*C.char)(buffer)) 39 | return event, nil 40 | } 41 | 42 | // SessionEventHandler is the type of the event handler that receives SessionEventArgs 43 | type SessionEventHandler func(event SessionEventArgs) 44 | -------------------------------------------------------------------------------- /speech/source_language_config.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "unsafe" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | // #include 14 | import "C" 15 | 16 | // SourceLanguageConfig defines source language configuration. 
17 | type SourceLanguageConfig struct { 18 | handle C.SPXHANDLE 19 | properties *common.PropertyCollection 20 | } 21 | 22 | func newSourceLanguageConfigFromHandle(handle C.SPXHANDLE) (*SourceLanguageConfig, error) { 23 | var propBagHandle C.SPXHANDLE 24 | ret := uintptr(C.source_lang_config_get_property_bag(handle, &propBagHandle)) 25 | if ret != C.SPX_NOERROR { 26 | C.source_lang_config_release(handle) 27 | return nil, common.NewCarbonError(ret) 28 | } 29 | config := new(SourceLanguageConfig) 30 | config.handle = handle 31 | config.properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 32 | return config, nil 33 | } 34 | 35 | // NewSourceLanguageConfigFromLanguage creates an instance of the SourceLanguageConfig with source language 36 | func NewSourceLanguageConfigFromLanguage(language string) (*SourceLanguageConfig, error) { 37 | var handle C.SPXHANDLE 38 | languageCStr := C.CString(language) 39 | defer C.free(unsafe.Pointer(languageCStr)) 40 | ret := uintptr(C.source_lang_config_from_language(&handle, languageCStr)) 41 | if ret != C.SPX_NOERROR { 42 | return nil, common.NewCarbonError(ret) 43 | } 44 | return newSourceLanguageConfigFromHandle(handle) 45 | } 46 | 47 | // NewSourceLanguageConfigFromLanguageAndEndpointId creates an instance of the SourceLanguageConfig with source language and custom endpoint id. A custom endpoint id corresponds to custom models. 
48 | //nolint:revive 49 | func NewSourceLanguageConfigFromLanguageAndEndpointId(language string, endpointID string) (*SourceLanguageConfig, error) { 50 | var handle C.SPXHANDLE 51 | languageCStr := C.CString(language) 52 | defer C.free(unsafe.Pointer(languageCStr)) 53 | endpointCStr := C.CString(endpointID) 54 | defer C.free(unsafe.Pointer(endpointCStr)) 55 | ret := uintptr(C.source_lang_config_from_language_and_endpointId(&handle, languageCStr, endpointCStr)) 56 | if ret != C.SPX_NOERROR { 57 | return nil, common.NewCarbonError(ret) 58 | } 59 | return newSourceLanguageConfigFromHandle(handle) 60 | } 61 | 62 | func (config SourceLanguageConfig) getHandle() C.SPXHANDLE { 63 | return config.handle 64 | } 65 | 66 | // Close performs cleanup of resources. 67 | func (config SourceLanguageConfig) Close() { 68 | config.properties.Close() 69 | C.source_lang_config_release(config.handle) 70 | } 71 | -------------------------------------------------------------------------------- /speech/speech_config_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package speech 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | "testing" 9 | ) 10 | 11 | func TestFromSubscription(t *testing.T) { 12 | subscription := "test" 13 | region := "region" 14 | config, err := NewSpeechConfigFromSubscription(subscription, region) 15 | if err != nil { 16 | t.Error("Unexpected error") 17 | } 18 | if config.SubscriptionKey() != subscription { 19 | t.Error("Subscription not properly set") 20 | } 21 | if config.Region() != region { 22 | t.Error("Region not properly set") 23 | } 24 | } 25 | 26 | func TestFromAuthorizationToken(t *testing.T) { 27 | auth := "test" 28 | region := "region" 29 | config, err := NewSpeechConfigFromAuthorizationToken(auth, region) 30 | if err != nil { 31 | t.Error("Unexpected error") 32 | } 33 | if config.AuthorizationToken() != auth { 34 | t.Error("Authorization Token not properly set") 35 | } 36 | if config.Region() != region { 37 | t.Error("Region not properly set") 38 | } 39 | } 40 | 41 | func TestPropertiesByID(t *testing.T) { 42 | subscription := "test" 43 | region := "region" 44 | config, err := NewSpeechConfigFromSubscription(subscription, region) 45 | if err != nil { 46 | t.Error("Unexpected error") 47 | } 48 | value := "value1" 49 | err = config.SetProperty(common.SpeechServiceConnectionKey, value) 50 | if err != nil { 51 | t.Error("Unexpected error") 52 | } 53 | if config.GetProperty(common.SpeechServiceConnectionKey) != value { 54 | t.Error("Propery value not valid") 55 | } 56 | } 57 | 58 | func TestPropertiesByString(t *testing.T) { 59 | subscription := "test" 60 | region := "region" 61 | config, err := NewSpeechConfigFromSubscription(subscription, region) 62 | if err != nil { 63 | t.Error("Unexpected error") 64 | } 65 | value := "value1" 66 | err = config.SetPropertyByString("key1", value) 67 | if err != nil { 68 | t.Error("Unexpected error") 69 | } 70 | if config.GetPropertyByString("key1") != value { 71 | t.Error("Propery value not valid") 72 | } 73 | 74 | 
} 75 | -------------------------------------------------------------------------------- /speech/speech_recognition_canceled_event_args.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | import "C" 13 | 14 | // SpeechRecognitionCanceledEventArgs represents speech recognition canceled event arguments. 15 | type SpeechRecognitionCanceledEventArgs struct { 16 | SpeechRecognitionEventArgs 17 | Reason common.CancellationReason 18 | ErrorCode common.CancellationErrorCode 19 | ErrorDetails string 20 | } 21 | 22 | // NewSpeechRecognitionCanceledEventArgsFromHandle creates the object from the handle (for internal use) 23 | func NewSpeechRecognitionCanceledEventArgsFromHandle(handle common.SPXHandle) (*SpeechRecognitionCanceledEventArgs, error) { 24 | baseArgs, err := NewSpeechRecognitionEventArgsFromHandle(handle) 25 | if err != nil { 26 | return nil, err 27 | } 28 | event := new(SpeechRecognitionCanceledEventArgs) 29 | event.SpeechRecognitionEventArgs = *baseArgs 30 | /* Reason */ 31 | var cReason C.Result_CancellationReason 32 | ret := uintptr(C.result_get_reason_canceled(event.Result.handle, &cReason)) 33 | if ret != C.SPX_NOERROR { 34 | event.Close() 35 | return nil, common.NewCarbonError(ret) 36 | } 37 | event.Reason = (common.CancellationReason)(cReason) 38 | /* ErrorCode */ 39 | var cCode C.Result_CancellationErrorCode 40 | ret = uintptr(C.result_get_canceled_error_code(event.Result.handle, &cCode)) 41 | if ret != C.SPX_NOERROR { 42 | event.Close() 43 | return nil, common.NewCarbonError(ret) 44 | } 45 | event.ErrorCode = (common.CancellationErrorCode)(cCode) 46 | event.ErrorDetails = 
event.Result.Properties.GetProperty(common.SpeechServiceResponseJSONErrorDetails, "") 47 | return event, nil 48 | } 49 | 50 | // SpeechRecognitionCanceledEventHandler is the type of the event handler that receives SpeechRecognitionCanceledEventArgs 51 | type SpeechRecognitionCanceledEventHandler func(event SpeechRecognitionCanceledEventArgs) 52 | -------------------------------------------------------------------------------- /speech/speech_recognition_event_args.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | import "C" 13 | 14 | // SpeechRecognitionEventArgs represents the speech recognition event arguments. 15 | type SpeechRecognitionEventArgs struct { 16 | RecognitionEventArgs 17 | handle C.SPXHANDLE 18 | Result SpeechRecognitionResult 19 | } 20 | 21 | // Close releases the underlying resources 22 | func (event SpeechRecognitionEventArgs) Close() { 23 | event.RecognitionEventArgs.Close() 24 | event.Result.Close() 25 | } 26 | 27 | // NewSpeechRecognitionEventArgsFromHandle creates the object from the handle (for internal use) 28 | func NewSpeechRecognitionEventArgsFromHandle(handle common.SPXHandle) (*SpeechRecognitionEventArgs, error) { 29 | base, err := NewRecognitionEventArgsFromHandle(handle) 30 | if err != nil { 31 | return nil, err 32 | } 33 | event := new(SpeechRecognitionEventArgs) 34 | event.RecognitionEventArgs = *base 35 | event.handle = uintptr2handle(handle) 36 | var resultHandle C.SPXHANDLE 37 | ret := uintptr(C.recognizer_recognition_event_get_result(event.handle, &resultHandle)) 38 | if ret != C.SPX_NOERROR { 39 | return nil, common.NewCarbonError(ret) 40 | } 41 | result, err := 
NewSpeechRecognitionResultFromHandle(handle2uintptr(resultHandle)) 42 | if err != nil { 43 | return nil, err 44 | } 45 | event.Result = *result 46 | return event, nil 47 | } 48 | 49 | // SpeechRecognitionEventHandler is the type of the event handler that receives SpeechRecognitionEventArgs 50 | type SpeechRecognitionEventHandler func(event SpeechRecognitionEventArgs) 51 | -------------------------------------------------------------------------------- /speech/speech_recognition_result.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "time" 8 | "unsafe" 9 | 10 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 11 | ) 12 | 13 | // #include 14 | // #include 15 | // #include 16 | // #include 17 | // #include 18 | // 19 | import "C" 20 | 21 | // SpeechRecognitionResult contains detailed information about result of a recognition operation. 22 | type SpeechRecognitionResult struct { 23 | handle C.SPXHANDLE 24 | 25 | // ResultID specifies the result identifier. 26 | ResultID string 27 | 28 | // Reason specifies status of speech recognition result. 29 | Reason common.ResultReason 30 | 31 | // Text presents the recognized text in the result. 32 | Text string 33 | 34 | // Duration of the recognized speech. 35 | Duration time.Duration 36 | 37 | // Offset of the recognized speech in ticks. 38 | Offset time.Duration 39 | 40 | // Collection of additional RecognitionResult properties. 
41 | Properties *common.PropertyCollection 42 | } 43 | 44 | // Close releases the underlying resources 45 | func (result SpeechRecognitionResult) Close() { 46 | result.Properties.Close() 47 | C.recognizer_result_handle_release(result.handle) 48 | } 49 | 50 | // NewSpeechRecognitionResultFromHandle creates a SpeechRecognitionResult from a handle (for internal use) 51 | func NewSpeechRecognitionResultFromHandle(handle common.SPXHandle) (*SpeechRecognitionResult, error) { 52 | buffer := C.malloc(C.sizeof_char * 1024) 53 | defer C.free(unsafe.Pointer(buffer)) 54 | result := new(SpeechRecognitionResult) 55 | result.handle = uintptr2handle(handle) 56 | /* ResultID */ 57 | ret := uintptr(C.result_get_result_id(result.handle, (*C.char)(buffer), 1024)) 58 | if ret != C.SPX_NOERROR { 59 | return nil, common.NewCarbonError(ret) 60 | } 61 | result.ResultID = C.GoString((*C.char)(buffer)) 62 | /* Reason */ 63 | var cReason C.Result_Reason 64 | ret = uintptr(C.result_get_reason(result.handle, &cReason)) 65 | if ret != C.SPX_NOERROR { 66 | return nil, common.NewCarbonError(ret) 67 | } 68 | result.Reason = (common.ResultReason)(cReason) 69 | /* Text */ 70 | ret = uintptr(C.result_get_text(result.handle, (*C.char)(buffer), 1024)) 71 | if ret != C.SPX_NOERROR { 72 | return nil, common.NewCarbonError(ret) 73 | } 74 | result.Text = C.GoString((*C.char)(buffer)) 75 | /* Duration */ 76 | var cDuration C.uint64_t 77 | ret = uintptr(C.result_get_duration(result.handle, &cDuration)) 78 | if ret != C.SPX_NOERROR { 79 | return nil, common.NewCarbonError(ret) 80 | } 81 | result.Duration = time.Nanosecond * time.Duration(100*cDuration) 82 | /* Offset */ 83 | var cOffset C.uint64_t 84 | ret = uintptr(C.result_get_offset(result.handle, &cOffset)) 85 | if ret != C.SPX_NOERROR { 86 | return nil, common.NewCarbonError(ret) 87 | } 88 | result.Offset = time.Nanosecond * time.Duration(100*cOffset) 89 | /* Properties */ 90 | var propBagHandle C.SPXHANDLE 91 | ret = 
uintptr(C.result_get_property_bag(uintptr2handle(handle), &propBagHandle)) 92 | if ret != C.SPX_NOERROR { 93 | return nil, common.NewCarbonError(ret) 94 | } 95 | result.Properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 96 | return result, nil 97 | } 98 | 99 | // SpeechRecognitionOutcome is a wrapper type to be returned by operations returning SpeechRecognitionResult and error 100 | type SpeechRecognitionOutcome struct { 101 | common.OperationOutcome 102 | 103 | // Result is the result of the operation 104 | Result *SpeechRecognitionResult 105 | } 106 | 107 | // Close releases the underlying resources 108 | func (outcome SpeechRecognitionOutcome) Close() { 109 | if outcome.Result != nil { 110 | outcome.Result.Close() 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /speech/speech_synthesis_bookmark_event_args.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | // #include 13 | import "C" 14 | 15 | // SpeechSynthesisBookmarkEventArgs represents the speech synthesis bookmark event arguments. 16 | type SpeechSynthesisBookmarkEventArgs struct { 17 | handle C.SPXHANDLE 18 | 19 | // AudioOffset is the audio offset of the bookmark event, in ticks (100 nanoseconds). 20 | AudioOffset uint64 21 | 22 | // Text is the text of the bookmark. 
23 | Text string 24 | } 25 | 26 | // Close releases the underlying resources 27 | func (event SpeechSynthesisBookmarkEventArgs) Close() { 28 | C.synthesizer_event_handle_release(event.handle) 29 | } 30 | 31 | // NewSpeechSynthesisBookmarkEventArgsFromHandle creates the object from the handle (for internal use) 32 | func NewSpeechSynthesisBookmarkEventArgsFromHandle(handle common.SPXHandle) (*SpeechSynthesisBookmarkEventArgs, error) { 33 | event := new(SpeechSynthesisBookmarkEventArgs) 34 | event.handle = uintptr2handle(handle) 35 | /* AudioOffset */ 36 | var cAudioOffset C.uint64_t 37 | ret := uintptr(C.synthesizer_bookmark_event_get_values(event.handle, &cAudioOffset)) 38 | if ret != C.SPX_NOERROR { 39 | return nil, common.NewCarbonError(ret) 40 | } 41 | event.AudioOffset = uint64(cAudioOffset) 42 | /* Text */ 43 | value := C.synthesizer_event_get_text(event.handle) 44 | event.Text = C.GoString(value) 45 | C.property_bag_free_string(value) 46 | return event, nil 47 | } 48 | 49 | // SpeechSynthesisBookmarkEventHandler is the type of the event handler that receives SpeechSynthesisBookmarkEventArgs 50 | type SpeechSynthesisBookmarkEventHandler func(event SpeechSynthesisBookmarkEventArgs) 51 | -------------------------------------------------------------------------------- /speech/speech_synthesis_event_args.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | import "C" 13 | 14 | // SpeechSynthesisEventArgs represents the speech synthesis event arguments. 
15 | type SpeechSynthesisEventArgs struct { 16 | handle C.SPXHANDLE 17 | Result SpeechSynthesisResult 18 | } 19 | 20 | // Close releases the underlying resources 21 | func (event SpeechSynthesisEventArgs) Close() { 22 | event.Result.Close() 23 | C.synthesizer_event_handle_release(event.handle) 24 | } 25 | 26 | // NewSpeechSynthesisEventArgsFromHandle creates the object from the handle (for internal use) 27 | func NewSpeechSynthesisEventArgsFromHandle(handle common.SPXHandle) (*SpeechSynthesisEventArgs, error) { 28 | event := new(SpeechSynthesisEventArgs) 29 | event.handle = uintptr2handle(handle) 30 | var resultHandle C.SPXHANDLE 31 | ret := uintptr(C.synthesizer_synthesis_event_get_result(event.handle, &resultHandle)) 32 | if ret != C.SPX_NOERROR { 33 | return nil, common.NewCarbonError(ret) 34 | } 35 | result, err := NewSpeechSynthesisResultFromHandle(handle2uintptr(resultHandle)) 36 | if err != nil { 37 | return nil, err 38 | } 39 | event.Result = *result 40 | return event, nil 41 | } 42 | 43 | // SpeechSynthesisEventHandler is the type of the event handler that receives SpeechSynthesisEventArgs 44 | type SpeechSynthesisEventHandler func(event SpeechSynthesisEventArgs) 45 | -------------------------------------------------------------------------------- /speech/speech_synthesis_result.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "time" 8 | "unsafe" 9 | 10 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 11 | ) 12 | 13 | // #include 14 | // #include 15 | // #include 16 | // #include 17 | // #include 18 | // 19 | import "C" 20 | 21 | // SpeechSynthesisResult contains detailed information about result of a synthesis operation. 
22 | type SpeechSynthesisResult struct { 23 | handle C.SPXHANDLE 24 | 25 | // ResultID specifies the result identifier. 26 | ResultID string 27 | 28 | // Reason specifies status of speech synthesis result. 29 | Reason common.ResultReason 30 | 31 | // AudioData presents the synthesized audio. 32 | AudioData []byte 33 | 34 | // AudioDuration presents the time duration of synthesized audio. 35 | AudioDuration time.Duration 36 | 37 | // Collection of additional synthesisResult properties. 38 | Properties *common.PropertyCollection 39 | } 40 | 41 | // Close releases the underlying resources 42 | func (result *SpeechSynthesisResult) Close() { 43 | result.Properties.Close() 44 | if result.handle != C.SPXHANDLE_INVALID { 45 | C.synthesizer_result_handle_release(result.handle) 46 | result.handle = C.SPXHANDLE_INVALID 47 | } 48 | } 49 | 50 | // NewSpeechSynthesisResultFromHandle creates a SpeechSynthesisResult from a handle (for internal use) 51 | func NewSpeechSynthesisResultFromHandle(handle common.SPXHandle) (*SpeechSynthesisResult, error) { 52 | 53 | result := new(SpeechSynthesisResult) 54 | result.handle = uintptr2handle(handle) 55 | /* AudioData length and duration */ 56 | var cAudioLength C.uint32_t 57 | var cAudioDuration C.uint64_t 58 | ret := uintptr(C.synth_result_get_audio_length_duration(result.handle, &cAudioLength, &cAudioDuration)) 59 | if ret != C.SPX_NOERROR { 60 | return nil, common.NewCarbonError(ret) 61 | } 62 | result.AudioDuration = time.Duration(cAudioDuration) * time.Millisecond 63 | // using max(1024, cAudioLength) as buffer size 64 | if cAudioLength < 1024 { 65 | cAudioLength = 1024 66 | } 67 | buffer := C.malloc(C.sizeof_char * (C.size_t)(cAudioLength)) 68 | defer C.free(unsafe.Pointer(buffer)) 69 | /* ResultID */ 70 | ret = uintptr(C.synth_result_get_result_id(result.handle, (*C.char)(buffer), 1024)) 71 | if ret != C.SPX_NOERROR { 72 | return nil, common.NewCarbonError(ret) 73 | } 74 | result.ResultID = C.GoString((*C.char)(buffer)) 75 | /* 
Reason */ 76 | var cReason C.Result_Reason 77 | ret = uintptr(C.synth_result_get_reason(result.handle, &cReason)) 78 | if ret != C.SPX_NOERROR { 79 | return nil, common.NewCarbonError(ret) 80 | } 81 | result.Reason = (common.ResultReason)(cReason) 82 | /* AudioData */ 83 | var outSize C.uint32_t 84 | ret = uintptr(C.synth_result_get_audio_data(result.handle, (*C.uint8_t)(buffer), cAudioLength, &outSize)) 85 | if ret != C.SPX_NOERROR { 86 | return nil, common.NewCarbonError(ret) 87 | } 88 | result.AudioData = C.GoBytes(buffer, (C.int)(outSize)) 89 | /* Properties */ 90 | var propBagHandle C.SPXHANDLE 91 | ret = uintptr(C.synth_result_get_property_bag(uintptr2handle(handle), &propBagHandle)) 92 | if ret != C.SPX_NOERROR { 93 | return nil, common.NewCarbonError(ret) 94 | } 95 | result.Properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 96 | return result, nil 97 | } 98 | 99 | // SpeechSynthesisOutcome is a wrapper type to be returned by operations returning SpeechSynthesisResult and error 100 | type SpeechSynthesisOutcome struct { 101 | common.OperationOutcome 102 | 103 | // Result is the result of the operation 104 | Result *SpeechSynthesisResult 105 | } 106 | 107 | // Close releases the underlying resources 108 | func (outcome SpeechSynthesisOutcome) Close() { 109 | if outcome.Result != nil { 110 | outcome.Result.Close() 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /speech/speech_synthesis_viseme_event_args.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package speech 5 | 6 | import ( 7 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 8 | ) 9 | 10 | // #include 11 | // #include 12 | // #include 13 | import "C" 14 | 15 | // SpeechSynthesisVisemeEventArgs represents the speech synthesis viseme event arguments. 16 | type SpeechSynthesisVisemeEventArgs struct { 17 | handle C.SPXHANDLE 18 | 19 | // AudioOffset is the audio offset of the viseme event, in ticks (100 nanoseconds). 20 | AudioOffset uint64 21 | 22 | // VisemeID is the viseme ID. 23 | VisemeID uint 24 | 25 | // Animation is the animation. 26 | Animation string 27 | } 28 | 29 | // Close releases the underlying resources 30 | func (event SpeechSynthesisVisemeEventArgs) Close() { 31 | C.synthesizer_event_handle_release(event.handle) 32 | } 33 | 34 | // NewSpeechSynthesisVisemeEventArgsFromHandle creates the object from the handle (for internal use) 35 | func NewSpeechSynthesisVisemeEventArgsFromHandle(handle common.SPXHandle) (*SpeechSynthesisVisemeEventArgs, error) { 36 | event := new(SpeechSynthesisVisemeEventArgs) 37 | event.handle = uintptr2handle(handle) 38 | /* AudioOffset and VisemeID */ 39 | var cAudioOffset C.uint64_t 40 | var cVisemeID C.uint32_t 41 | ret := uintptr(C.synthesizer_viseme_event_get_values(event.handle, &cAudioOffset, &cVisemeID)) 42 | if ret != C.SPX_NOERROR { 43 | return nil, common.NewCarbonError(ret) 44 | } 45 | event.AudioOffset = uint64(cAudioOffset) 46 | event.VisemeID = uint(cVisemeID) 47 | /* Animation */ 48 | value := C.synthesizer_viseme_event_get_animation(event.handle) 49 | event.Animation = C.GoString(value) 50 | C.property_bag_free_string(value) 51 | return event, nil 52 | } 53 | 54 | // SpeechSynthesisVisemeEventHandler is the type of the event handler that receives SpeechSynthesisVisemeEventArgs 55 | type SpeechSynthesisVisemeEventHandler func(event SpeechSynthesisVisemeEventArgs) 56 | -------------------------------------------------------------------------------- 
/speech/speech_synthesis_word_boundary_event_args.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "time" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | // #include 14 | // #include 15 | import "C" 16 | 17 | // SpeechSynthesisWordBoundaryEventArgs represents the speech synthesis word boundary event arguments. 18 | type SpeechSynthesisWordBoundaryEventArgs struct { 19 | handle C.SPXHANDLE 20 | 21 | // AudioOffset is the audio offset of the word boundary event, in ticks (100 nanoseconds). 22 | AudioOffset uint64 23 | 24 | // Duration is the duration of the word boundary event. 25 | Duration time.Duration 26 | 27 | // TextOffset is the text offset. 28 | TextOffset uint 29 | 30 | // WordLength is the length of the word. 31 | WordLength uint 32 | 33 | // Text is the text. 34 | Text string 35 | 36 | // BoundaryType is the boundary type. 
37 | BoundaryType common.SpeechSynthesisBoundaryType 38 | } 39 | 40 | // Close releases the underlying resources 41 | func (event SpeechSynthesisWordBoundaryEventArgs) Close() { 42 | C.synthesizer_event_handle_release(event.handle) 43 | } 44 | 45 | // NewSpeechSynthesisWordBoundaryEventArgsFromHandle creates the object from the handle (for internal use) 46 | func NewSpeechSynthesisWordBoundaryEventArgsFromHandle(handle common.SPXHandle) (*SpeechSynthesisWordBoundaryEventArgs, error) { 47 | event := new(SpeechSynthesisWordBoundaryEventArgs) 48 | event.handle = uintptr2handle(handle) 49 | var cAudioOffset, cDuration C.uint64_t 50 | var cTextOffset, cWordLength C.uint32_t 51 | var cBoundaryType C.SpeechSynthesis_BoundaryType 52 | ret := uintptr(C.synthesizer_word_boundary_event_get_values(event.handle, &cAudioOffset, &cDuration, &cTextOffset, &cWordLength, &cBoundaryType)) 53 | if ret != C.SPX_NOERROR { 54 | return nil, common.NewCarbonError(ret) 55 | } 56 | event.AudioOffset = uint64(cAudioOffset) 57 | event.Duration = time.Duration(cDuration*100) * time.Nanosecond 58 | event.TextOffset = uint(cTextOffset) 59 | event.WordLength = uint(cWordLength) 60 | event.BoundaryType = (common.SpeechSynthesisBoundaryType)(cBoundaryType) 61 | /* Text */ 62 | value := C.synthesizer_event_get_text(event.handle) 63 | event.Text = C.GoString(value) 64 | C.property_bag_free_string(value) 65 | return event, nil 66 | } 67 | 68 | // SpeechSynthesisWordBoundaryEventHandler is the type of the event handler that receives SpeechSynthesisWordBoundaryEventArgs 69 | type SpeechSynthesisWordBoundaryEventHandler func(event SpeechSynthesisWordBoundaryEventArgs) 70 | -------------------------------------------------------------------------------- /speech/synthesis_voices_result.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. 
See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "unsafe" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | // #include 14 | // #include 15 | // #include 16 | // #include 17 | // 18 | import "C" 19 | 20 | // SynthesisVoicesResult contains information about result from voices list of speech synthesizers. 21 | type SynthesisVoicesResult struct { 22 | handle C.SPXHANDLE 23 | 24 | // Voices specifies all voices retrieved 25 | Voices []*VoiceInfo 26 | 27 | // ResultID specifies the result identifier. 28 | ResultID string 29 | 30 | // Reason specifies status of speech synthesis result. 31 | Reason common.ResultReason 32 | 33 | // ErrorDetails presents error details. 34 | ErrorDetails string 35 | 36 | // Collection of additional properties. 37 | Properties *common.PropertyCollection 38 | } 39 | 40 | // Close releases the underlying resources 41 | func (result SynthesisVoicesResult) Close() { 42 | for _, voice := range result.Voices { 43 | voice.Close() 44 | } 45 | result.Properties.Close() 46 | C.synthesizer_result_handle_release(result.handle) 47 | } 48 | 49 | // NewSynthesisVoicesResultFromHandle creates a SynthesisVoicesResult from a handle (for internal use) 50 | func NewSynthesisVoicesResultFromHandle(handle common.SPXHandle) (*SynthesisVoicesResult, error) { 51 | result := new(SynthesisVoicesResult) 52 | result.handle = uintptr2handle(handle) 53 | buffer := C.malloc(C.sizeof_char * 1024) 54 | defer C.free(unsafe.Pointer(buffer)) 55 | /* ResultID */ 56 | ret := uintptr(C.synthesis_voices_result_get_result_id(result.handle, (*C.char)(buffer), 1024)) 57 | if ret != C.SPX_NOERROR { 58 | return nil, common.NewCarbonError(ret) 59 | } 60 | result.ResultID = C.GoString((*C.char)(buffer)) 61 | /* Reason */ 62 | var cReason C.Result_Reason 63 | ret = uintptr(C.synthesis_voices_result_get_reason(result.handle, &cReason)) 64 | if ret != C.SPX_NOERROR { 65 | 
return nil, common.NewCarbonError(ret) 66 | } 67 | result.Reason = (common.ResultReason)(cReason) 68 | /* Properties */ 69 | var propBagHandle C.SPXHANDLE 70 | ret = uintptr(C.synthesis_voices_result_get_property_bag(result.handle, &propBagHandle)) 71 | if ret != C.SPX_NOERROR { 72 | return nil, common.NewCarbonError(ret) 73 | } 74 | result.Properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 75 | result.ErrorDetails = result.Properties.GetProperty(common.CancellationDetailsReasonDetailedText, "") 76 | /* Voices */ 77 | var voiceNum C.uint32_t 78 | ret = uintptr(C.synthesis_voices_result_get_voice_num(result.handle, &voiceNum)) 79 | if ret != C.SPX_NOERROR { 80 | return nil, common.NewCarbonError(ret) 81 | } 82 | voices := make([]*VoiceInfo, voiceNum) 83 | var voice *VoiceInfo 84 | var hVoice C.SPXRESULTHANDLE 85 | var err error 86 | for i := 0; i < int(voiceNum); i++ { 87 | ret = uintptr(C.synthesis_voices_result_get_voice_info(result.handle, (C.uint32_t)(i), &hVoice)) 88 | if ret != C.SPX_NOERROR { 89 | return nil, common.NewCarbonError(ret) 90 | } 91 | voice, err = NewVoiceInfoFromHandle(handle2uintptr(hVoice)) 92 | if err != nil { 93 | return nil, err 94 | } 95 | voices[i] = voice 96 | } 97 | result.Voices = voices 98 | return result, nil 99 | } 100 | 101 | // SpeechSynthesisVoicesOutcome is a wrapper type to be returned by operations returning SynthesisVoicesResult and error 102 | type SpeechSynthesisVoicesOutcome struct { 103 | common.OperationOutcome 104 | 105 | // Result is the result of the operation 106 | Result *SynthesisVoicesResult 107 | } 108 | 109 | // Close releases the underlying resources 110 | func (outcome SpeechSynthesisVoicesOutcome) Close() { 111 | if outcome.Result != nil { 112 | outcome.Result.Close() 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /speech/translation_callback_helpers.go: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 3 | 4 | package speech 5 | 6 | import ( 7 | "sync" 8 | ) 9 | 10 | // #include 11 | // #include 12 | // #include 13 | // 14 | import "C" 15 | 16 | var ( 17 | translationRecognizingCallbacks = make(map[C.SPXHANDLE]TranslationRecognitionEventHandler) 18 | translationRecognizedCallbacks = make(map[C.SPXHANDLE]TranslationRecognitionEventHandler) 19 | translationCanceledCallbacks = make(map[C.SPXHANDLE]TranslationRecognitionCanceledEventHandler) 20 | translationSynthesisCallbacks = make(map[C.SPXHANDLE]TranslationSynthesisEventHandler) 21 | translationCallbacksLock sync.Mutex 22 | ) 23 | 24 | func registerTranslationRecognizingCallback(callback TranslationRecognitionEventHandler, handle C.SPXHANDLE) { 25 | translationCallbacksLock.Lock() 26 | defer translationCallbacksLock.Unlock() 27 | translationRecognizingCallbacks[handle] = callback 28 | } 29 | 30 | func registerTranslationRecognizedCallback(callback TranslationRecognitionEventHandler, handle C.SPXHANDLE) { 31 | translationCallbacksLock.Lock() 32 | defer translationCallbacksLock.Unlock() 33 | translationRecognizedCallbacks[handle] = callback 34 | } 35 | 36 | func registerTranslationCanceledCallback(callback TranslationRecognitionCanceledEventHandler, handle C.SPXHANDLE) { 37 | translationCallbacksLock.Lock() 38 | defer translationCallbacksLock.Unlock() 39 | translationCanceledCallbacks[handle] = callback 40 | } 41 | 42 | func registerTranslationSynthesisCallback(callback TranslationSynthesisEventHandler, handle C.SPXHANDLE) { 43 | translationCallbacksLock.Lock() 44 | defer translationCallbacksLock.Unlock() 45 | translationSynthesisCallbacks[handle] = callback 46 | } 47 | 48 | //export cgoTranslationRecognizing 49 | func cgoTranslationRecognizing(handle C.SPXRECOHANDLE, 
eventHandle C.SPXEVENTHANDLE) { 50 | translationCallbacksLock.Lock() 51 | callback := translationRecognizingCallbacks[handle] 52 | translationCallbacksLock.Unlock() 53 | if callback != nil { 54 | eventArgs, _ := NewTranslationRecognitionEventArgsFromHandle(handle2uintptr(eventHandle)) 55 | callback(*eventArgs) 56 | } 57 | } 58 | 59 | //export cgoTranslationRecognized 60 | func cgoTranslationRecognized(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 61 | translationCallbacksLock.Lock() 62 | callback := translationRecognizedCallbacks[handle] 63 | translationCallbacksLock.Unlock() 64 | if callback != nil { 65 | eventArgs, _ := NewTranslationRecognitionEventArgsFromHandle(handle2uintptr(eventHandle)) 66 | callback(*eventArgs) 67 | } 68 | } 69 | 70 | //export cgoTranslationCanceled 71 | func cgoTranslationCanceled(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 72 | translationCallbacksLock.Lock() 73 | callback := translationCanceledCallbacks[handle] 74 | translationCallbacksLock.Unlock() 75 | if callback != nil { 76 | eventArgs, _ := NewTranslationRecognitionCanceledEventArgsFromHandle(handle2uintptr(eventHandle)) 77 | callback(*eventArgs) 78 | } 79 | } 80 | 81 | //export cgoTranslationSynthesis 82 | func cgoTranslationSynthesis(handle C.SPXRECOHANDLE, eventHandle C.SPXEVENTHANDLE) { 83 | translationCallbacksLock.Lock() 84 | callback := translationSynthesisCallbacks[handle] 85 | translationCallbacksLock.Unlock() 86 | if callback != nil { 87 | eventArgs, _ := NewTranslationSynthesisEventArgsFromHandle(handle2uintptr(eventHandle)) 88 | callback(*eventArgs) 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /speech/voice_info.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
3 | 4 | package speech 5 | 6 | import ( 7 | "strings" 8 | 9 | "github.com/Microsoft/cognitive-services-speech-sdk-go/common" 10 | ) 11 | 12 | // #include 13 | // #include 14 | // #include 15 | // #include 16 | // #include 17 | // 18 | import "C" 19 | 20 | // VoiceInfo contains information about result from voices list of speech synthesizers. 21 | type VoiceInfo struct { 22 | handle C.SPXHANDLE 23 | 24 | // Name specifies the voice name. 25 | Name string 26 | 27 | // Locale specifies the locale of the voice 28 | Locale string 29 | 30 | // ShortName specifies the voice name in short format 31 | ShortName string 32 | 33 | // LocalName specifies the local name of the voice 34 | LocalName string 35 | 36 | // Gender specifies the gender of the voice. 37 | Gender common.SynthesisVoiceGender 38 | 39 | // VoiceType specifies the voice type. 40 | VoiceType common.SynthesisVoiceType 41 | 42 | // StyleList specifies the styles the voice supports. 43 | StyleList []string 44 | 45 | // VoicePath specifies the voice path 46 | VoicePath string 47 | 48 | // Collection of additional properties. 
49 | Properties *common.PropertyCollection 50 | } 51 | 52 | // Close releases the underlying resources 53 | func (result VoiceInfo) Close() { 54 | result.Properties.Close() 55 | C.voice_info_handle_release(result.handle) 56 | } 57 | 58 | // NewVoiceInfoFromHandle creates a VoiceInfo from a handle (for internal use) 59 | func NewVoiceInfoFromHandle(handle common.SPXHandle) (*VoiceInfo, error) { 60 | voiceInfo := new(VoiceInfo) 61 | voiceInfo.handle = uintptr2handle(handle) 62 | /* Name */ 63 | value := C.voice_info_get_name(voiceInfo.handle) 64 | voiceInfo.Name = C.GoString(value) 65 | C.property_bag_free_string(value) 66 | /* Locale */ 67 | value = C.voice_info_get_locale(voiceInfo.handle) 68 | voiceInfo.Locale = C.GoString(value) 69 | C.property_bag_free_string(value) 70 | /* ShortName */ 71 | value = C.voice_info_get_short_name(voiceInfo.handle) 72 | voiceInfo.ShortName = C.GoString(value) 73 | C.property_bag_free_string(value) 74 | /* LocalName */ 75 | value = C.voice_info_get_local_name(voiceInfo.handle) 76 | voiceInfo.LocalName = C.GoString(value) 77 | C.property_bag_free_string(value) 78 | /* StyleList */ 79 | value = C.voice_info_get_style_list(voiceInfo.handle) 80 | voiceInfo.StyleList = strings.Split(C.GoString(value), "|") 81 | C.property_bag_free_string(value) 82 | /* VoiceType */ 83 | var cVoiceType C.Synthesis_VoiceType 84 | ret := uintptr(C.voice_info_get_voice_type(voiceInfo.handle, &cVoiceType)) 85 | if ret != C.SPX_NOERROR { 86 | return nil, common.NewCarbonError(ret) 87 | } 88 | voiceInfo.VoiceType = (common.SynthesisVoiceType)(cVoiceType) 89 | /* VoicePath */ 90 | value = C.voice_info_get_voice_path(voiceInfo.handle) 91 | voiceInfo.VoicePath = C.GoString(value) 92 | C.property_bag_free_string(value) 93 | /* Properties */ 94 | var propBagHandle C.SPXHANDLE 95 | ret = uintptr(C.voice_info_get_property_bag(uintptr2handle(handle), &propBagHandle)) 96 | if ret != C.SPX_NOERROR { 97 | return nil, common.NewCarbonError(ret) 98 | } 99 | 
voiceInfo.Properties = common.NewPropertyCollectionFromHandle(handle2uintptr(propBagHandle)) 100 | gender := voiceInfo.Properties.GetPropertyByString("Gender", "") 101 | if gender == "Female" { 102 | voiceInfo.Gender = common.Female 103 | } else if gender == "Male" { 104 | voiceInfo.Gender = common.Male 105 | } else { 106 | voiceInfo.Gender = common.GenderUnknown 107 | } 108 | return voiceInfo, nil 109 | } 110 | -------------------------------------------------------------------------------- /test_files/TalkForAFewSeconds16.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/cognitive-services-speech-sdk-go/32d0edd08e1fc4ff3227510d5bb8b06cfd54ae6e/test_files/TalkForAFewSeconds16.wav -------------------------------------------------------------------------------- /test_files/katiesteve_mono.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/cognitive-services-speech-sdk-go/32d0edd08e1fc4ff3227510d5bb8b06cfd54ae6e/test_files/katiesteve_mono.wav -------------------------------------------------------------------------------- /test_files/kws.table: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/cognitive-services-speech-sdk-go/32d0edd08e1fc4ff3227510d5bb8b06cfd54ae6e/test_files/kws.table -------------------------------------------------------------------------------- /test_files/myVoiceIsMyPassportVerifyMe01.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/cognitive-services-speech-sdk-go/32d0edd08e1fc4ff3227510d5bb8b06cfd54ae6e/test_files/myVoiceIsMyPassportVerifyMe01.wav -------------------------------------------------------------------------------- /test_files/peloozoid.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/cognitive-services-speech-sdk-go/32d0edd08e1fc4ff3227510d5bb8b06cfd54ae6e/test_files/peloozoid.wav -------------------------------------------------------------------------------- /test_files/turn_on_the_lamp.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/cognitive-services-speech-sdk-go/32d0edd08e1fc4ff3227510d5bb8b06cfd54ae6e/test_files/turn_on_the_lamp.wav -------------------------------------------------------------------------------- /test_files/whats_the_weather_like.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/cognitive-services-speech-sdk-go/32d0edd08e1fc4ff3227510d5bb8b06cfd54ae6e/test_files/whats_the_weather_like.wav --------------------------------------------------------------------------------