├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── build.sbt
├── examples
└── data
│ ├── batman-16khz-noheader.wav
│ ├── batman.wav
│ └── hall.mp3
├── lib
├── tritonus_remaining-0.3.6.jar
└── tritonus_share-0.3.6.jar
├── project
├── build.properties
└── plugins.sbt
├── sonatype.sbt
├── src
├── main
│ └── java
│ │ ├── SpeechToTextWebsocketsDemo.java
│ │ └── com
│ │ └── github
│ │ └── catalystcode
│ │ └── fortis
│ │ └── speechtotext
│ │ ├── Mp3Transcriber.java
│ │ ├── Transcriber.java
│ │ ├── WavTranscriber.java
│ │ ├── config
│ │ ├── OutputFormat.java
│ │ ├── SpeechServiceConfig.java
│ │ └── SpeechType.java
│ │ ├── constants
│ │ ├── EnvironmentVariables.java
│ │ ├── SpeechServiceConnectionHeaders.java
│ │ ├── SpeechServiceContentTypes.java
│ │ ├── SpeechServiceLimitations.java
│ │ ├── SpeechServiceMessageFields.java
│ │ ├── SpeechServiceMessageHeaders.java
│ │ ├── SpeechServiceMetrics.java
│ │ ├── SpeechServicePaths.java
│ │ ├── SpeechServiceSpeechConfig.java
│ │ └── SpeechServiceWebsocketStatusCodes.java
│ │ ├── lifecycle
│ │ ├── MessageReceiver.java
│ │ ├── SpeechHypothesisMessage.java
│ │ ├── SpeechPhraseMessage.java
│ │ ├── TurnEndMessage.java
│ │ └── TurnStartMessage.java
│ │ ├── messages
│ │ ├── AudioEndMessageCreator.java
│ │ ├── BinaryMessageCreator.java
│ │ ├── HeaderCreator.java
│ │ ├── MessageParser.java
│ │ └── TextMessageCreator.java
│ │ ├── telemetry
│ │ ├── AudioTelemetry.java
│ │ ├── CallsTelemetry.java
│ │ └── ConnectionTelemetry.java
│ │ ├── utils
│ │ ├── Environment.java
│ │ ├── ProtocolUtils.java
│ │ ├── RiffHeader.java
│ │ └── Units.java
│ │ └── websocket
│ │ ├── MessageSender.java
│ │ ├── PlatformInfo.java
│ │ ├── SpeechServiceClient.java
│ │ ├── TelemetryInfo.java
│ │ └── nv
│ │ ├── NvMessageReceiver.java
│ │ ├── NvMessageSender.java
│ │ └── NvSpeechServiceClient.java
└── test
│ └── java
│ └── com
│ └── github
│ └── catalystcode
│ └── fortis
│ └── speechtotext
│ ├── messages
│ └── MessageParserTest.java
│ └── websocket
│ ├── PlatformInfoTest.java
│ └── TelemetryInfoTest.java
└── version.sbt
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | target/
3 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | dist:
2 | trusty
3 |
4 | language:
5 | java
6 |
7 | jdk:
8 | - oraclejdk8
9 |
10 | script:
11 | - sbt test
12 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Microsoft Partner Catalyst Team
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | **There is now an official [Java SDK for the Bing Speech to Text API](https://docs.microsoft.com/en-us/azure/cognitive-services/speech/getstarted/getstartedjavaandroid) so this repository is deprecated.**
2 |
3 | ---
4 |
5 | A Java implementation of the [Bing Speech to Text API](https://azure.microsoft.com/en-ca/services/cognitive-services/speech/) [websocket protocol](https://docs.microsoft.com/en-us/azure/cognitive-services/speech/api-reference-rest/websocketprotocol) ([supporting article](https://www.microsoft.com/developerblog/2017/11/01/building-a-custom-spark-connector-for-near-real-time-speech-to-text-transcription/)).
6 |
7 | [Build status](https://travis-ci.org/CatalystCode/SpeechToText-WebSockets-Java)
8 |
9 | ## Usage example ##
10 |
11 | Run a demo via:
12 |
13 | ```sh
14 | # set up all the requisite environment variables
15 | export OXFORD_SPEECH_TOKEN="..."
16 |
17 | # stream the audio and transcribe
18 | sbt "runMain SpeechToTextWebsocketsDemo examples/data/batman.wav"
19 | sbt "runMain SpeechToTextWebsocketsDemo examples/data/hall.mp3"
20 | sbt "runMain SpeechToTextWebsocketsDemo http://bbcwssc.ic.llnwd.net/stream/bbcwssc_mp1_ws-einws en-US .mp3"
21 | ```
22 |
23 | If you're consuming the library via Maven, make sure to also add the Tritonus (PCM audio conversion) jars to the classpath:
24 | - [tritonus_remaining-0.3.6.jar](https://github.com/CatalystCode/SpeechToText-WebSockets-Java/raw/master/lib/tritonus_remaining-0.3.6.jar)
25 | - [tritonus_share-0.3.6.jar](https://github.com/CatalystCode/SpeechToText-WebSockets-Java/raw/master/lib/tritonus_share-0.3.6.jar)
26 |
27 | ## Release process ##
28 |
29 | 1. Configure your credentials via the `SONATYPE_USER` and `SONATYPE_PASSWORD` environment variables.
30 | 2. Update `version.sbt`
31 | 3. Enter the SBT shell: `sbt`
32 | 4. Run `sonatypeOpen "enter staging description here"`
33 | 5. Run `publishSigned`
34 | 6. Run `sonatypeRelease`
35 |
36 | ## Other implementations ##
37 |
38 | - [NodeJS](https://github.com/noopkat/ms-bing-speech-service)
39 |
--------------------------------------------------------------------------------
/build.sbt:
--------------------------------------------------------------------------------
1 | organization := "com.github.catalystcode"
2 | name := "SpeechToText-WebSockets-Java"
3 | description := "A Java implementation of the Bing Speech to Text API websocket protocol"
4 |
5 | javacOptions in (Compile, compile) ++= Seq(
6 | "-source", "1.8",
7 | "-target", "1.8")
8 |
9 | crossPaths := false
10 | autoScalaLibrary := false
11 |
12 | // Bundled dependencies
13 | libraryDependencies ++= Seq(
14 | "log4j" % "log4j" % "1.2.17",
15 | "org.json" % "json" % "20170516",
16 | "com.googlecode.soundlibs" % "jlayer" % "1.0.1-1",
17 | "com.neovisionaries" % "nv-websocket-client" % "2.2"
18 | )
19 |
20 | // Test dependencies
21 | libraryDependencies ++= Seq(
22 | "org.junit.jupiter" % "junit-jupiter-api" % "5.0.0-M4"
23 | ).map(_ % "test")
24 |
25 | assemblyMergeStrategy in assembly := {
26 | case PathList("javax", "inject", xs @ _*) => MergeStrategy.last
27 | case PathList("javax", "servlet", xs @ _*) => MergeStrategy.last
28 | case PathList("javax", "activation", xs @ _*) => MergeStrategy.last
29 | case PathList("org", "aopalliance", xs @ _*) => MergeStrategy.last
30 | case PathList("org", "apache", xs @ _*) => MergeStrategy.last
31 | case PathList("com", "google", xs @ _*) => MergeStrategy.last
32 | case PathList("com", "esotericsoftware", xs @ _*) => MergeStrategy.last
33 | case PathList("com", "codahale", xs @ _*) => MergeStrategy.last
34 | case PathList("com", "yammer", xs @ _*) => MergeStrategy.last
35 | case "about.html" => MergeStrategy.rename
36 | case "META-INF/ECLIPSEF.RSA" => MergeStrategy.last
37 | case "META-INF/mailcap" => MergeStrategy.last
38 | case "META-INF/mimetypes.default" => MergeStrategy.last
39 | case "plugin.properties" => MergeStrategy.last
40 | case "log4j.properties" => MergeStrategy.last
41 | case x =>
42 | val oldStrategy = (assemblyMergeStrategy in assembly).value
43 | oldStrategy(x)
44 | }
45 |
--------------------------------------------------------------------------------
/examples/data/batman-16khz-noheader.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CatalystCode/SpeechToText-WebSockets-Java/1c1e7209a399674a984204bd7ae5a0f09cf8bdfe/examples/data/batman-16khz-noheader.wav
--------------------------------------------------------------------------------
/examples/data/batman.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CatalystCode/SpeechToText-WebSockets-Java/1c1e7209a399674a984204bd7ae5a0f09cf8bdfe/examples/data/batman.wav
--------------------------------------------------------------------------------
/examples/data/hall.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CatalystCode/SpeechToText-WebSockets-Java/1c1e7209a399674a984204bd7ae5a0f09cf8bdfe/examples/data/hall.mp3
--------------------------------------------------------------------------------
/lib/tritonus_remaining-0.3.6.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CatalystCode/SpeechToText-WebSockets-Java/1c1e7209a399674a984204bd7ae5a0f09cf8bdfe/lib/tritonus_remaining-0.3.6.jar
--------------------------------------------------------------------------------
/lib/tritonus_share-0.3.6.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CatalystCode/SpeechToText-WebSockets-Java/1c1e7209a399674a984204bd7ae5a0f09cf8bdfe/lib/tritonus_share-0.3.6.jar
--------------------------------------------------------------------------------
/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version=0.13.13
2 |
--------------------------------------------------------------------------------
/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.3")
2 | addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "1.1")
3 | addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.0.0")
4 | addSbtPlugin("com.github.gseitz" % "sbt-release" % "1.0.4")
5 |
--------------------------------------------------------------------------------
/sonatype.sbt:
--------------------------------------------------------------------------------
1 | pomExtra in Global := {
2 |   <url>github.com/CatalystCode/SpeechToText-Websockets-Java</url>
3 |   <licenses>
4 |     <license>
5 |       <name>MIT</name>
6 |       <url>https://opensource.org/licenses/MIT</url>
7 |     </license>
8 |   </licenses>
9 |   <scm>
10 |     <connection>scm:git:github.com/CatalystCode/SpeechToText-Websockets-Java</connection>
11 |     <developerConnection>scm:git:git@github.com:CatalystCode/SpeechToText-Websockets-Java</developerConnection>
12 |     <url>github.com/CatalystCode/SpeechToText-Websockets-Java</url>
13 |   </scm>
14 |   <developers>
15 |     <developer>
16 |       <id>c-w</id>
17 |       <name>Clemens Wolff</name>
18 |       <email>clewolff@microsoft.com</email>
19 |       <url>http://github.com/c-w</url>
20 |     </developer>
21 |   </developers>
22 | }
23 |
24 | credentials += Credentials(
25 | "Sonatype Nexus Repository Manager",
26 | "oss.sonatype.org",
27 | System.getenv("SONATYPE_USER"),
28 | System.getenv("SONATYPE_PASSWORD"))
29 |
30 | organizationName := "Partner Catalyst"
31 | organizationHomepage := Some(url("https://github.com/CatalystCode"))
32 |
33 | publishTo := {
34 | val isSnapshot = version.value.trim.endsWith("SNAPSHOT")
35 | val nexus = "https://oss.sonatype.org/"
36 | if (isSnapshot) Some("snapshots" at nexus + "content/repositories/snapshots")
37 | else Some("releases" at nexus + "service/local/staging/deploy/maven2")
38 | }
39 |
40 | publishMavenStyle := true
41 | publishArtifact in Test := false
42 | useGpg := true
43 |
--------------------------------------------------------------------------------
/src/main/java/SpeechToTextWebsocketsDemo.java:
--------------------------------------------------------------------------------
1 | import com.github.catalystcode.fortis.speechtotext.Transcriber;
2 | import com.github.catalystcode.fortis.speechtotext.config.OutputFormat;
3 | import com.github.catalystcode.fortis.speechtotext.config.SpeechServiceConfig;
4 | import com.github.catalystcode.fortis.speechtotext.config.SpeechType;
5 | import org.apache.log4j.BasicConfigurator;
6 | import org.apache.log4j.Level;
7 | import org.apache.log4j.Logger;
8 |
9 | import java.io.BufferedInputStream;
10 | import java.io.FileInputStream;
11 | import java.io.IOException;
12 | import java.io.InputStream;
13 | import java.net.URL;
14 | import java.util.Locale;
15 |
16 | public class SpeechToTextWebsocketsDemo {
17 | static {
18 | BasicConfigurator.configure();
19 | Logger.getRootLogger().setLevel(Level.INFO);
20 | }
21 |
22 | public static void main(String[] args) throws Exception {
23 | final String subscriptionKey = System.getenv("OXFORD_SPEECH_TOKEN");
24 | final SpeechType speechType = SpeechType.CONVERSATION;
25 | final OutputFormat outputFormat = OutputFormat.SIMPLE;
26 | final String audioPath = args[0];
27 | final Locale locale = Locale.forLanguageTag(args.length > 1 ? args[1] : "en-US");
28 | final String audioType = args.length > 2 ? args[2] : audioPath;
29 |
30 | SpeechServiceConfig config = new SpeechServiceConfig(subscriptionKey, speechType, outputFormat, locale);
31 |
32 | try (InputStream audioStream = openStream(audioPath)) {
33 | Transcriber.create(audioType, config).transcribe(audioStream, SpeechToTextWebsocketsDemo::onPhrase, SpeechToTextWebsocketsDemo::onHypothesis,
34 | SpeechToTextWebsocketsDemo::onTurnStart, SpeechToTextWebsocketsDemo::onTurnEnd);
35 | }
36 | }
37 |
38 | private static InputStream openStream(String audioPath) throws IOException {
39 | InputStream inputStream = audioPath.startsWith("http://") || audioPath.startsWith("https://")
40 | ? new URL(audioPath).openConnection().getInputStream()
41 | : new FileInputStream(audioPath);
42 |
43 | return new BufferedInputStream(inputStream);
44 | }
45 |
46 | private static void onTurnEnd() {
47 | System.out.println("TurnEnd:");
48 | }
49 |
50 | private static void onPhrase(String phrase) {
51 | System.out.println("Phrase: " + phrase);
52 | }
53 |
54 | private static void onHypothesis(String hypothesis) {
55 | System.out.println("Hypothesis: " + hypothesis);
56 | }
57 |
58 | private static void onTurnStart(String serviceTag) {
59 | System.out.println("TurnStart: " + serviceTag);
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/Mp3Transcriber.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext;
2 |
3 | import com.github.catalystcode.fortis.speechtotext.config.SpeechServiceConfig;
4 | import com.github.catalystcode.fortis.speechtotext.websocket.MessageSender;
5 | import com.github.catalystcode.fortis.speechtotext.websocket.SpeechServiceClient;
6 | import javazoom.jl.converter.Converter;
7 | import javazoom.jl.decoder.JavaLayerException;
8 | import org.apache.log4j.Logger;
9 |
10 | import java.io.*;
11 | import java.nio.ByteBuffer;
12 | import java.nio.channels.FileChannel;
13 | import java.nio.file.Path;
14 | import java.nio.file.Paths;
15 |
16 | import static com.github.catalystcode.fortis.speechtotext.utils.Environment.getMp3BufferSize;
17 | import static java.nio.ByteBuffer.allocate;
18 | import static java.nio.file.Files.createTempFile;
19 | import static java.nio.file.Files.deleteIfExists;
20 |
21 | /**
22 |  * Transcriber for MP3 input. The MP3 stream is collected into chunks of up to
23 |  * bufferSize bytes; every chunk is written to a temporary file, converted to WAV
24 |  * and handed to the sender.
25 |  */
26 | class Mp3Transcriber extends Transcriber {
27 |     private static final Logger log = Logger.getLogger(Mp3Transcriber.class);
28 |
29 |     private final int bufferSize;
30 |
31 |     Mp3Transcriber(SpeechServiceConfig config, SpeechServiceClient client) {
32 |         super(config, client);
33 |         this.bufferSize = getMp3BufferSize();
34 |     }
35 |
36 |     /**
37 |      * Reads the MP3 stream to its end, sends it chunk by chunk and finishes with the
38 |      * audio-end message.
39 |      *
40 |      * @param mp3Stream MP3 data to transcribe
41 |      * @param sender sender that receives the converted WAV audio
42 |      * @throws IOException if reading the stream or writing a chunk to disk fails
43 |      */
44 |     @Override
45 |     protected void sendAudio(InputStream mp3Stream, MessageSender sender) throws IOException {
46 |         byte[] streamBuf = new byte[bufferSize];
47 |         ByteBuffer mp3Buf = allocate(bufferSize);
48 |         int read;
49 |         while ((read = mp3Stream.read(streamBuf)) != -1) {
50 |             // Send what is buffered once the next read would fill the chunk.
51 |             if (mp3Buf.position() > 0 && mp3Buf.position() + read >= bufferSize) {
52 |                 sendChunk(mp3Buf, sender);
53 |             }
54 |             mp3Buf.put(streamBuf, 0, read);
55 |             log.debug("Buffered " + mp3Buf.position() + "/" + bufferSize + " bytes from MP3 stream");
56 |         }
57 |         // The bytes still buffered at end of stream are the tail of the audio (or all of
58 |         // it for inputs smaller than one chunk) and must be sent before the end message.
59 |         if (mp3Buf.position() > 0) {
60 |             sendChunk(mp3Buf, sender);
61 |         }
62 |         sender.sendAudioEnd();
63 |     }
64 |
65 |     /** Writes the buffered MP3 bytes to a temp file, empties the buffer and sends the chunk. */
66 |     private void sendChunk(ByteBuffer mp3Buf, MessageSender sender) throws IOException {
67 |         int length = mp3Buf.position();
68 |         log.debug("Starting to process " + length + " buffered bytes");
69 |         String mp3Path = newTempFile(".mp3");
70 |         try {
71 |             writeBytes(mp3Path, mp3Buf, length);
72 |         } catch (IOException ex) {
73 |             // Do not leave the temp file behind when the chunk cannot be written.
74 |             deleteTempFile(mp3Path);
75 |             throw ex;
76 |         }
77 |         mp3Buf.clear();
78 |         convertAndSend(mp3Path, sender);
79 |     }
80 |
81 |     /**
82 |      * Converts one MP3 chunk file to WAV and sends it. Runs on the calling thread, so
83 |      * chunks are sent in stream order and before the audio-end message. Failures are
84 |      * logged and the chunk is skipped; both temp files are removed in every case.
85 |      */
86 |     private void convertAndSend(String mp3Path, MessageSender sender) {
87 |         String wavPath = null;
88 |         try {
89 |             try {
90 |                 wavPath = newTempFile(".wav");
91 |             } catch (IOException ex) {
92 |                 log.error("Error creating temp file", ex);
93 |                 return;
94 |             }
95 |
96 |             try {
97 |                 convertAudio(mp3Path, wavPath);
98 |             } catch (JavaLayerException ex) {
99 |                 log.error("Error converting MP3 to WAV", ex);
100 |                 return;
101 |             }
102 |
103 |             try (InputStream wavStream = new BufferedInputStream(new FileInputStream(wavPath))) {
104 |                 sender.sendAudio(wavStream);
105 |             } catch (Exception ex) {
106 |                 log.error("Error sending audio", ex);
107 |             }
108 |         } finally {
109 |             deleteTempFile(mp3Path);
110 |             if (wavPath != null) {
111 |                 deleteTempFile(wavPath);
112 |             }
113 |         }
114 |     }
115 |
116 |     private static void convertAudio(String mp3Path, String wavPath) throws JavaLayerException {
117 |         log.debug("Starting to convert " + mp3Path + " to " + wavPath);
118 |         new Converter().convert(mp3Path, wavPath);
119 |         log.debug("Converted " + mp3Path + " to " + wavPath);
120 |     }
121 |
122 |     /** Writes the buffered bytes (from index 0 up to the current position) to the file. */
123 |     private static void writeBytes(String path, ByteBuffer buf, int length) throws IOException {
124 |         try (FileOutputStream outputStream = new FileOutputStream(path);
125 |              FileChannel channel = outputStream.getChannel()) {
126 |             buf.flip();
127 |             // A single write call is not guaranteed to drain the buffer.
128 |             while (buf.hasRemaining()) {
129 |                 channel.write(buf);
130 |             }
131 |         }
132 |         log.debug("Wrote " + length + " bytes to " + path);
133 |     }
134 |
135 |     private String newTempFile(String suffix) throws IOException {
136 |         return createTempFile(getClass().getName(), suffix).toString();
137 |     }
138 |
139 |     private static void deleteTempFile(String tempFile) {
140 |         Path path = Paths.get(tempFile);
141 |         try {
142 |             deleteIfExists(path);
143 |         } catch (IOException ex) {
144 |             log.error("Error deleting temp file: " + tempFile, ex);
145 |         }
146 |     }
147 | }
148 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/Transcriber.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext;
2 |
3 | import com.github.catalystcode.fortis.speechtotext.config.SpeechServiceConfig;
4 | import com.github.catalystcode.fortis.speechtotext.lifecycle.MessageReceiver;
5 | import com.github.catalystcode.fortis.speechtotext.websocket.MessageSender;
6 | import com.github.catalystcode.fortis.speechtotext.websocket.SpeechServiceClient;
7 | import com.github.catalystcode.fortis.speechtotext.websocket.nv.NvSpeechServiceClient;
8 |
9 | import java.io.IOException;
10 | import java.io.InputStream;
11 | import java.util.function.Consumer;
12 |
13 | public abstract class Transcriber {
14 | protected final SpeechServiceConfig config;
15 | private final SpeechServiceClient client;
16 |
17 | Transcriber(SpeechServiceConfig config, SpeechServiceClient client) {
18 | this.config = config;
19 | this.client = client;
20 | }
21 |
22 | public void transcribe(InputStream audioStream, Consumer onResult, Consumer onHypothesis) throws Exception {
23 | transcribe(audioStream, onResult, onHypothesis);
24 | }
25 |
26 | public void transcribe(InputStream audioStream, Consumer onResult, Consumer onHypothesis,
27 | Consumer onTurnStart, Runnable onTurnEnd) throws Exception {
28 | MessageReceiver receiver = new MessageReceiver(onResult, onHypothesis, onTurnStart, onTurnEnd, client.getEndLatch());
29 | try {
30 | MessageSender sender = client.start(config, receiver);
31 | receiver.setSender(sender);
32 | sender.sendConfiguration();
33 | sendAudio(audioStream, sender);
34 | client.awaitEnd();
35 | } finally {
36 | client.stop();
37 | }
38 | }
39 |
40 | protected abstract void sendAudio(InputStream audioStream, MessageSender sender) throws IOException;
41 |
42 | public static Transcriber create(String audioPath, SpeechServiceConfig config) {
43 | return create(audioPath, config, new NvSpeechServiceClient());
44 | }
45 |
46 | public static Transcriber create(SpeechServiceConfig config) {
47 | return create(config, new NvSpeechServiceClient());
48 | }
49 |
50 | private static Transcriber create(String audioPath, SpeechServiceConfig config, SpeechServiceClient client) {
51 | if (audioPath.endsWith(".wav")) {
52 | return new WavTranscriber(config, client);
53 | }
54 |
55 | if (audioPath.endsWith(".mp3")) {
56 | return new Mp3Transcriber(config, client);
57 | }
58 |
59 | throw new IllegalArgumentException("Unsupported audio file type: " + audioPath);
60 | }
61 |
62 | private static Transcriber create(SpeechServiceConfig config, SpeechServiceClient client) {
63 | return new WavTranscriber(config, client);
64 | }
65 |
66 | }
67 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/WavTranscriber.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext;
2 |
3 | import com.github.catalystcode.fortis.speechtotext.config.SpeechServiceConfig;
4 | import com.github.catalystcode.fortis.speechtotext.websocket.MessageSender;
5 | import com.github.catalystcode.fortis.speechtotext.websocket.SpeechServiceClient;
6 |
7 | import java.io.InputStream;
8 |
9 | /**
10 |  * Transcriber for WAV input: the stream is passed to the sender as is.
11 |  */
12 | class WavTranscriber extends Transcriber {
13 |     WavTranscriber(SpeechServiceConfig config, SpeechServiceClient client) {
14 |         super(config, client);
15 |     }
16 |
17 |     /**
18 |      * Sends the WAV stream followed by the audio-end message.
19 |      *
20 |      * Both calls run on the calling thread and in this order, so the method returns
21 |      * only after the end message has been handed to the sender. No Thread object is
22 |      * created: invoking run() on an unstarted Thread executes on the caller anyway.
23 |      *
24 |      * @param wavStream WAV data to transcribe
25 |      * @param sender sender that receives the audio
26 |      */
27 |     @Override
28 |     protected void sendAudio(InputStream wavStream, MessageSender sender) {
29 |         sender.sendAudio(wavStream);
30 |         sender.sendAudioEnd();
31 |     }
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/config/OutputFormat.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.config;
2 |
3 | /**
4 |  * Result formats that can be requested from the speech service. The value of a
5 |  * constant is the text used for the "format" parameter of the connection URL.
6 |  */
7 | @SuppressWarnings("unused")
8 | public enum OutputFormat {
9 |     SIMPLE("simple"),
10 |     DETAILED("detailed");
11 |
12 |     /** Text representation of the format. */
13 |     public final String value;
14 |
15 |     OutputFormat(String formatValue) {
16 |         value = formatValue;
17 |     }
18 | }
19 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/config/SpeechServiceConfig.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.config;
2 |
3 | import java.util.Locale;
4 |
5 | import static com.github.catalystcode.fortis.speechtotext.utils.Environment.getSpeechPlatformHost;
6 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceConnectionHeaders.*;
7 |
8 | public class SpeechServiceConfig {
9 | private final String subscriptionKey;
10 | private final SpeechType speechType;
11 | private final OutputFormat outputFormat;
12 | private final Locale locale;
13 | private final String host;
14 |
15 | public SpeechServiceConfig(String subscriptionKey, SpeechType speechType, OutputFormat outputFormat, Locale locale) {
16 | this.subscriptionKey = subscriptionKey;
17 | this.speechType = speechType;
18 | this.outputFormat = outputFormat;
19 | this.locale = locale;
20 | this.host = getSpeechPlatformHost();
21 | }
22 |
23 | public String getConnectionUrl(String connectionId) {
24 | return host + speechType.endpoint +
25 | '?' + LANGUAGE + '=' + locale.toLanguageTag() +
26 | '&' + FORMAT + '=' + outputFormat.value +
27 | '&' + CONNECTION_ID + '=' + connectionId +
28 | '&' + SUBSCRIPTION_KEY + '=' + subscriptionKey;
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/config/SpeechType.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.config;
2 |
3 | /**
4 |  * Recognition modes of the speech service. The endpoint of a constant is the URL
5 |  * path appended to the host when the connection URL is built.
6 |  */
7 | @SuppressWarnings("unused")
8 | public enum SpeechType {
9 |     INTERACTIVE("/speech/recognition/interactive/cognitiveservices/v1"),
10 |     DICTATION("/speech/recognition/dictation/cognitiveservices/v1"),
11 |     CONVERSATION("/speech/recognition/conversation/cognitiveservices/v1");
12 |
13 |     /** URL path of the recognition mode. */
14 |     public final String endpoint;
15 |
16 |     SpeechType(String endpointPath) {
17 |         endpoint = endpointPath;
18 |     }
19 | }
20 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/constants/EnvironmentVariables.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.constants;
2 |
3 | public final class EnvironmentVariables {
4 | private EnvironmentVariables() {}
5 |
6 | private static final String PREFIX = "SSTWSJAVA";
7 | public static final String HOST = PREFIX + "_HOST";
8 | public static final String LIBRARY_VERSION = PREFIX + "_LIBRARY_VERSION";
9 | public static final String DEVICE_MANUFACTURER = PREFIX + "_DEVICE_MANUFACTURER";
10 | public static final String DEVICE_MODEL = PREFIX + "_DEVICE_MODEL";
11 | public static final String DEVICE_VERSION = PREFIX + "_DEVICE_VERSION";
12 | public static final String MP3_BUFFER_SIZE = PREFIX + "_MP3_BUFER_SIZE";
13 | }
14 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/constants/SpeechServiceConnectionHeaders.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.constants;
2 |
3 | public final class SpeechServiceConnectionHeaders {
4 | private SpeechServiceConnectionHeaders() {}
5 |
6 | public static final String LANGUAGE = "language";
7 | public static final String FORMAT = "format";
8 | public static final String CONNECTION_ID = "X-ConnectionId";
9 | public static final String SUBSCRIPTION_KEY = "Ocp-Apim-Subscription-Key";
10 | }
11 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/constants/SpeechServiceContentTypes.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.constants;
2 |
3 | public final class SpeechServiceContentTypes {
4 | private SpeechServiceContentTypes() {}
5 |
6 | public static final String WAV = "audio/wav";
7 | public static final String JSON = "application/json; charset=utf-8";
8 | }
9 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/constants/SpeechServiceLimitations.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.constants;
2 |
3 | public final class SpeechServiceLimitations {
4 | private SpeechServiceLimitations() {}
5 |
6 | public final static int MAX_ERROR_MESSAGE_NUM_CHARACTERS = 50;
7 | public static final int MAX_BYTES_PER_AUDIO_CHUNK = 8192;
8 | public static final int SAMPLE_RATE = 16000;
9 | public static final short NUM_CHANNELS = 1;
10 | }
11 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/constants/SpeechServiceMessageFields.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.constants;
2 |
3 | public final class SpeechServiceMessageFields {
4 | private SpeechServiceMessageFields() {}
5 |
6 | public static final String RECOGNITION_STATUS = "RecognitionStatus";
7 | public static final String SUCCESS_STATUS = "Success";
8 | public static final String END_OF_DICTATION_STATUS = "EndOfDictation";
9 | public static final String END_OF_DICTATION_SILENCE_STATUS = "DictationEndSilenceTimeout";
10 | public static final String DISPLAY_TEXT = "DisplayText";
11 | public static final String HYPOTHESIS_TEXT = "Text";
12 | public static final String CONTEXT = "context";
13 | public static final String SERVICE_TAG = "serviceTag";
14 | }
15 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/constants/SpeechServiceMessageHeaders.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.constants;
2 |
3 | public final class SpeechServiceMessageHeaders {
4 | private SpeechServiceMessageHeaders() {}
5 |
6 | public static final String PATH = "Path";
7 | public static final String REQUEST_ID = "X-RequestId";
8 | public static final String TIMESTAMP = "X-Timestamp";
9 | public static final String CONTENT_TYPE = "Content-Type";
10 |
11 | public static final String HEADER_DELIM = "\r\n";
12 | public static final String BODY_DELIM = "\r\n\r\n";
13 | }
14 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/constants/SpeechServiceMetrics.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.constants;
2 |
3 | public final class SpeechServiceMetrics {
4 | private SpeechServiceMetrics() {}
5 |
6 | public static final String METRICS = "Metrics";
7 | public static final String RECEIVED_MESSAGES = "ReceivedMessages";
8 | public static final String NAME = "Name";
9 | public static final String START = "Start";
10 | public static final String END = "End";
11 | public static final String ERROR = "Error";
12 | public static final String ID = "Id";
13 |
14 | public static final String CONNECTION_METRIC = "Connection";
15 | public static final String MICROPHONE_METRIC = "Microphone";
16 | }
17 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/constants/SpeechServicePaths.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.constants;
2 |
3 | public final class SpeechServicePaths {
4 | private SpeechServicePaths() {}
5 |
6 | public static final String AUDIO = "audio";
7 | public static final String SPEECH_CONFIG = "speech.config";
8 | public static final String SPEECH_PHRASE = "speech.phrase";
9 | public static final String SPEECH_HYPOTHESIS = "speech.hypothesis";
10 | public static final String SPEECH_END = "speech.endDetected";
11 | public static final String TURN_START = "turn.start";
12 | public static final String TURN_END = "turn.end";
13 | public static final String TELEMETRY = "telemetry";
14 | }
15 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/constants/SpeechServiceSpeechConfig.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.constants;
2 |
/**
 * Key names for the speech configuration document, grouped by the section
 * their constant name refers to.
 *
 * <p>Note that {@link #SYSTEM_VERSION}, {@link #OS_VERSION} and
 * {@link #DEVICE_VERSION} all share the same literal key, {@code "version"}.
 *
 * <p>Pure constants holder; the private constructor prevents instantiation.
 */
public final class SpeechServiceSpeechConfig {
    // Section names.
    public static final String CONTEXT = "context";
    public static final String SYSTEM = "system";
    public static final String OS = "os";
    public static final String DEVICE = "device";

    // "system" keys.
    public static final String SYSTEM_VERSION = "version";

    // "os" keys.
    public static final String OS_PLATFORM = "platform";
    public static final String OS_NAME = "name";
    public static final String OS_VERSION = "version";

    // "device" keys.
    public static final String DEVICE_MANUFACTURER = "manufacturer";
    public static final String DEVICE_MODEL = "model";
    public static final String DEVICE_VERSION = "version";

    private SpeechServiceSpeechConfig() {
        // not instantiable
    }
}
18 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/constants/SpeechServiceWebsocketStatusCodes.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.constants;
2 |
/**
 * Numeric WebSocket close status codes.
 *
 * <p>The values coincide with the close codes defined by RFC 6455, section 7.4.1.
 * Pure constants holder; the private constructor prevents instantiation.
 */
@SuppressWarnings("unused")
public final class SpeechServiceWebsocketStatusCodes {
    /** 1000. */
    public static final int OK = 1000;

    /** 1002. */
    public static final int PROTOCOL_ERROR = 1002;

    /** 1007. */
    public static final int INVALID_PAYLOAD_DATA = 1007;

    /** 1011. */
    public static final int SERVER_ERROR = 1011;

    private SpeechServiceWebsocketStatusCodes() {
        // not instantiable
    }
}
12 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/lifecycle/MessageReceiver.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.lifecycle;
2 |
3 | import com.github.catalystcode.fortis.speechtotext.messages.MessageParser;
4 | import com.github.catalystcode.fortis.speechtotext.telemetry.CallsTelemetry;
5 | import com.github.catalystcode.fortis.speechtotext.websocket.MessageSender;
6 | import org.apache.log4j.Logger;
7 | import org.json.JSONObject;
8 |
9 | import java.util.Map;
10 | import java.util.concurrent.CountDownLatch;
11 | import java.util.function.Consumer;
12 |
13 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceMessageHeaders.PATH;
14 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceMessageHeaders.REQUEST_ID;
15 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServicePaths.*;
16 |
17 |
18 | public class MessageReceiver {
19 | private static final Logger log = Logger.getLogger(MessageReceiver.class);
20 | private final Consumer onResult;
21 | private final Consumer onHypothesis;
22 | private final Consumer onTurnStart;
23 | private final Runnable onTurnEnd;
24 | private final CountDownLatch endLatch;
25 | private MessageSender sender;
26 |
27 | public MessageReceiver(Consumer onResult, Consumer onHypothesis, CountDownLatch endLatch) {
28 | this(onResult, onHypothesis, null, null, endLatch);
29 | }
30 |
31 | public MessageReceiver(Consumer onResult, Consumer onHypothesis,
32 | Consumer onTurnStart, Runnable onTurnEnd, CountDownLatch endLatch) {
33 | this.onResult = onResult;
34 | this.onHypothesis = onHypothesis;
35 | this.onTurnStart = onTurnStart;
36 | this.onTurnEnd = onTurnEnd;
37 | this.endLatch = endLatch;
38 | }
39 |
40 | public void onMessage(String message) {
41 | Map headers = MessageParser.parseHeaders(message);
42 | JSONObject body = MessageParser.parseBody(message);
43 |
44 | String path = headers.get(PATH);
45 | String requestId = headers.get(REQUEST_ID);
46 | CallsTelemetry.forId(requestId).recordCall(path);
47 | log.debug("Got message at path " + path + " with payload '" + body + "'");
48 |
49 | if (TURN_START.equalsIgnoreCase(path)) {
50 | TurnStartMessage.handle(body, onTurnStart);
51 | } else if (SPEECH_HYPOTHESIS.equalsIgnoreCase(path)) {
52 | SpeechHypothesisMessage.handle(body, onHypothesis);
53 | } else if (SPEECH_PHRASE.equalsIgnoreCase(path)) {
54 | SpeechPhraseMessage.handle(body, onResult);
55 | } else if (TURN_END.equalsIgnoreCase(path)) {
56 | TurnEndMessage.handle(sender, endLatch, onTurnEnd);
57 | }
58 | }
59 |
60 | public void setSender(MessageSender sender) {
61 | this.sender = sender;
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/lifecycle/SpeechHypothesisMessage.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.lifecycle;
2 |
3 | import org.json.JSONObject;
4 |
5 | import java.util.function.Consumer;
6 |
7 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceMessageFields.HYPOTHESIS_TEXT;
8 |
9 | final class SpeechHypothesisMessage {
10 | private SpeechHypothesisMessage() {}
11 |
12 | static void handle(JSONObject message, Consumer onHypothesis) {
13 | if (onHypothesis == null) {
14 | return;
15 | }
16 |
17 | String displayText = message.getString(HYPOTHESIS_TEXT);
18 | onHypothesis.accept(displayText);
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/lifecycle/SpeechPhraseMessage.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.lifecycle;
2 |
3 | import org.apache.log4j.Logger;
4 | import org.json.JSONObject;
5 |
6 | import java.util.function.Consumer;
7 |
8 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceMessageFields.*;
9 |
10 | final class SpeechPhraseMessage {
11 | private static final Logger log = Logger.getLogger(SpeechPhraseMessage.class);
12 | private SpeechPhraseMessage() {}
13 |
14 | static void handle(JSONObject message, Consumer onResult) {
15 | if (!isSuccess(message)) {
16 | return;
17 | }
18 |
19 | String displayText = message.getString(DISPLAY_TEXT);
20 | onResult.accept(displayText);
21 | }
22 |
23 | private static boolean isSuccess(JSONObject message) {
24 | String status = message.getString(RECOGNITION_STATUS);
25 |
26 | if (END_OF_DICTATION_STATUS.equalsIgnoreCase(status) ||
27 | END_OF_DICTATION_SILENCE_STATUS.equalsIgnoreCase(status)) {
28 | log.info("Detected end of speech");
29 | return false;
30 | }
31 |
32 | if (!SUCCESS_STATUS.equalsIgnoreCase(status)) {
33 | log.warn("Unable to recognize audio: " + message);
34 | return false;
35 | }
36 |
37 | return true;
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/lifecycle/TurnEndMessage.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.lifecycle;
2 |
3 | import com.github.catalystcode.fortis.speechtotext.websocket.MessageSender;
4 |
5 | import java.util.concurrent.CountDownLatch;
6 |
7 | final class TurnEndMessage {
8 | private TurnEndMessage() {}
9 |
10 | static void handle(MessageSender sender, CountDownLatch turnEndLatch, Runnable onTurnEnd) {
11 | try {
12 | if (onTurnEnd != null) {
13 | onTurnEnd.run();
14 | }
15 | } finally {
16 | sender.sendTelemetry();
17 | turnEndLatch.countDown();
18 | }
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/lifecycle/TurnStartMessage.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.lifecycle;
2 |
3 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceMessageFields.*;
4 |
5 | import java.util.function.Consumer;
6 |
7 | import org.json.JSONObject;;
8 |
9 | final class TurnStartMessage {
10 | private TurnStartMessage() {}
11 |
12 | static void handle(JSONObject message, Consumer onTurnStart) {
13 | if (onTurnStart == null) {
14 | return;
15 | }
16 |
17 | JSONObject context = message.getJSONObject(CONTEXT);
18 | String serviceTag = context.getString(SERVICE_TAG);
19 | onTurnStart.accept(serviceTag);
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/messages/AudioEndMessageCreator.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.messages;
2 |
3 | import java.nio.ByteBuffer;
4 |
5 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceContentTypes.WAV;
6 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServicePaths.AUDIO;
7 |
8 | public final class AudioEndMessageCreator {
9 | private AudioEndMessageCreator() {}
10 |
11 | private static final BinaryMessageCreator binaryMessageCreator = new BinaryMessageCreator(false);
12 |
13 | public static ByteBuffer createAudioEndMessage(String requestId) {
14 | return binaryMessageCreator.createBinaryMessage(AUDIO, requestId, WAV, new byte[0], 0);
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/messages/BinaryMessageCreator.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.messages;
2 |
3 | import java.nio.ByteBuffer;
4 |
5 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceLimitations.NUM_CHANNELS;
6 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceLimitations.SAMPLE_RATE;
7 | import static com.github.catalystcode.fortis.speechtotext.messages.HeaderCreator.addHeaders;
8 | import static com.github.catalystcode.fortis.speechtotext.utils.RiffHeader.RIFF_HEADER_LENGTH;
9 | import static com.github.catalystcode.fortis.speechtotext.utils.RiffHeader.putRiffHeader;
10 | import static java.nio.ByteBuffer.allocate;
11 | import static java.nio.charset.StandardCharsets.UTF_8;
12 |
13 | public class BinaryMessageCreator {
14 | private boolean isFirstMessage;
15 |
16 | public BinaryMessageCreator() {
17 | this(true);
18 | }
19 |
20 | BinaryMessageCreator(boolean isFirstMessage) {
21 | this.isFirstMessage = isFirstMessage;
22 | }
23 |
24 | public ByteBuffer createBinaryMessage(String path, String requestId, String contentType, byte[] wavBytes, int count) {
25 | byte[] headers = formatHeaders(path, requestId, contentType);
26 | ByteBuffer buf = allocateBuffer(count, headers.length);
27 | putHeader(headers, buf);
28 | putContent(wavBytes, count, buf);
29 | updateState();
30 | return buf;
31 | }
32 |
33 | private static byte[] formatHeaders(String path, String requestId, String contentType) {
34 | return addHeaders(new StringBuilder(), path, requestId, contentType).toString().getBytes(UTF_8);
35 | }
36 |
37 | private void putContent(byte[] wavBytes, int count, ByteBuffer buf) {
38 | if (count <= 0) {
39 | return;
40 | }
41 |
42 | int offset = isFirstMessage ? RIFF_HEADER_LENGTH : 0;
43 | int length = isFirstMessage ? count - RIFF_HEADER_LENGTH : count;
44 | if (isFirstMessage) putRiffHeader(buf, SAMPLE_RATE, NUM_CHANNELS);
45 | buf.put(wavBytes, offset, length);
46 | }
47 |
48 | private void updateState() {
49 | if (isFirstMessage) {
50 | isFirstMessage = false;
51 | }
52 | }
53 |
54 | private static void putHeader(byte[] header, ByteBuffer buf) {
55 | buf.putShort((short)header.length);
56 | buf.put(header);
57 | }
58 |
59 | private ByteBuffer allocateBuffer(int numWavBytes, int numHeaderBytes) {
60 | int bufSize = 2 + numHeaderBytes;
61 | if (isFirstMessage) bufSize += RIFF_HEADER_LENGTH;
62 | bufSize += numWavBytes;
63 | return allocate(bufSize);
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/messages/HeaderCreator.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.messages;
2 |
3 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceMessageHeaders.*;
4 | import static com.github.catalystcode.fortis.speechtotext.utils.ProtocolUtils.newTimestamp;
5 |
6 | final class HeaderCreator {
7 | private HeaderCreator() {}
8 |
9 | static StringBuilder addHeaders(StringBuilder sb, String path, String requestId, String contentType) {
10 | sb.append(PATH).append(": ").append(path).append(HEADER_DELIM);
11 | sb.append(REQUEST_ID).append(": ").append(requestId).append(HEADER_DELIM);
12 | sb.append(TIMESTAMP).append(": ").append(newTimestamp()).append(HEADER_DELIM);
13 | sb.append(CONTENT_TYPE).append(": ").append(contentType).append(HEADER_DELIM);
14 | return sb;
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/messages/MessageParser.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.messages;
2 |
3 | import org.json.JSONObject;
4 |
5 | import java.util.HashMap;
6 | import java.util.Map;
7 |
8 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceMessageHeaders.BODY_DELIM;
9 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceMessageHeaders.HEADER_DELIM;
10 |
11 | public final class MessageParser {
12 | private MessageParser() {}
13 |
14 | public static Map parseHeaders(String message) {
15 | String[] parts = message.split(BODY_DELIM);
16 | if (parts.length != 2) {
17 | throw new IllegalArgumentException("Message '" + message + "' does not have header and body");
18 | }
19 | String[] headerLines = parts[0].split(HEADER_DELIM);
20 | Map headers = new HashMap<>(headerLines.length);
21 | for (String headerLine : headerLines) {
22 | String[] headerParts = headerLine.split(":");
23 | if (headerParts.length < 2) {
24 | throw new IllegalArgumentException("Header '" + headerLine + "' does not have a name and value");
25 | }
26 | String headerName = headerParts[0].trim();
27 | StringBuilder headerValueBuilder = new StringBuilder();
28 | for (int i = 1; i < headerParts.length; i++) {
29 | headerValueBuilder.append(headerParts[i]).append(':');
30 | }
31 | headerValueBuilder.setLength(headerValueBuilder.length() - 1);
32 | String headerValue = headerValueBuilder.toString().trim();
33 | headers.put(headerName, headerValue);
34 | }
35 | return headers;
36 | }
37 |
38 | public static JSONObject parseBody(String message) {
39 | String[] parts = message.split(BODY_DELIM);
40 | if (parts.length != 2) {
41 | throw new IllegalArgumentException("Message '" + message + "' does not have header and body");
42 | }
43 | String content = parts[1];
44 | return new JSONObject(content);
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/messages/TextMessageCreator.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.messages;
2 |
3 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceMessageHeaders.HEADER_DELIM;
4 | import static com.github.catalystcode.fortis.speechtotext.messages.HeaderCreator.addHeaders;
5 |
6 | public final class TextMessageCreator {
7 | private TextMessageCreator() {}
8 |
9 | public static String createTextMessage(String path, String requestId, String contentType, String message) {
10 | return addHeaders(new StringBuilder(), path, requestId, contentType).append(HEADER_DELIM).append(message).toString();
11 | }
12 | }
13 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/telemetry/AudioTelemetry.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.telemetry;
2 |
3 | import java.util.concurrent.ConcurrentHashMap;
4 | import java.util.concurrent.ConcurrentMap;
5 |
6 | import static com.github.catalystcode.fortis.speechtotext.utils.ProtocolUtils.newTimestamp;
7 |
8 | public final class AudioTelemetry {
9 | private static final ConcurrentMap POOL = new ConcurrentHashMap<>();
10 |
11 | private String audioStarted;
12 | private String audioEnded;
13 | private String audioErrored;
14 |
15 | private AudioTelemetry() {}
16 |
17 | public void recordAudioStarted() {
18 | if (audioStarted == null) {
19 | audioStarted = newTimestamp();
20 | }
21 | }
22 |
23 | public void recordAudioEnded() {
24 | if (audioEnded == null) {
25 | audioEnded = newTimestamp();
26 | }
27 | }
28 |
29 | public void recordAudioFailed(String message) {
30 | audioEnded = newTimestamp();
31 | audioErrored = message;
32 | }
33 |
34 | public String getAudioErrored() {
35 | return audioErrored;
36 | }
37 |
38 | public String getAudioEnded() {
39 | return audioEnded;
40 | }
41 |
42 | public String getAudioStarted() {
43 | return audioStarted;
44 | }
45 |
46 | public static AudioTelemetry forId(String requestId) {
47 | AudioTelemetry instance = POOL.get(requestId);
48 | if (instance == null) {
49 | AudioTelemetry newInstance = new AudioTelemetry();
50 | instance = POOL.putIfAbsent(requestId, newInstance);
51 | if (instance == null) {
52 | instance = newInstance;
53 | }
54 | }
55 | return instance;
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/telemetry/CallsTelemetry.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.telemetry;
2 |
3 | import java.util.Map;
4 | import java.util.Queue;
5 | import java.util.concurrent.ConcurrentHashMap;
6 | import java.util.concurrent.ConcurrentLinkedQueue;
7 | import java.util.concurrent.ConcurrentMap;
8 |
9 | import static com.github.catalystcode.fortis.speechtotext.utils.ProtocolUtils.newTimestamp;
10 |
11 | public final class CallsTelemetry {
12 | private static final ConcurrentMap POOL = new ConcurrentHashMap<>();
13 |
14 | private final ConcurrentMap> callTimestamps = new ConcurrentHashMap<>();
15 |
16 | private CallsTelemetry() {}
17 |
18 | public void recordCall(String endpoint) {
19 | String now = newTimestamp();
20 | Queue timestamps = callTimestamps.get(endpoint);
21 | if (timestamps == null) {
22 | Queue newTimestamps = new ConcurrentLinkedQueue<>();
23 | timestamps = callTimestamps.putIfAbsent(endpoint, newTimestamps);
24 | if (timestamps == null) {
25 | timestamps = newTimestamps;
26 | }
27 | }
28 | timestamps.add(now);
29 | }
30 |
31 | public Map> getCallTimestamps() {
32 | return callTimestamps;
33 | }
34 |
35 | public static CallsTelemetry forId(String requestId) {
36 | CallsTelemetry instance = POOL.get(requestId);
37 | if (instance == null) {
38 | CallsTelemetry newInstance = new CallsTelemetry();
39 | instance = POOL.putIfAbsent(requestId, newInstance);
40 | if (instance == null) {
41 | instance = newInstance;
42 | }
43 | }
44 | return instance;
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/telemetry/ConnectionTelemetry.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.telemetry;
2 |
3 | import java.util.concurrent.ConcurrentHashMap;
4 | import java.util.concurrent.ConcurrentMap;
5 |
6 | import static com.github.catalystcode.fortis.speechtotext.utils.ProtocolUtils.newTimestamp;
7 |
8 | public final class ConnectionTelemetry {
9 | private static final ConcurrentMap POOL = new ConcurrentHashMap<>();
10 |
11 | private String connectionStarted;
12 | private String connectionEstablished;
13 | private String connectionErrored;
14 |
15 | private ConnectionTelemetry() {}
16 |
17 | public void recordConnectionStarted() {
18 | if (connectionStarted == null) {
19 | connectionStarted = newTimestamp();
20 | }
21 | }
22 |
23 | public void recordConnectionEstablished() {
24 | if (connectionEstablished == null) {
25 | connectionEstablished = newTimestamp();
26 | }
27 | }
28 |
29 | public void recordConnectionFailed(String message) {
30 | connectionEstablished = newTimestamp();
31 | connectionErrored = message;
32 | }
33 |
34 | public String getConnectionErrored() {
35 | return connectionErrored;
36 | }
37 |
38 | public String getConnectionEstablished() {
39 | return connectionEstablished;
40 | }
41 |
42 | public String getConnectionStarted() {
43 | return connectionStarted;
44 | }
45 |
46 | public static ConnectionTelemetry forId(String connectionId) {
47 | ConnectionTelemetry instance = POOL.get(connectionId);
48 | if (instance == null) {
49 | ConnectionTelemetry newInstance = new ConnectionTelemetry();
50 | instance = POOL.putIfAbsent(connectionId, newInstance);
51 | if (instance == null) {
52 | instance = newInstance;
53 | }
54 | }
55 | return instance;
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/utils/Environment.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.utils;
2 |
3 | import static com.github.catalystcode.fortis.speechtotext.constants.EnvironmentVariables.*;
4 | import static com.github.catalystcode.fortis.speechtotext.utils.Units.KB;
5 | import static java.lang.Integer.parseInt;
6 |
7 | public final class Environment {
8 | private Environment() {}
9 |
10 | public static String getSpeechPlatformHost() {
11 | return getenv(HOST, "wss://speech.platform.bing.com");
12 | }
13 |
14 | public static String getLibraryVersion() {
15 | return getenv(LIBRARY_VERSION, "0.0.1");
16 | }
17 |
18 | public static String getDeviceManufacturer() {
19 | return getenv(DEVICE_MANUFACTURER, "SpeechToText-Websockets-Java");
20 | }
21 |
22 | public static String getDeviceModel() {
23 | return getenv(DEVICE_MODEL, "SpeechToText-Websockets-Java");
24 | }
25 |
26 | public static String getDeviceVersion() {
27 | return getenv(DEVICE_VERSION, "0.0.1");
28 | }
29 |
30 | public static int getMp3BufferSize() {
31 | return parseInt(getenv(MP3_BUFFER_SIZE, String.valueOf(8 * KB)));
32 | }
33 |
34 | private static String getenv(String key, String defaultValue) {
35 | String value = System.getenv(key);
36 | return value != null ? value : defaultValue;
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/utils/ProtocolUtils.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.utils;
2 |
3 | import static java.time.ZonedDateTime.now;
4 | import static java.time.format.DateTimeFormatter.ISO_INSTANT;
5 | import static java.util.UUID.randomUUID;
6 |
/**
 * Helpers for generating identifiers and timestamps.
 */
public final class ProtocolUtils {
    private ProtocolUtils() {
        // static helpers only
    }

    /**
     * @return a random UUID rendered without dashes (32 hexadecimal characters)
     */
    public static String newGuid() {
        final String dashed = randomUUID().toString();
        return dashed.replace("-", "");
    }

    /**
     * @return the current time formatted with {@code ISO_INSTANT}
     */
    public static String newTimestamp() {
        return ISO_INSTANT.format(now());
    }
}
18 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/utils/RiffHeader.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.utils;
2 |
3 | import java.io.IOException;
4 | import java.io.InputStream;
5 | import java.nio.ByteBuffer;
6 |
7 | import static java.nio.ByteBuffer.wrap;
8 | import static java.nio.ByteOrder.BIG_ENDIAN;
9 | import static java.nio.ByteOrder.LITTLE_ENDIAN;
10 |
/**
 * A 44-byte RIFF/WAVE header: parsed field values plus helpers to write a
 * header into a buffer and to read one from a stream.
 *
 * <p>Chunk identifiers are stored big-endian, all sizes and format fields
 * little-endian.
 */
@SuppressWarnings({"unused", "WeakerAccess"})
public final class RiffHeader {
    public static final int RIFF_HEADER_LENGTH = 44;
    private static final int FORMAT_WAVE = 0x57415645;
    private static final int CHUNKID_RIFF = 0x52494646;
    private static final int SUBCHUNK1ID_FMT = 0x666d7420;
    private static final int SUBCHUNK2ID_DATA = 0x64617461;
    private static final short AUDIO_FORMAT_PCM = 1;

    public final int chunkId;
    public final int chunkSize;
    public final int format;
    public final int subChunk1ID;
    public final int subChunk1Size;
    public final short audioFormat;
    public final short numChannels;
    public final int sampleRate;
    public final int byteRate;
    public final short blockAlign;
    public final short bitsPerSample;
    public final int subChunk2Id;
    public final int subChunk2Size;

    /**
     * Parses the first {@link #RIFF_HEADER_LENGTH} bytes of {@code wavBytes}.
     * The field values are not validated.
     *
     * @param wavBytes array holding at least {@link #RIFF_HEADER_LENGTH} bytes
     * @throws IndexOutOfBoundsException if the array is shorter than the header
     */
    public RiffHeader(byte[] wavBytes) {
        ByteBuffer waveHeader = wrap(wavBytes, 0, RIFF_HEADER_LENGTH);

        waveHeader.order(BIG_ENDIAN);
        chunkId = waveHeader.getInt();

        waveHeader.order(LITTLE_ENDIAN);
        chunkSize = waveHeader.getInt();

        waveHeader.order(BIG_ENDIAN);
        format = waveHeader.getInt();
        subChunk1ID = waveHeader.getInt();

        waveHeader.order(LITTLE_ENDIAN);
        subChunk1Size = waveHeader.getInt();
        audioFormat = waveHeader.getShort();
        numChannels = waveHeader.getShort();
        sampleRate = waveHeader.getInt();
        byteRate = waveHeader.getInt();
        blockAlign = waveHeader.getShort();
        bitsPerSample = waveHeader.getShort();

        waveHeader.order(BIG_ENDIAN);
        subChunk2Id = waveHeader.getInt();

        waveHeader.order(LITTLE_ENDIAN);
        subChunk2Size = waveHeader.getInt();
    }

    /**
     * Writes a 16-bit PCM header at the buffer's current position.
     *
     * <p>Both size fields (chunk size and data size) are written as 0. The
     * buffer's byte order is changed by this call and left little-endian.
     *
     * @param buf         destination with at least {@link #RIFF_HEADER_LENGTH} bytes remaining
     * @param sampleRate  samples per second
     * @param numChannels number of channels
     */
    public static void putRiffHeader(ByteBuffer buf, int sampleRate, short numChannels) {
        int chunkSize = 0;
        int subChunk1Size = 16;
        int subChunk2Size = 0;
        short bitsPerSample = 16;
        int bytesPerSample = bitsPerSample / 8;
        int byteRate = sampleRate * numChannels * bytesPerSample;
        short blockAlign = (short) (numChannels * bytesPerSample);

        buf.order(BIG_ENDIAN);
        buf.putInt(CHUNKID_RIFF);
        buf.order(LITTLE_ENDIAN);
        buf.putInt(chunkSize);
        buf.order(BIG_ENDIAN);
        buf.putInt(FORMAT_WAVE);
        buf.putInt(SUBCHUNK1ID_FMT);
        buf.order(LITTLE_ENDIAN);
        buf.putInt(subChunk1Size);
        buf.putShort(AUDIO_FORMAT_PCM);
        buf.putShort(numChannels);
        buf.putInt(sampleRate);
        buf.putInt(byteRate);
        buf.putShort(blockAlign);
        buf.putShort(bitsPerSample);
        buf.order(BIG_ENDIAN);
        buf.putInt(SUBCHUNK2ID_DATA);
        buf.order(LITTLE_ENDIAN);
        buf.putInt(subChunk2Size);
    }

    /**
     * Reads exactly {@link #RIFF_HEADER_LENGTH} bytes from the stream and
     * parses them.
     *
     * <p>A single {@code read} may legally return fewer bytes than requested
     * before end of stream, so reading continues until the header is complete.
     *
     * @param wavStream stream positioned at the start of the header
     * @return the parsed header
     * @throws IOException if the stream ends, or stops making progress, before
     *                     the full header has been read
     */
    public static RiffHeader fromStream(InputStream wavStream) throws IOException {
        byte[] header = new byte[RIFF_HEADER_LENGTH];
        int filled = 0;
        while (filled < RIFF_HEADER_LENGTH) {
            int read = wavStream.read(header, filled, RIFF_HEADER_LENGTH - filled);
            // A 0-byte read is treated as failure too, so a stream that cannot
            // deliver the remaining bytes does not make this loop spin forever.
            if (read <= 0) {
                throw new IOException("Unable to read " + RIFF_HEADER_LENGTH + " bytes of RIFF header from stream");
            }
            filled += read;
        }
        return new RiffHeader(header);
    }
}
102 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/utils/Units.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.utils;
2 |
/**
 * Size unit constants.
 */
public final class Units {
    /** Number of bytes in one kilobyte (1024). */
    public static final int KB = 1024;

    private Units() {
        // not instantiable
    }
}
8 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/websocket/MessageSender.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.websocket;
2 |
3 | import com.github.catalystcode.fortis.speechtotext.messages.BinaryMessageCreator;
4 | import com.github.catalystcode.fortis.speechtotext.telemetry.AudioTelemetry;
5 | import com.github.catalystcode.fortis.speechtotext.telemetry.CallsTelemetry;
6 | import com.github.catalystcode.fortis.speechtotext.telemetry.ConnectionTelemetry;
7 | import com.github.catalystcode.fortis.speechtotext.utils.RiffHeader;
8 | import org.apache.log4j.Logger;
9 |
10 | import javax.sound.sampled.AudioFormat;
11 | import javax.sound.sampled.AudioInputStream;
12 | import javax.sound.sampled.UnsupportedAudioFileException;
13 | import java.io.IOException;
14 | import java.io.InputStream;
15 | import java.nio.ByteBuffer;
16 |
17 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceContentTypes.JSON;
18 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceContentTypes.WAV;
19 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceLimitations.*;
20 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServicePaths.*;
21 | import static com.github.catalystcode.fortis.speechtotext.messages.AudioEndMessageCreator.createAudioEndMessage;
22 | import static com.github.catalystcode.fortis.speechtotext.messages.TextMessageCreator.createTextMessage;
23 | import static com.github.catalystcode.fortis.speechtotext.utils.ProtocolUtils.newGuid;
24 | import static javax.sound.sampled.AudioFormat.Encoding.PCM_SIGNED;
25 | import static javax.sound.sampled.AudioSystem.getAudioInputStream;
26 |
27 | public abstract class MessageSender {
28 | private static final Logger log = Logger.getLogger(MessageSender.class);
29 |
30 | private final String connectionId;
31 | private final String requestId;
32 | private final BinaryMessageCreator binaryMessageCreator;
33 |
/**
 * Creates a sender bound to a connection. A fresh request id is generated and
 * a new binary message creator is set up for this sender.
 *
 * @param connectionId id of the connection this sender belongs to
 */
protected MessageSender(String connectionId) {
    this.binaryMessageCreator = new BinaryMessageCreator();
    this.requestId = newGuid();
    this.connectionId = connectionId;
}
39 |
40 | public final void sendConfiguration() {
41 | String config = new PlatformInfo().toJson();
42 | String configMessage = createTextMessage(SPEECH_CONFIG, requestId, JSON, config);
43 | sendTextMessage(configMessage);
44 | log.info("Sent speech config: " + config);
45 | }
46 |
47 | public final void sendAudio(InputStream wavStream) {
48 | AudioInputStream pcmStream;
49 | try {
50 | pcmStream = adjustAudioEncoding(wavStream);
51 | } catch (UnsupportedAudioFileException | IOException ex) {
52 | log.error("Problem adjusting audio", ex);
53 | return;
54 | }
55 | send16khzMonoPcmAudio(pcmStream);
56 | }
57 |
58 | private static AudioInputStream adjustAudioEncoding(InputStream sourceWavStream) throws UnsupportedAudioFileException, IOException {
59 | AudioInputStream audioPcm;
60 | if (sourceWavStream instanceof AudioInputStream) {
61 | audioPcm = (AudioInputStream) sourceWavStream;
62 | } else {
63 | audioPcm = getAudioInputStream(sourceWavStream);
64 | }
65 | AudioInputStream audio16khz = to16khz(audioPcm);
66 | AudioInputStream audio16khzMono = toMono(audio16khz);
67 | AudioInputStream audio16khzMonoPcm = toPcm(audio16khzMono);
68 | skipRiffHeader(audio16khzMonoPcm);
69 | return audio16khzMonoPcm;
70 | }
71 |
72 | private static AudioInputStream toPcm(AudioInputStream sourceAudioStream) {
73 | AudioFormat sourceFormat = sourceAudioStream.getFormat();
74 | return getAudioInputStream(new AudioFormat(
75 | PCM_SIGNED,
76 | sourceFormat.getSampleRate(),
77 | sourceFormat.getSampleSizeInBits(),
78 | sourceFormat.getChannels(),
79 | sourceFormat.getFrameSize(),
80 | sourceFormat.getFrameRate(),
81 | sourceFormat.isBigEndian()), sourceAudioStream);
82 | }
83 |
84 | private static AudioInputStream toMono(AudioInputStream sourceAudioStream) {
85 | AudioFormat sourceFormat = sourceAudioStream.getFormat();
86 | return getAudioInputStream(new AudioFormat(
87 | sourceFormat.getEncoding(),
88 | sourceFormat.getSampleRate(),
89 | sourceFormat.getSampleSizeInBits(),
90 | NUM_CHANNELS,
91 | sourceFormat.getFrameSize(),
92 | sourceFormat.getFrameRate(),
93 | sourceFormat.isBigEndian()), sourceAudioStream);
94 | }
95 |
96 | private static AudioInputStream to16khz(AudioInputStream sourceAudioStream) {
97 | AudioFormat sourceFormat = sourceAudioStream.getFormat();
98 | return getAudioInputStream(new AudioFormat(
99 | sourceFormat.getEncoding(),
100 | SAMPLE_RATE,
101 | sourceFormat.getSampleSizeInBits(),
102 | sourceFormat.getChannels(),
103 | sourceFormat.getFrameSize(),
104 | sourceFormat.getFrameRate(),
105 | sourceFormat.isBigEndian()), sourceAudioStream);
106 | }
107 |
108 | private static void skipRiffHeader(InputStream wavStream) throws IOException {
109 | RiffHeader.fromStream(wavStream);
110 | }
111 |
112 | private void send16khzMonoPcmAudio(InputStream wavStream) {
113 | AudioTelemetry audioTelemetry = AudioTelemetry.forId(requestId);
114 | audioTelemetry.recordAudioStarted();
115 | try {
116 | byte[] buf = new byte[MAX_BYTES_PER_AUDIO_CHUNK];
117 | int chunksSent = 0;
118 | int read;
119 | while ((read = wavStream.read(buf)) != -1) {
120 | ByteBuffer audioChunkMessage = binaryMessageCreator.createBinaryMessage(AUDIO, requestId, WAV, buf, read);
121 | sendBinaryMessage(audioChunkMessage);
122 | chunksSent++;
123 | }
124 | log.info("Sent " + chunksSent + " audio chunks");
125 | } catch (Exception ex) {
126 | audioTelemetry.recordAudioFailed(ex.getMessage());
127 | throw new RuntimeException(ex);
128 | }
129 | }
130 |
131 | public final void sendAudioEnd() {
132 | AudioTelemetry audioTelemetry = AudioTelemetry.forId(requestId);
133 | ByteBuffer audioEndMessage = createAudioEndMessage(requestId);
134 | sendBinaryMessage(audioEndMessage);
135 | log.debug("Sent explicit end-of-audio marker");
136 | audioTelemetry.recordAudioEnded();
137 | }
138 |
139 | public final void sendTelemetry() {
140 | CallsTelemetry callsTelemetry = CallsTelemetry.forId(requestId);
141 | ConnectionTelemetry connectionTelemetry = ConnectionTelemetry.forId(connectionId);
142 | AudioTelemetry audioTelemetry = AudioTelemetry.forId(requestId);
143 | String telemetry = new TelemetryInfo(connectionId, callsTelemetry, connectionTelemetry, audioTelemetry).toJson();
144 | String telemetryMessage = createTextMessage(TELEMETRY, requestId, JSON, telemetry);
145 | sendTextMessage(telemetryMessage);
146 | log.info("Sent telemetry: " + telemetry);
147 | }
148 |
149 | protected abstract void sendBinaryMessage(ByteBuffer message);
150 | protected abstract void sendTextMessage(String message);
151 | }
152 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/websocket/PlatformInfo.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.websocket;
2 |
3 | import org.json.JSONObject;
4 |
5 | import static com.github.catalystcode.fortis.speechtotext.utils.Environment.*;
6 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceSpeechConfig.*;
7 | import static java.lang.System.getProperty;
8 |
9 | class PlatformInfo {
10 | String toJson() {
11 | JSONObject json = new JSONObject();
12 | json.put(CONTEXT, createContext());
13 | return json.toString();
14 | }
15 |
16 | private JSONObject createContext() {
17 | JSONObject json = new JSONObject();
18 | json.put(SYSTEM, createSystem());
19 | json.put(OS, createOs());
20 | json.put(DEVICE, createDevice());
21 | return json;
22 | }
23 |
24 | private JSONObject createSystem() {
25 | JSONObject json = new JSONObject();
26 | json.put(SYSTEM_VERSION, getLibraryVersion());
27 | return json;
28 | }
29 |
30 | private JSONObject createOs() {
31 | JSONObject json = new JSONObject();
32 | json.put(OS_PLATFORM, getProperty("os.name").split(" ")[0]);
33 | json.put(OS_NAME, getProperty("os.name"));
34 | json.put(OS_VERSION, getProperty("os.version"));
35 | return json;
36 | }
37 |
38 | private JSONObject createDevice() {
39 | JSONObject json = new JSONObject();
40 | json.put(DEVICE_MANUFACTURER, getDeviceManufacturer());
41 | json.put(DEVICE_MODEL, getDeviceModel());
42 | json.put(DEVICE_VERSION, getDeviceVersion());
43 | return json;
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/websocket/SpeechServiceClient.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.websocket;
2 |
3 | import com.github.catalystcode.fortis.speechtotext.config.SpeechServiceConfig;
4 | import com.github.catalystcode.fortis.speechtotext.lifecycle.MessageReceiver;
5 |
6 | import java.util.concurrent.CountDownLatch;
7 |
8 | public interface SpeechServiceClient {
9 | MessageSender start(SpeechServiceConfig config, MessageReceiver receiver) throws Exception;
10 | void stop();
11 | void awaitEnd() throws InterruptedException;
12 | CountDownLatch getEndLatch();
13 | }
14 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/websocket/TelemetryInfo.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.websocket;
2 |
3 | import com.github.catalystcode.fortis.speechtotext.telemetry.AudioTelemetry;
4 | import com.github.catalystcode.fortis.speechtotext.telemetry.CallsTelemetry;
5 | import com.github.catalystcode.fortis.speechtotext.telemetry.ConnectionTelemetry;
6 | import org.json.JSONObject;
7 |
8 | import java.util.ArrayList;
9 | import java.util.Collection;
10 | import java.util.Map;
11 | import java.util.Queue;
12 |
13 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceLimitations.MAX_ERROR_MESSAGE_NUM_CHARACTERS;
14 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceMetrics.*;
15 |
16 | class TelemetryInfo {
17 | private final String connectionId;
18 | private final CallsTelemetry callsTelemetry;
19 | private final ConnectionTelemetry connectionTelemetry;
20 | private final AudioTelemetry audioTelemetry;
21 |
22 | TelemetryInfo(String connectionId, CallsTelemetry callsTelemetry, ConnectionTelemetry connectionTelemetry, AudioTelemetry audioTelemetry) {
23 | this.connectionId = connectionId;
24 | this.callsTelemetry = callsTelemetry;
25 | this.connectionTelemetry = connectionTelemetry;
26 | this.audioTelemetry = audioTelemetry;
27 | }
28 |
29 | String toJson() {
30 | JSONObject json = new JSONObject();
31 | putReceivedMessages(json);
32 | putMetrics(json);
33 | return json.toString();
34 | }
35 |
36 | private void putMetrics(JSONObject json) {
37 | Collection metrics = new ArrayList<>();
38 | metrics.add(createConnectionMetric());
39 | metrics.add(createMicrophoneMetric());
40 | json.put(METRICS, metrics);
41 | }
42 |
43 | private void putReceivedMessages(JSONObject json) {
44 | Collection receivedMessages = new ArrayList<>();
45 | for (Map.Entry> entry : callsTelemetry.getCallTimestamps().entrySet()) {
46 | String endpoint = entry.getKey();
47 | Queue calls = entry.getValue();
48 | JSONObject receivedMessage = new JSONObject();
49 | if (calls.size() > 1) {
50 | receivedMessage.put(endpoint, calls);
51 | } else {
52 | receivedMessage.put(endpoint, calls.peek());
53 | }
54 | receivedMessages.add(receivedMessage);
55 | }
56 | json.put(RECEIVED_MESSAGES, receivedMessages);
57 | }
58 |
59 | private JSONObject createConnectionMetric() {
60 | JSONObject metric = new JSONObject();
61 | metric.put(NAME, CONNECTION_METRIC);
62 | metric.put(ID, connectionId);
63 | metric.put(START, connectionTelemetry.getConnectionStarted());
64 | metric.put(END, connectionTelemetry.getConnectionEstablished());
65 | addError(metric, connectionTelemetry.getConnectionErrored());
66 | return metric;
67 | }
68 |
69 | private JSONObject createMicrophoneMetric() {
70 | JSONObject metric = new JSONObject();
71 | metric.put(NAME, MICROPHONE_METRIC);
72 | metric.put(START, audioTelemetry.getAudioStarted());
73 | metric.put(END, audioTelemetry.getAudioEnded());
74 | addError(metric, audioTelemetry.getAudioErrored());
75 | return metric;
76 | }
77 |
78 | private void addError(JSONObject metric, String error) {
79 | if (error != null) {
80 | metric.put(ERROR, error.substring(0, MAX_ERROR_MESSAGE_NUM_CHARACTERS));
81 | }
82 | }
83 | }
84 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/websocket/nv/NvMessageReceiver.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.websocket.nv;
2 |
3 | import com.github.catalystcode.fortis.speechtotext.lifecycle.MessageReceiver;
4 | import com.github.catalystcode.fortis.speechtotext.telemetry.ConnectionTelemetry;
5 | import com.neovisionaries.ws.client.WebSocket;
6 | import com.neovisionaries.ws.client.WebSocketAdapter;
7 | import com.neovisionaries.ws.client.WebSocketException;
8 | import com.neovisionaries.ws.client.WebSocketFrame;
9 | import org.apache.log4j.Logger;
10 |
11 | import java.util.List;
12 | import java.util.Map;
13 | import java.util.concurrent.CountDownLatch;
14 |
15 | class NvMessageReceiver extends WebSocketAdapter {
16 | private static final Logger log = Logger.getLogger(NvMessageReceiver.class);
17 | private final CountDownLatch socketCloseLatch;
18 | private final MessageReceiver receiver;
19 | private final ConnectionTelemetry telemetry;
20 |
21 | NvMessageReceiver(CountDownLatch socketCloseLatch, MessageReceiver receiver, ConnectionTelemetry telemetry) {
22 | this.socketCloseLatch = socketCloseLatch;
23 | this.receiver = receiver;
24 | this.telemetry = telemetry;
25 | }
26 |
27 | @Override
28 | public void onConnected(WebSocket websocket, Map> headers) throws Exception {
29 | telemetry.recordConnectionEstablished();
30 | log.debug("Websocket connected");
31 | }
32 |
33 | @Override
34 | public void onConnectError(WebSocket websocket, WebSocketException exception) throws Exception {
35 | telemetry.recordConnectionFailed(exception.getMessage());
36 | log.error("Websocket connection failed", exception);
37 | }
38 |
39 | @Override
40 | public void onTextMessage(WebSocket websocket, String text) throws Exception {
41 | receiver.onMessage(text);
42 | }
43 |
44 | @Override
45 | public void onError(WebSocket websocket, WebSocketException cause) throws Exception {
46 | log.error("Websocket read error", cause);
47 | socketCloseLatch.countDown();
48 | }
49 |
50 | @Override
51 | public void onCloseFrame(WebSocket websocket, WebSocketFrame frame) throws Exception {
52 | int closeCode = frame.getCloseCode();
53 | String closeReason = frame.getCloseReason();
54 |
55 | log.info("Websocket closed with status '" + closeCode + "' and reason '" + closeReason + "'");
56 | socketCloseLatch.countDown();
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/websocket/nv/NvMessageSender.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.websocket.nv;
2 |
3 | import com.github.catalystcode.fortis.speechtotext.websocket.MessageSender;
4 | import com.neovisionaries.ws.client.WebSocket;
5 |
6 | import java.nio.ByteBuffer;
7 |
8 | class NvMessageSender extends MessageSender {
9 | private final WebSocket webSocket;
10 |
11 | NvMessageSender(String connectionId, WebSocket webSocket) {
12 | super(connectionId);
13 | this.webSocket = webSocket;
14 | }
15 |
16 | @Override
17 | protected void sendBinaryMessage(ByteBuffer message) {
18 | webSocket.sendBinary(message.array());
19 | }
20 |
21 | @Override
22 | protected void sendTextMessage(String message) {
23 | webSocket.sendText(message);
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/src/main/java/com/github/catalystcode/fortis/speechtotext/websocket/nv/NvSpeechServiceClient.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.websocket.nv;
2 |
3 | import com.github.catalystcode.fortis.speechtotext.config.SpeechServiceConfig;
4 | import com.github.catalystcode.fortis.speechtotext.lifecycle.MessageReceiver;
5 | import com.github.catalystcode.fortis.speechtotext.telemetry.ConnectionTelemetry;
6 | import com.github.catalystcode.fortis.speechtotext.websocket.MessageSender;
7 | import com.github.catalystcode.fortis.speechtotext.websocket.SpeechServiceClient;
8 | import com.neovisionaries.ws.client.WebSocket;
9 | import com.neovisionaries.ws.client.WebSocketFactory;
10 |
11 | import java.util.concurrent.CountDownLatch;
12 |
13 | import static com.github.catalystcode.fortis.speechtotext.utils.ProtocolUtils.newGuid;
14 |
15 | public class NvSpeechServiceClient implements SpeechServiceClient {
16 | private final CountDownLatch socketCloseLatch;
17 | private WebSocket webSocket;
18 |
19 | public NvSpeechServiceClient() {
20 | this.socketCloseLatch = new CountDownLatch(1);
21 | }
22 |
23 | @Override
24 | public MessageSender start(SpeechServiceConfig config, MessageReceiver receiver) throws Exception {
25 | String connectionId = newGuid();
26 | ConnectionTelemetry telemetry = ConnectionTelemetry.forId(connectionId);
27 |
28 | WebSocketFactory factory = new WebSocketFactory();
29 | webSocket = factory.createSocket(config.getConnectionUrl(connectionId));
30 | webSocket.addListener(new NvMessageReceiver(socketCloseLatch, receiver, telemetry));
31 | telemetry.recordConnectionStarted();
32 | webSocket.connect();
33 | return new NvMessageSender(connectionId, webSocket);
34 | }
35 |
36 | @Override
37 | public void stop() {
38 | webSocket.disconnect();
39 | }
40 |
41 | @Override
42 | public void awaitEnd() throws InterruptedException {
43 | socketCloseLatch.await();
44 | }
45 |
46 | @Override
47 | public CountDownLatch getEndLatch() {
48 | return socketCloseLatch;
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/src/test/java/com/github/catalystcode/fortis/speechtotext/messages/MessageParserTest.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.messages;
2 |
3 | import org.json.JSONObject;
4 | import org.junit.jupiter.api.Test;
5 |
6 | import java.util.Map;
7 |
8 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceMessageHeaders.CONTENT_TYPE;
9 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceMessageHeaders.PATH;
10 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceMessageHeaders.REQUEST_ID;
11 | import static org.junit.jupiter.api.Assertions.assertEquals;
12 |
13 | class MessageParserTest {
14 | private static final String turnStartMessage = "" +
15 | "X-RequestId:e7a1b5d70b814aab8e5f43d9bc3fbf96\r\n" +
16 | "Content-Type:application/json; charset=utf-8\r\n" +
17 | "Path: turn.start\r\n" +
18 | "\r\n" +
19 | "{\r\n" +
20 | " \"context\": {\r\n" +
21 | " \"serviceTag\": \"04319a8c660a4d1e8b0ba640d9b9c6ed\"\r\n" +
22 | " }\r\n" +
23 | "}";
24 |
25 | @Test
26 | void parseHeaders() {
27 | Map headers = MessageParser.parseHeaders(turnStartMessage);
28 | assertEquals(3, headers.size());
29 | assertEquals("turn.start", headers.get(PATH));
30 | assertEquals("application/json; charset=utf-8", headers.get(CONTENT_TYPE));
31 | assertEquals("e7a1b5d70b814aab8e5f43d9bc3fbf96", headers.get(REQUEST_ID));
32 | }
33 |
34 | @Test
35 | void parseBody() {
36 | JSONObject body = MessageParser.parseBody(turnStartMessage);
37 | assertEquals("04319a8c660a4d1e8b0ba640d9b9c6ed", body.getJSONObject("context").getString("serviceTag"));
38 | }
39 | }
--------------------------------------------------------------------------------
/src/test/java/com/github/catalystcode/fortis/speechtotext/websocket/PlatformInfoTest.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.websocket;
2 |
3 | import org.junit.jupiter.api.Test;
4 |
5 | import static org.junit.jupiter.api.Assertions.*;
6 |
7 | class PlatformInfoTest {
8 | @Test
9 | void canBeConvertedToJson() {
10 | String config = new PlatformInfo().toJson();
11 | assertNotNull(config);
12 | assertNotEquals("", config);
13 | assertNotEquals("{}", config);
14 | }
15 | }
--------------------------------------------------------------------------------
/src/test/java/com/github/catalystcode/fortis/speechtotext/websocket/TelemetryInfoTest.java:
--------------------------------------------------------------------------------
1 | package com.github.catalystcode.fortis.speechtotext.websocket;
2 |
3 | import com.github.catalystcode.fortis.speechtotext.telemetry.AudioTelemetry;
4 | import com.github.catalystcode.fortis.speechtotext.telemetry.CallsTelemetry;
5 | import com.github.catalystcode.fortis.speechtotext.telemetry.ConnectionTelemetry;
6 | import org.json.JSONArray;
7 | import org.json.JSONObject;
8 | import org.junit.jupiter.api.Test;
9 |
10 | import java.util.HashMap;
11 | import java.util.Map;
12 | import java.util.Set;
13 |
14 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServiceMetrics.*;
15 | import static com.github.catalystcode.fortis.speechtotext.constants.SpeechServicePaths.*;
16 | import static org.junit.jupiter.api.Assertions.assertEquals;
17 | import static org.junit.jupiter.api.Assertions.assertNotEquals;
18 | import static org.junit.jupiter.api.Assertions.assertNotNull;
19 |
20 | class TelemetryInfoTest {
21 | @Test
22 | void canBeConvertedToJson() {
23 | String telemetryJson = setupTelemetry("canBeConvertedToJson");
24 | JSONObject telemetry = new JSONObject(telemetryJson);
25 |
26 | verifyReceivedMessages(telemetry);
27 | verifyMetrics(telemetry);
28 | }
29 |
30 | @Test
31 | void sameTelemetryIsUsedForRequest() {
32 | String testName = "sameTelemetryIsUsedForRequest";
33 | String connectionId = newConnectionId(testName);
34 | String requestId = newRequestId(testName);
35 |
36 | ConnectionTelemetry connectionTelemetry1 = ConnectionTelemetry.forId(connectionId);
37 | ConnectionTelemetry connectionTelemetry2 = ConnectionTelemetry.forId("otherConnectionId");
38 | ConnectionTelemetry connectionTelemetry3 = ConnectionTelemetry.forId(connectionId);
39 | assertNotEquals(connectionTelemetry1, connectionTelemetry2);
40 | assertEquals(connectionTelemetry1, connectionTelemetry3);
41 |
42 | CallsTelemetry callsTelemetry1 = CallsTelemetry.forId(requestId);
43 | CallsTelemetry callsTelemetry2 = CallsTelemetry.forId("otherRequestId");
44 | CallsTelemetry callsTelemetry3 = CallsTelemetry.forId(requestId);
45 | assertNotEquals(callsTelemetry1, callsTelemetry2);
46 | assertEquals(callsTelemetry1, callsTelemetry3);
47 |
48 | CallsTelemetry audioTelemetry1 = CallsTelemetry.forId(requestId);
49 | CallsTelemetry audioTelemetry2 = CallsTelemetry.forId("otherRequestId");
50 | CallsTelemetry audioTelemetry3 = CallsTelemetry.forId(requestId);
51 | assertNotEquals(audioTelemetry1, audioTelemetry2);
52 | assertEquals(audioTelemetry1, audioTelemetry3);
53 | }
54 |
55 | private String setupTelemetry(String testName) {
56 | String connectionId = newConnectionId(testName);
57 | String requestId = newRequestId(testName);
58 | CallsTelemetry callsTelemetry = CallsTelemetry.forId(requestId);
59 | ConnectionTelemetry connectionTelemetry = ConnectionTelemetry.forId(connectionId);
60 | AudioTelemetry audioTelemetry = AudioTelemetry.forId(requestId);
61 |
62 | connectionTelemetry.recordConnectionStarted();
63 | connectionTelemetry.recordConnectionEstablished();
64 | audioTelemetry.recordAudioStarted();
65 | callsTelemetry.recordCall(TURN_START);
66 | callsTelemetry.recordCall(SPEECH_HYPOTHESIS);
67 | callsTelemetry.recordCall(SPEECH_HYPOTHESIS);
68 | callsTelemetry.recordCall(SPEECH_PHRASE);
69 | callsTelemetry.recordCall(SPEECH_END);
70 | callsTelemetry.recordCall(TURN_END);
71 | audioTelemetry.recordAudioEnded();
72 |
73 | return new TelemetryInfo(connectionId, callsTelemetry, connectionTelemetry, audioTelemetry).toJson();
74 | }
75 |
76 | private String newRequestId(String testName) {
77 | return getClass().getName() + "-" + testName + "-requestId";
78 | }
79 |
80 | private String newConnectionId(String testName) {
81 | return getClass().getName() + "-" + testName + "-connectionId";
82 | }
83 |
84 | private void verifyReceivedMessages(JSONObject telemetry) {
85 | JSONArray receivedMessages = telemetry.getJSONArray(RECEIVED_MESSAGES);
86 | for (Object obj : receivedMessages) {
87 | JSONObject receivedMessage = (JSONObject) obj;
88 | Set keys = receivedMessage.keySet();
89 | assertEquals(1, keys.size());
90 | String key = keys.iterator().next();
91 | if (SPEECH_HYPOTHESIS.equalsIgnoreCase(key)) {
92 | JSONArray values = receivedMessage.getJSONArray(key);
93 | assertNotNull(values);
94 | assertEquals(2, values.length());
95 | } else {
96 | String value = receivedMessage.getString(key);
97 | assertNotNull(value);
98 | }
99 | }
100 | }
101 |
102 | private void verifyMetrics(JSONObject telemetry) {
103 | JSONArray metrics = telemetry.getJSONArray(METRICS);
104 | assertEquals(2, metrics.length());
105 | Map parsedMetrics = new HashMap<>();
106 | for (Object obj : metrics) {
107 | JSONObject metric = (JSONObject) obj;
108 | parsedMetrics.put(metric.getString(NAME), metric);
109 | }
110 | assertEquals(2, parsedMetrics.size());
111 | JSONObject connectionMetric = parsedMetrics.get(CONNECTION_METRIC);
112 | JSONObject microphoneMetric = parsedMetrics.get(MICROPHONE_METRIC);
113 | assertNotNull(connectionMetric);
114 | assertNotNull(microphoneMetric);
115 | assertNotNull(connectionMetric.getString(START));
116 | assertNotNull(microphoneMetric.getString(START));
117 | assertNotNull(connectionMetric.getString(END));
118 | assertNotNull(microphoneMetric.getString(END));
119 | }
120 | }
--------------------------------------------------------------------------------
/version.sbt:
--------------------------------------------------------------------------------
1 | version := "0.0.7"
2 |
--------------------------------------------------------------------------------