├── scripts ├── compile.sh └── run.sh ├── src ├── test │ ├── resources │ │ ├── test.wav │ │ ├── test_chunk1.wav │ │ ├── test_chunk2.wav │ │ ├── test_chunk3.wav │ │ ├── test_chunk4.wav │ │ └── test_chunk5.wav │ └── java │ │ ├── TestFiles.java │ │ ├── SessionTest.java │ │ ├── TranscribingTest.java │ │ ├── PropertiesTest.java │ │ ├── ConversionTest.java │ │ ├── MergeTest.java │ │ ├── PerformanceTest.java │ │ └── AccuracyTest.java └── main │ ├── resources │ ├── jetty-logging.properties │ ├── sphinx4http.properties │ ├── log4j.properties │ └── edu │ │ └── cmu │ │ └── sphinx │ │ └── api │ │ └── default.config.xml │ └── java │ └── org │ └── jitsi │ └── sphinx4http │ ├── exceptions │ ├── OperationFailedException.java │ ├── InvalidDirectoryException.java │ ├── NotInDirectoryException.java │ └── ServerConfigurationException.java │ ├── util │ ├── TimeStrings.java │ ├── SessionManager.java │ ├── StreamEater.java │ └── FileManager.java │ └── server │ ├── SphinxConstants.java │ ├── HttpServer.java │ ├── JSONBuilder.java │ ├── AudioTranscriber.java │ ├── Session.java │ ├── AudioFileManipulator.java │ ├── ServerConfiguration.java │ └── RequestHandler.java ├── .gitignore ├── pom.xml ├── README.md └── LICENSE /scripts/compile.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | mvn test compile -------------------------------------------------------------------------------- /src/test/resources/test.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jitsi/Sphinx4-HTTP-server/HEAD/src/test/resources/test.wav -------------------------------------------------------------------------------- /src/test/resources/test_chunk1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jitsi/Sphinx4-HTTP-server/HEAD/src/test/resources/test_chunk1.wav 
-------------------------------------------------------------------------------- /src/test/resources/test_chunk2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jitsi/Sphinx4-HTTP-server/HEAD/src/test/resources/test_chunk2.wav -------------------------------------------------------------------------------- /src/test/resources/test_chunk3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jitsi/Sphinx4-HTTP-server/HEAD/src/test/resources/test_chunk3.wav -------------------------------------------------------------------------------- /src/test/resources/test_chunk4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jitsi/Sphinx4-HTTP-server/HEAD/src/test/resources/test_chunk4.wav -------------------------------------------------------------------------------- /src/test/resources/test_chunk5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jitsi/Sphinx4-HTTP-server/HEAD/src/test/resources/test_chunk5.wav -------------------------------------------------------------------------------- /src/main/resources/jetty-logging.properties: -------------------------------------------------------------------------------- 1 | # Configure Jetty for StdErrLog Logging 2 | org.eclipse.jetty.util.log.class=org.eclipse.jetty.util.log.StdErrLog 3 | # Overall Logging Level is INFO 4 | org.eclipse.jetty.LEVEL=INFO 5 | 6 | -------------------------------------------------------------------------------- /scripts/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | if [ $# -eq 0 ] 3 | then 4 | mvn exec:java -Dexec.mainClass="org.jitsi.sphinx4http.server.HttpServer" 5 | else 6 | mvn exec:java -Dexec.mainClass="org.jitsi.sphinx4http.server.HttpServer" 
-Dexec.args="$1" 7 | fi 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | 3 | # Mobile Tools for Java (J2ME) 4 | .mtj.tmp/ 5 | 6 | # General stuff 7 | *.iml 8 | *.ipr 9 | *.iws 10 | .idea 11 | .DS_Store 12 | .classpath 13 | .ekstazi 14 | .project 15 | .settings 16 | .checkstyle 17 | # Package Files # 18 | *.jar 19 | *.war 20 | *.ear 21 | 22 | # data directories 23 | data/* 24 | log/* 25 | 26 | # compiled directory 27 | target/* 28 | 29 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 30 | hs_err_pid* 31 | -------------------------------------------------------------------------------- /src/main/resources/sphinx4http.properties: -------------------------------------------------------------------------------- 1 | # Edit this file to change the behaviour of this server 2 | # The values displayed in this file right now are also hard-coded as the 3 | # default values the server should use 4 | 5 | # the port the server should use. 
6 | port=8081 7 | 8 | # the absolute path to a ffmpeg executable 9 | ffmpeg_path=/usr/bin/ffmpeg 10 | 11 | # the absolute path to the directory to write files to 12 | # This value defaults to /data in the project root directory 13 | # make sure the specified parent directories exist 14 | #data_folder_path=/edit/to/an/absolute/path/ 15 | 16 | # to make responses be chunked 17 | chunked_response=true 18 | 19 | -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # set root logging level to trace and define console and file logs 2 | log4j.rootLogger = TRACE, stdout, file 3 | 4 | # the console log 5 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 6 | log4j.appender.stdout.Target=System.out 7 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n 9 | 10 | # the rolling file log 11 | log4j.appender.file=org.apache.log4j.RollingFileAppender 12 | log4j.appender.file.File=log/sphinx4-http-server.log 13 | log4j.appender.file.Append=true 14 | log4j.appender.file.layout=org.apache.log4j.PatternLayout 15 | log4j.appender.file.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n -------------------------------------------------------------------------------- /src/main/java/org/jitsi/sphinx4http/exceptions/OperationFailedException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 
8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.jitsi.sphinx4http.exceptions; 20 | 21 | /** 22 | * Exception thrown when a Process does not complete successfully. 23 | * Example is in the AudioFileManipulator class 24 | * 25 | * @author Nik Vaessen 26 | */ 27 | public class OperationFailedException extends Exception 28 | { 29 | public OperationFailedException(String s) 30 | { 31 | super(s); 32 | } 33 | } -------------------------------------------------------------------------------- /src/main/java/org/jitsi/sphinx4http/exceptions/InvalidDirectoryException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package org.jitsi.sphinx4http.exceptions; 20 | 21 | /** 22 | * Used in the FileManager class when its API is used to get a file from a 23 | * directory not managed by the class 24 | * 25 | * @author Nik Vaessen 26 | */ 27 | public class InvalidDirectoryException extends Exception 28 | { 29 | public InvalidDirectoryException(String s) 30 | { 31 | super(s); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/org/jitsi/sphinx4http/exceptions/NotInDirectoryException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package org.jitsi.sphinx4http.exceptions; 20 | 21 | /** 22 | * Exception thrown in the FileManager class when its API is used to 23 | * check on a file not in the directory of the FileManager 24 | * 25 | * @author Nik Vaessen 26 | */ 27 | public class NotInDirectoryException extends Exception 28 | { 29 | public NotInDirectoryException() 30 | { 31 | super(); 32 | } 33 | 34 | public NotInDirectoryException(String s) 35 | { 36 | super(s); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/org/jitsi/sphinx4http/exceptions/ServerConfigurationException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package org.jitsi.sphinx4http.exceptions; 20 | 21 | /** 22 | * Thrown when something goes wrong in reading the sphinx4http.properties 23 | * file in the ServerConfiguration class 24 | * 25 | * @author Nik Vaessen 26 | */ 27 | public class ServerConfigurationException extends Exception 28 | { 29 | public ServerConfigurationException() 30 | { 31 | super(); 32 | } 33 | 34 | public ServerConfigurationException(String s) 35 | { 36 | super(s); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/test/java/TestFiles.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | import java.io.File; 20 | 21 | /** 22 | * Files for testing with 23 | */ 24 | public class TestFiles 25 | { 26 | 27 | public static final File TEST_FILE = 28 | new File("src/test/resources/test.wav"); 29 | 30 | public static final File[] TEST_FILE_CHUNKS = 31 | { 32 | new File("src/test/resources/test_chunk1.wav"), 33 | new File("src/test/resources/test_chunk2.wav"), 34 | new File("src/test/resources/test_chunk3.wav"), 35 | new File("src/test/resources/test_chunk4.wav"), 36 | new File("src/test/resources/test_chunk5.wav"), 37 | }; 38 | 39 | } 40 | -------------------------------------------------------------------------------- /src/test/java/SessionTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | import org.jitsi.sphinx4http.util.SessionManager; 20 | import org.junit.Assert; 21 | 22 | import java.util.ArrayList; 23 | 24 | /** 25 | * Tests if sessions create unique ID's 26 | */ 27 | public class SessionTest 28 | { 29 | @org.junit.Test 30 | public void testSessionCreation() 31 | { 32 | SessionManager manager = new SessionManager(); 33 | ArrayList sessionIDs = new ArrayList<>(); 34 | for(int i = 0; i < 100; i++) 35 | { 36 | String newID = manager.createNewSession().getId(); 37 | Assert.assertFalse("Session ID was already created", 38 | sessionIDs.contains(newID)); 39 | sessionIDs.add(newID); 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/test/java/TranscribingTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | import edu.cmu.sphinx.result.WordResult; 20 | import org.jitsi.sphinx4http.server.AudioTranscriber; 21 | import org.junit.Assert; 22 | 23 | import java.io.FileInputStream; 24 | import java.util.ArrayList; 25 | 26 | /** 27 | * Tests whether the transcribing works 28 | */ 29 | public class TranscribingTest 30 | { 31 | @org.junit.Test 32 | public void testTranscription() throws Exception 33 | { 34 | FileInputStream stream = new FileInputStream(TestFiles.TEST_FILE); 35 | 36 | AudioTranscriber transcriber = new AudioTranscriber(); 37 | ArrayList result = transcriber.transcribe(stream); 38 | 39 | Assert.assertFalse("result should not be empty", result.isEmpty()); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/test/java/PropertiesTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | import org.jitsi.sphinx4http.server.ServerConfiguration; 20 | 21 | /** 22 | * 23 | */ 24 | public class PropertiesTest 25 | { 26 | 27 | private static final String path = 28 | "src/main/resources/sphinx4http.properties"; 29 | 30 | @org.junit.Test 31 | public void testProperties() throws Exception 32 | { 33 | ServerConfiguration configuration = ServerConfiguration.getInstance(); 34 | // Assert.assertNotNull(configuration.getPort()); 35 | // Assert.assertNotNull(configuration.getFfmpegPath()); 36 | // Assert.assertNotNull(configuration.getDataFolderPath()); 37 | System.out.println(configuration.getPort()); 38 | System.out.println(configuration.getFfmpegPath()); 39 | System.out.println(configuration.getDataFolderPath()); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/test/java/ConversionTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | import org.jitsi.sphinx4http.server.AudioFileManipulator; 20 | import org.jitsi.sphinx4http.util.TimeStrings; 21 | import org.junit.Assert; 22 | 23 | import java.io.File; 24 | 25 | /** 26 | * Tests whether the converting of audio files work 27 | */ 28 | public class ConversionTest 29 | { 30 | @org.junit.Test 31 | public void testConversion() throws Exception 32 | { 33 | File file = TestFiles.TEST_FILE; 34 | 35 | String path = file.getParentFile().getAbsolutePath() + "/" + 36 | TimeStrings.getNowString() + ".wav"; 37 | File convertedFile = AudioFileManipulator.convertToWAV(file, path); 38 | 39 | Assert.assertTrue("Converted file did not exist", 40 | convertedFile.exists()); 41 | //delete the temp converted file 42 | convertedFile.delete(); 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/org/jitsi/sphinx4http/util/TimeStrings.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package org.jitsi.sphinx4http.util; 20 | 21 | import java.time.Instant; 22 | import java.time.ZoneId; 23 | import java.time.format.DateTimeFormatter; 24 | 25 | /** 26 | * This class holds methods which can translate a time/date into string format 27 | * 28 | * @author Nik Vaessen 29 | */ 30 | public class TimeStrings 31 | { 32 | /** 33 | * Formats a date into a string with a pattern 34 | * "day-month___hour-minutes-second-millisecond" 35 | */ 36 | private static DateTimeFormatter formatter = 37 | DateTimeFormatter.ofPattern("dd-MM___HH-mm-ss-SSSS") 38 | .withZone(ZoneId.systemDefault()); 39 | 40 | /** 41 | * Get the current time as a string in the format dd-MM___HH-mm-ss-SSSS 42 | * @return the string representing the "current" time at the moment 43 | * of calling the method 44 | */ 45 | public static String getNowString() 46 | { 47 | return formatter.format(Instant.now()); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/org/jitsi/sphinx4http/server/SphinxConstants.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package org.jitsi.sphinx4http.server; 20 | 21 | 22 | /** 23 | * Stores the Paths to acoustic models, dictionaries and language models 24 | * in the Sphinx4 library, which are needed to accomplish speech recognition. 25 | * They are used for creating the Configurations for Sphinx4's 26 | * SpeechRecognition objects. 27 | * 28 | * @author Nik Vaessen 29 | */ 30 | public class SphinxConstants 31 | { 32 | /** 33 | * Acoustic model for american english 34 | */ 35 | public final static String ACOUSTIC_MODEL_EN_US = 36 | "resource:/edu/cmu/sphinx/models/en-us/en-us"; 37 | 38 | /** 39 | * Dictionary of american english words 40 | */ 41 | public final static String DICTIONARY_EN_US = 42 | "resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict"; 43 | 44 | /** 45 | * Language model for american english 46 | */ 47 | public final static String LANGUAGE_MODEL_EN_US = 48 | "resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin"; 49 | } 50 | -------------------------------------------------------------------------------- /src/test/java/MergeTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | import org.jitsi.sphinx4http.server.AudioFileManipulator; 20 | import org.jitsi.sphinx4http.util.FileManager; 21 | import org.junit.Assert; 22 | 23 | import java.io.File; 24 | import java.io.FileInputStream; 25 | import java.io.FileOutputStream; 26 | 27 | /** 28 | * test the merging method of the program ffmpeg 29 | */ 30 | public class MergeTest 31 | { 32 | @org.junit.Test 33 | public void testMerging() throws Exception 34 | { 35 | File[] testFiles = TestFiles.TEST_FILE_CHUNKS; 36 | File[] filesToMerge = new File[testFiles.length]; 37 | for (int i = 0; i < testFiles.length; i++) 38 | { 39 | File file = testFiles[i]; 40 | File newFile = new File( 41 | FileManager.getInstance() 42 | .getNewFile(FileManager.CONVERTED_DIR, ".wav") 43 | .toString()); 44 | 45 | FileInputStream in = new FileInputStream(file); 46 | FileOutputStream out = new FileOutputStream(newFile); 47 | while (in.available() > 0) 48 | { 49 | out.write(in.read()); 50 | } 51 | filesToMerge[i] = newFile; 52 | } 53 | 54 | File merged = AudioFileManipulator.mergeWAVFiles(filesToMerge); 55 | Assert.assertTrue("Merged file did not exist", merged.exists()); 56 | 57 | //delete test files 58 | merged.delete(); 59 | for (File file : filesToMerge) 60 | { 61 | file.delete(); 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/org/jitsi/sphinx4http/util/SessionManager.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 
8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.jitsi.sphinx4http.util; 20 | 21 | import org.jitsi.sphinx4http.server.Session; 22 | 23 | import java.util.Collections; 24 | import java.util.HashMap; 25 | import java.util.Map; 26 | import java.util.UUID; 27 | 28 | /** 29 | * Manages a HashMap containing all Sessions handled 30 | * by the server 31 | * 32 | * @author Nik Vaessen 33 | */ 34 | public class SessionManager 35 | { 36 | /** 37 | * HashMap mapping the ID of the session to the session 38 | */ 39 | private Map sessions; 40 | 41 | /** 42 | * Constructor for a SessionManager 43 | */ 44 | public SessionManager() 45 | { 46 | sessions = Collections.synchronizedMap(new HashMap()); 47 | } 48 | 49 | /** 50 | * Creates a new Session with a unique ID 51 | * @return a new uniquely identified session 52 | */ 53 | public Session createNewSession() 54 | { 55 | Session session = new Session(UUID.randomUUID().toString()); 56 | sessions.put(session.getId(), session); 57 | return session; 58 | } 59 | 60 | /** 61 | * Gets a session corresponding to the given ID. 
If the session doesn't 62 | * exist, null will be returned 63 | * @param key the ID for the session 64 | * @return the session associated with the ID or null if the session doesn't 65 | * exist 66 | */ 67 | public Session getSession(String key) 68 | { 69 | return sessions.get(key); 70 | } 71 | 72 | /** 73 | * Checks whether the given key maps to a session 74 | * @param key the ID of a possible session 75 | * @return whether the ID has an associated Session 76 | */ 77 | public boolean hasID(String key) 78 | { 79 | return sessions.containsKey(key); 80 | } 81 | 82 | } 83 | -------------------------------------------------------------------------------- /src/main/java/org/jitsi/sphinx4http/server/HttpServer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package org.jitsi.sphinx4http.server; 20 | 21 | import org.eclipse.jetty.server.Server; 22 | import org.slf4j.Logger; 23 | import org.slf4j.LoggerFactory; 24 | 25 | /** 26 | * Main method starting the HTTP server 27 | * Accepts HTTP POST requests on a specified port 28 | * 29 | * @author Nik Vaessen 30 | */ 31 | public class HttpServer 32 | { 33 | /** 34 | * The logger of this class 35 | */ 36 | private static final Logger logger = 37 | LoggerFactory.getLogger(HttpServer.class); 38 | 39 | /** 40 | * The server configuration 41 | */ 42 | private static final ServerConfiguration config = ServerConfiguration. 43 | getInstance(); 44 | 45 | /** 46 | * Main method starting the server 47 | * @param args command line argument specifying the port 48 | */ 49 | public static void main(String[] args) 50 | { 51 | int port; 52 | if (args.length >= 1) 53 | { 54 | try 55 | { 56 | port = Integer.parseInt(args[0]); 57 | } 58 | catch (NumberFormatException e) 59 | { 60 | logger.info("{} is not a valid port. Exiting.", args[0]); 61 | System.exit(1); 62 | return; 63 | } 64 | } 65 | else 66 | { 67 | port = config.getPort(); 68 | } 69 | 70 | //log server information 71 | 72 | logger.info("starting server with the following configuration:\n" + 73 | "port: {}\n" + 74 | "ffmpeg path: {}\n" + 75 | "data folder path: {}\n" + 76 | "chunked responses: {}", 77 | port, config.getFfmpegPath(), config.getDataFolderPath(), 78 | config.isChunkedResponse()); 79 | 80 | try 81 | { 82 | Server server = new Server(port); 83 | server.setHandler(new RequestHandler()); 84 | 85 | server.start(); 86 | server.join(); 87 | } 88 | catch (Exception e) 89 | { 90 | logger.info("Something went wrong while starting the " + 91 | "server. 
Is the port {} already in use?", 92 | port, e); 93 | System.exit(2); 94 | } 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | org.jitsi 8 | sphinx4HTTPserver 9 | 1.0-SNAPSHOT 10 | 11 | 12 | 9.3.10.v20160621 13 | 14 | 15 | 16 | 17 | snapshots-repo 18 | https://oss.sonatype.org/content/repositories/snapshots 19 | false 20 | true 21 | 22 | 23 | 24 | 25 | 26 | org.slf4j 27 | slf4j-api 28 | 1.7.21 29 | 30 | 31 | org.slf4j 32 | slf4j-log4j12 33 | 1.7.21 34 | 35 | 36 | org.eclipse.jetty 37 | jetty-server 38 | ${jettyVersion} 39 | 40 | 41 | org.eclipse.jetty 42 | jetty-client 43 | ${jettyVersion} 44 | 45 | 46 | edu.cmu.sphinx 47 | sphinx4-core 48 | 5prealpha-SNAPSHOT 49 | 50 | 51 | edu.cmu.sphinx 52 | sphinx4-data 53 | 5prealpha-SNAPSHOT 54 | 55 | 56 | junit 57 | junit 58 | 4.12 59 | test 60 | 61 | 62 | com.googlecode.json-simple 63 | json-simple 64 | 1.1.1 65 | 66 | 67 | 68 | 69 | 70 | 71 | org.apache.maven.plugins 72 | maven-compiler-plugin 73 | 3.5.1 74 | 75 | 1.7 76 | 1.7 77 | 78 | 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /src/test/java/PerformanceTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | import org.jitsi.sphinx4http.server.AudioTranscriber; 20 | import org.junit.Assert; 21 | import org.junit.Test; 22 | 23 | import java.io.FileInputStream; 24 | import java.util.ArrayList; 25 | import java.util.Collection; 26 | import java.util.concurrent.Callable; 27 | import java.util.concurrent.ExecutorService; 28 | import java.util.concurrent.Executors; 29 | import java.util.concurrent.TimeUnit; 30 |
/**
 * Test the speed performance of transcribing a file
 */
public class PerformanceTest
{
    /**
     * The amount of files which will be transcribed at the same time in the
     * MultipleFilesSpeed test
     */
    private final static int AMOUNT_OF_MULTIPLE_FILES = 6;

    /**
     * The time which is deemed as the maximum allowed for transcribing an
     * audio file of 25 seconds
     */
    private final static int DESIRED_TRANSCRIPTION_TIME = 60000; //in ms

    /**
     * A single transcription has to finish within the desired time
     */
    @Test
    public void testSingleFileSpeed()
        throws Exception
    {
        long time = testFileTranscription();
        Assert.assertTrue(time < DESIRED_TRANSCRIPTION_TIME);
    }

    /**
     * Several transcriptions running at the same time have to finish,
     * without any of them failing, within the desired time
     */
    @Test
    public void testMultipleFilesSpeed()
        throws Exception
    {
        ExecutorService service = Executors.newFixedThreadPool(
            AMOUNT_OF_MULTIPLE_FILES);
        Collection<Callable<Long>> callables = new ArrayList<>();
        for (int i = 0; i < AMOUNT_OF_MULTIPLE_FILES; i++)
        {
            callables.add(new Callable<Long>()
            {
                @Override
                public Long call()
                    throws Exception
                {
                    return testFileTranscription();
                }
            });
        }

        long start = System.currentTimeMillis();
        long end;
        try
        {
            // The timed invokeAll returns once every task has completed or
            // the timeout has expired; tasks which did not complete in time
            // are cancelled, so the test cannot hang.
            for (java.util.concurrent.Future<Long> future
                : service.invokeAll(callables, DESIRED_TRANSCRIPTION_TIME,
                    TimeUnit.MILLISECONDS))
            {
                // get() rethrows the failure of a task and throws for a
                // cancelled task, so a broken or too slow transcription
                // fails the test instead of being silently ignored
                future.get();
            }
            end = System.currentTimeMillis();
        }
        finally
        {
            service.shutdownNow();
        }

        Assert.assertTrue((end - start) < DESIRED_TRANSCRIPTION_TIME);
    }

    /**
     * Test the transcription of "test.wav", which is an audio file of 25
     * seconds
     *
     * @return the time in milliseconds it took to transcribe the file
     * @throws Exception when the file cannot be read or transcribed
     */
    private static long testFileTranscription()
        throws Exception
    {
        AudioTranscriber transcriber = new AudioTranscriber();

        // make sure the file handle is released, also when transcribing fails
        try (FileInputStream in = new FileInputStream(TestFiles.TEST_FILE))
        {
            long start = System.currentTimeMillis();
            transcriber.transcribe(in);
            long end = System.currentTimeMillis();

            return end - start;
        }
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/jitsi/sphinx4http/util/StreamEater.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.jitsi.sphinx4http.util; 20 | 21 | import org.slf4j.Logger; 22 | import org.slf4j.LoggerFactory; 23 | 24 | import java.io.BufferedReader; 25 | import java.io.IOException; 26 | import java.io.InputStream; 27 | import java.io.InputStreamReader; 28 | 29 | /** 30 | * This class is used to consume from streams so that the Process API does not 31 | * block when the buffer from the Error- and OutputStream gets full. 
32 | * 33 | * @author Nik Vaessen 34 | */ 35 | public class StreamEater 36 | { 37 | /** 38 | * Logger of this class 39 | */ 40 | private static final Logger logger = 41 | LoggerFactory.getLogger(StreamEater.class); 42 | 43 | /** 44 | * The stream being read from 45 | */ 46 | private InputStream stream; 47 | /** 48 | * Arbitrary name of the stream used to clarify in the console 49 | * from which stream the output is 50 | */ 51 | private String name; 52 | 53 | /** 54 | * Whether to log the content of the stream. Defaults to false 55 | */ 56 | private boolean print; 57 | 58 | /** 59 | * Creates a new thread consuming from the specified stream. Will not 60 | * print the content of the streams 61 | * @param stream the stream being consumed 62 | * @param streamName arbitrary name for the stream 63 | */ 64 | public StreamEater(InputStream stream, String streamName) 65 | { 66 | this(stream, streamName, false); 67 | } 68 | 69 | /** 70 | * Creates a new thread consuming for the specified stream 71 | * @param stream the stream being consumed 72 | * @param streamName arbitrary name for the stream 73 | * @param print whether the content of the stream will be logged 74 | */ 75 | public StreamEater(InputStream stream, String streamName, boolean print) 76 | { 77 | this.stream = stream; 78 | this.name = streamName; 79 | this.print = print; 80 | run(); 81 | } 82 | 83 | /** 84 | * Consumes from the given stream. Will not print the content 85 | * as long as print will be false. 
86 | */ 87 | private void run() 88 | { 89 | new Thread(new Runnable() 90 | { 91 | @Override 92 | public void run() 93 | { 94 | try(InputStreamReader streamReader = 95 | new InputStreamReader(stream); 96 | BufferedReader br = new BufferedReader(streamReader)) 97 | { 98 | String line = null; 99 | while ((line = br.readLine()) != null) 100 | { 101 | if (print) 102 | { 103 | logger.debug(name + ":" + line); 104 | } 105 | } 106 | br.close(); 107 | streamReader.close(); 108 | stream.close(); 109 | } 110 | catch (IOException e) 111 | { 112 | e.printStackTrace(); 113 | } 114 | } 115 | }).start(); 116 | } 117 | } 118 | 119 | -------------------------------------------------------------------------------- /src/main/java/org/jitsi/sphinx4http/server/JSONBuilder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package org.jitsi.sphinx4http.server; 20 | 21 | import edu.cmu.sphinx.result.WordResult; 22 | import org.json.simple.JSONArray; 23 | import org.json.simple.JSONObject; 24 | 25 | import java.util.ArrayList; 26 | 27 | /** 28 | * Constructs the output of the speech-to-text in JSON format 29 | * 30 | * @author Nik Vaessen 31 | */ 32 | public class JSONBuilder 33 | { 34 | /** 35 | * Identifier for the word 36 | */ 37 | private static final String JSON_WORD = "word"; 38 | 39 | /** 40 | * Identifier for the time when the word started to get spoken 41 | */ 42 | private static final String JSON_TIMESTAMP_START = "start"; 43 | 44 | /** 45 | * Identifier for the time when the word stopped being spoken 46 | */ 47 | private static final String JSON_TIMESTAMP_END = "end"; 48 | 49 | /** 50 | * identifier that the word is a fill word, e.g a sigh 51 | */ 52 | private static final String JSON_FILL_WORD = "filler"; 53 | 54 | /** 55 | * Builds the array of words from the Sphinx4 into a JSON array 56 | * @param results the ArrayList of WordResults from a Speech-to-text 57 | * transcription 58 | * @return a JSONArray holding a JSONObject for each word, with additional 59 | * information 60 | * 61 | */ 62 | @SuppressWarnings("unchecked") //for JSONArray.add() 63 | public JSONArray buildSpeechToTextResult(ArrayList results) 64 | { 65 | JSONArray toReturn = new JSONArray(); 66 | for(WordResult result : results) 67 | { 68 | toReturn.add(this.buildWordObject( 69 | result.getWord().toString(), 70 | result.getTimeFrame().getStart(), 71 | result.getTimeFrame().getEnd(), 72 | result.getWord().isFiller() 73 | )); 74 | } 75 | return toReturn; 76 | } 77 | 78 | /** 79 | * Create a JSONObject with a word, start, end and filler value based 80 | * on a WordResult 81 | * @param result the WordResult whose values will be held in the JSONObject 82 | * @return a JSONObject holding the word, start, end and filler 83 | * values of the given WordResult 84 | */ 85 | public JSONObject 
buildWordObject(WordResult result) 86 | { 87 | return buildWordObject( 88 | result.getWord().toString(), 89 | result.getTimeFrame().getStart(), 90 | result.getTimeFrame().getEnd(), 91 | result.getWord().isFiller()); 92 | } 93 | 94 | /** 95 | * Create a JSONObject with a word, start, end and filler value based 96 | * on a WordResult 97 | * @param word the word value of the JSONObject 98 | * @param start the start value of the JSONObject 99 | * @param end the end value of the JSONObject 100 | * @param filler the filler value of the JSONObject 101 | * @return a JSONObject holding the word, start, end and filler 102 | * values of the given WordResult 103 | */ 104 | @SuppressWarnings("unchecked") //for JSONObject.put() 105 | public JSONObject buildWordObject(String word, long start, 106 | long end, boolean filler) 107 | { 108 | JSONObject jsonWord = new JSONObject(); 109 | jsonWord.put(JSON_WORD, word); 110 | jsonWord.put(JSON_TIMESTAMP_START, start); 111 | jsonWord.put(JSON_TIMESTAMP_END, end); 112 | jsonWord.put(JSON_FILL_WORD, filler); 113 | return jsonWord; 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Sphinx4-HTTP-server 2 | 3 | A simple speech-to-text HTTP server. It uses Jetty as the server component and Sphinx4 as the speech-to-text library. 4 | 5 | ## Requirements 6 | This project is build using Maven. Maven can be installed using: 7 | ``` 8 | $ apt-get install maven 9 | ``` 10 | It also requires the program FFMPEG with version 1.1 or higher. It expects FFMPEG to be located in /usr/bin, but this can be configured. 
11 | FFMPEG can be downloaded at: https://ffmpeg.org/download.html 12 | 13 | It is also possible to get the latest version by cloning the FFMPEG repository: 14 | ``` 15 | $ git clone https://git.ffmpeg.org/ffmpeg.git ffmpeg 16 | ``` 17 | ## Running the server 18 | 19 | You can run the server by executing the following commands in the root folder: 20 | ``` 21 | $ mvn compile 22 | $ mvn exec:java -Dexec.mainClass="org.jitsi.sphinx4http.server.HttpServer" 23 | ``` 24 | 25 | Alternatively, run the scripts in the scripts folder: 26 | ``` 27 | $ ./scripts/compile.sh 28 | $ ./scripts/run.sh 29 | ``` 30 | ## Configuration 31 | 32 | It is possible to configure the server in src/main/resources/sphinx4http.properties. 33 | 34 | The config file has the following values: 35 | ``` 36 | # The port the server should use. 37 | port=8081 38 | 39 | # The absolute path to a ffmpeg executable 40 | ffmpeg_path=/usr/bin/ffmpeg 41 | 42 | # The absolute path to the directory to write files to 43 | #data_folder_path=/edit/to/an/absolute/path/ 44 | 45 | # To make responses be chunked 46 | chunked_response=true 47 | ``` 48 | The ```port``` value will be overridden when a port is passed as an argument to the run script. 49 | 50 | Leave ```data_folder_path``` commented out unless the default behaviour is not desired. 51 | 52 | If ```chunked_response``` is set to true, the server will respond to requests using a chunked HTTP response format, preventing timeouts. 53 | 54 | ## Usage 55 | 56 | The server will listen for POST requests on the specified port. The POST request should include the audio file which has to be transcribed. When sending a POST request, the URL of the server should end with "/recognize". The server accepts requests with a content type of the form "audio/xxx". 
57 | 58 | An example of a request would look like the following: 59 | ``` 60 | $ curl -X POST --data-binary @filename.webm -H "Content-Type: audio/webm" http://localhost:8081/recognize 61 | ``` 62 | After receiving a request, the server will convert the given audio file to the right format for Sphinx4. It will then do speech recognition. The result will be sent back in JSON format. The structure of the JSON is dependent on the configuration. 63 | 64 | A non-chunked reply will look like the following: 65 | ``` 66 | { 67 | 68 | "session-id":"TpOeSN0gVM00OFHnSCHol9ESpaWNN5aF", 69 | "result":[ 70 | { 71 | "word":"hello", 72 | "start":0, 73 | "end":390, 74 | "filler":false 75 | }, 76 | { 77 | "word":"", 78 | "start":400, 79 | "end":420, 80 | "filler":true 81 | }, 82 | { 83 | "word":"world", 84 | "start":430, 85 | "end":830, 86 | "filler":false 87 | } 88 | ] 89 | 90 | } 91 | ``` 92 | 93 | If the server is set to use a chunked reply it will look like this: 94 | ``` 95 | { 96 | "objects":[ 97 | { 98 | "session-id":"TpOeSN0gVM00OFHnSCHol9ESpaWNN5aF" 99 | }, 100 | { 101 | "word":"hello", 102 | "start":0, 103 | "end":390, 104 | "filler":false 105 | }, 106 | { 107 | "word":"", 108 | "start":400, 109 | "end":420, 110 | "filler":true 111 | }, 112 | { 113 | "word":"world", 114 | "start":430, 115 | "end":830, 116 | "filler":false 117 | } 118 | ] 119 | } 120 | ``` 121 | The JSON array will hold an object for every word uttered in the given audio file. Each object also includes the timestamps of when the word was uttered, relative to the start of the audio file. If the word is a filler, e.g. a sigh, the filler value will be true. The session-id can be used to give multiple audio files to the server, telling it that they belong together. This can be done by including ```?session-id=<id>``` in the URL. There is currently no difference in response, however. 
122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /src/main/java/org/jitsi/sphinx4http/server/AudioTranscriber.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.jitsi.sphinx4http.server; 20 | 21 | import edu.cmu.sphinx.api.Configuration; 22 | import edu.cmu.sphinx.api.SpeechResult; 23 | import edu.cmu.sphinx.api.StreamSpeechRecognizer; 24 | import edu.cmu.sphinx.result.WordResult; 25 | import org.slf4j.Logger; 26 | import org.slf4j.LoggerFactory; 27 | 28 | import java.io.IOException; 29 | import java.io.InputStream; 30 | import java.util.ArrayList; 31 | import java.util.concurrent.SynchronousQueue; 32 |
/**
 * Uses the Sphinx4 speech-to-text library to get hypotheses of the
 * spoken text in a given audio fragment
 * Currently only predicts U.S. English text
 * The Sphinx4 API requires audio fragments to be in .wav format, have
 * mono sound and have a sample rate of 16 kHz (16000 Hz)
 *
 * @author Nik Vaessen
 */
public class AudioTranscriber
{
    /**
     * The logger for this class
     */
    private static final Logger logger =
        LoggerFactory.getLogger(AudioTranscriber.class);

    /**
     * The configuration used for creating the speech recognizer
     */
    private final Configuration config;

    /**
     * Constructs an AudioTranscriber object which uses the U.S. English
     * acoustic model, dictionary and language model
     */
    public AudioTranscriber()
    {
        this.config = new Configuration();
        config.setAcousticModelPath(SphinxConstants.ACOUSTIC_MODEL_EN_US);
        config.setDictionaryPath(SphinxConstants.DICTIONARY_EN_US);
        config.setLanguageModelPath(SphinxConstants.LANGUAGE_MODEL_EN_US);
    }

    /**
     * Tries to predict the speech in a given audio fragment
     *
     * @param audioStream the audio stream on which speech prediction is
     * desired
     * @return the hypotheses of the speech in the given audio fragment
     * as a list of words
     * @throws IOException when the config does not point to correct
     * files needed for the transcription
     */
    public ArrayList<WordResult> transcribe(InputStream audioStream)
        throws IOException
    {
        StreamSpeechRecognizer recognizer = new StreamSpeechRecognizer(config);
        recognizer.startRecognition(audioStream);

        ArrayList<WordResult> utteredWords = new ArrayList<>();
        try
        {
            SpeechResult result;
            while ((result = recognizer.getResult()) != null)
            {
                utteredWords.addAll(result.getWords());
            }
        }
        finally
        {
            // also stop the recognizer when collecting the results fails
            recognizer.stopRecognition();
        }

        return utteredWords;
    }

    /**
     * Tries to predict the speech in a given audio fragment. It
     * will put every predicted word into a SynchronousQueue,
     * to be processed immediately. Putting a word blocks until it is
     * taken out of the queue. When the calling thread is interrupted
     * while waiting, the transcription is stopped early and the interrupt
     * status of the thread is restored.
     *
     * @param audioStream the audio fragment to transcribe
     * @param queue the queue to put every WordResult into
     * @throws IOException when the config does not point to correct
     * files needed for the transcription
     */
    public void transcribeSynchronous(InputStream audioStream,
                                      SynchronousQueue<WordResult> queue)
        throws IOException
    {
        StreamSpeechRecognizer recognizer = new StreamSpeechRecognizer(config);
        recognizer.startRecognition(audioStream);

        logger.trace("Started chunked transcription");
        try
        {
            SpeechResult result;
            while ((result = recognizer.getResult()) != null)
            {
                logger.trace("got a word result of length {}",
                    result.getWords().size());

                for (WordResult word : result.getWords())
                {
                    logger.trace("offering {}", word);
                    queue.put(word);
                }
            }
        }
        catch (InterruptedException e)
        {
            // Nobody is going to take the remaining words, so stop instead
            // of trying to hand over the rest; keep the interrupt visible
            // to the caller.
            logger.warn("interrupted while handing over a word, " +
                "stopping the chunked transcription");
            Thread.currentThread().interrupt();
        }
        finally
        {
            recognizer.stopRecognition();
        }
    }

}
-------------------------------------------------------------------------------- /src/test/java/AccuracyTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | import edu.cmu.sphinx.result.WordResult; 20 | import org.jitsi.sphinx4http.server.AudioTranscriber; 21 | import org.junit.Assert; 22 | import org.junit.Test; 23 | 24 | import java.io.FileInputStream; 25 | import java.util.ArrayList; 26 | import java.util.Collections; 27 | import java.util.HashMap; 28 | 29 | /** 30 | * Test the accuracy of transcribing a file 31 | */ 32 | public class AccuracyTest 33 | { 34 | 35 | /** 36 | * The expected accuracy of the test audio file 37 | */ 38 | private final static double EXPECTED_ACCURACY = 60; // % 39 | 40 | /** 41 | * The TRANSCRIPTION of the test audio file 42 | */ 43 | private final static String TRANSCRIPTION = 44 | "A hungry Fox saw some fine bunches of Grapes hanging from a vine " + 45 | "that was trained along a high trellis and did his best to reach them" + 46 | "by jumping as high as he could into the air But it was all in vain " + 47 | "for" + 48 | " they were just out of reach so he gave up trying and walked away" + 49 | " with an air of dignity and unconcern remarking I thought those" + 50 | " Grapes were ripe but I see now they are quite sour"; 51 | 52 | @Test 53 | public void testAccuracy() 54 | throws Exception 55 | { 56 | AudioTranscriber transcriber = new AudioTranscriber(); 57 | ArrayList results = transcriber.transcribe( 58 | new FileInputStream(TestFiles.TEST_FILE)); 59 | if(!results.isEmpty()) 60 | { 61 | ArrayList observedWords = new ArrayList<>(); 62 | for(WordResult result : results) 63 | { 64 | if(!result.isFiller()) 65 | { 66 | observedWords.add(result.getWord().toString()); 67 | } 68 | } 69 | 70 | double accuracy = computeAccuracy( 71 | getCorrectTranscription(), 72 | createWordCountHashMap(observedWords)); 73 | 74 | Assert.assertTrue(accuracy >= EXPECTED_ACCURACY); 75 | } 76 | else 77 | { 78 | Assert.fail(); 79 | } 80 | } 81 | 82 | /** 83 | * Compute the accuracy of two 
"histograms" of words, by comparing if the 84 | * observed histogram has the same frequency of words as the expected 85 | * histogram 86 | * 87 | * @param expected the correct histogram of a transcription 88 | * @param observed the histogram of a transcription to test for accuracy 89 | * @return the accuracy in % of the observed histogram 90 | */ 91 | private double computeAccuracy(HashMap expected, 92 | HashMap observed) 93 | { 94 | double total = 0; 95 | double correct = 0; 96 | for(String word : expected.keySet()) 97 | { 98 | total += expected.get(word); 99 | if(observed.containsKey(word)) 100 | { 101 | correct += observed.get(word); 102 | } 103 | } 104 | 105 | return (correct / total) * 100; 106 | } 107 | 108 | /** 109 | * Create a HashMap which stores the frequency for each word in an ArrayList 110 | * of words 111 | * 112 | * @param words the arraylist of words to convert to a frequency HashMap 113 | * @return the frequency HashMap 114 | */ 115 | private HashMap createWordCountHashMap( 116 | ArrayList words) 117 | { 118 | HashMap histogram = new HashMap<>(); 119 | for(String s : words) 120 | { 121 | if(histogram.containsKey(s)) 122 | { 123 | histogram.put(s, histogram.get(s) + 1); 124 | } 125 | else 126 | { 127 | histogram.put(s, 1); 128 | } 129 | } 130 | return histogram; 131 | } 132 | 133 | /** 134 | * Get the frequency HashMap of the correct transcription of the test audio 135 | * file 136 | * 137 | * @return the frequency HashMap 138 | */ 139 | private HashMap getCorrectTranscription() 140 | { 141 | String[] transcribedWords = TRANSCRIPTION.split(" "); 142 | ArrayList strings = new ArrayList<>(); 143 | Collections.addAll(strings, transcribedWords); 144 | 145 | return createWordCountHashMap(strings); 146 | } 147 | 148 | } 149 | -------------------------------------------------------------------------------- /src/main/java/org/jitsi/sphinx4http/server/Session.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 
Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.jitsi.sphinx4http.server; 20 | 21 | 22 | import edu.cmu.sphinx.result.WordResult; 23 | import org.json.simple.JSONArray; 24 | import org.json.simple.JSONObject; 25 | import org.slf4j.Logger; 26 | import org.slf4j.LoggerFactory; 27 | 28 | import java.io.*; 29 | import java.util.ArrayList; 30 | import java.util.concurrent.SynchronousQueue; 31 |
/**
 * Class representing a session for repeated audio files from the same voice.
 *
 * note:
 * Used to merge and store previous files to increase accuracy of
 * transcriptions but was deemed impractical because of bad run time
 * performance. It is kept for wrapping the AudioTranscriber class
 *
 * @author Nik Vaessen
 */
public class Session
{
    /**
     * The logger of this class
     */
    private static final Logger logger = LoggerFactory.getLogger(Session.class);

    /**
     * Class that formats the output of the speech-to-text to a JSONArray
     */
    private static final JSONBuilder builder = new JSONBuilder();

    /**
     * Class doing the speech-to-text on the given audio file
     */
    private final AudioTranscriber transcriber;

    /**
     * ID of the session
     */
    private final String id;

    /**
     * Creates a session object with the given ID
     *
     * @param id the id of the new session
     */
    public Session(String id)
    {
        this.transcriber = new AudioTranscriber();
        this.id = id;
    }

    /**
     * Transcribes the given audio file and returns it in a JSON format
     * where each word is a JSON object with values 'word, start, end,
     * filler'.
     *
     * @param audioFile the audio file to transcribe
     * @return JSON array with every uttered word in the audio file
     * @throws IOException when the audio file cannot be read
     */
    public JSONArray transcribe(File audioFile)
        throws IOException
    {
        logger.trace("transcribing audio file with id: {}", id);
        try (InputStream in = new FileInputStream(audioFile))
        {
            ArrayList<WordResult> results = transcriber.transcribe(in);
            return builder.buildSpeechToTextResult(results);
        }
    }

    /**
     * Transcribes the given audio file and writes each retrieved word to
     * the given writer immediately. Each word is written as a JSON object
     * with values 'word, start, end, filler', preceded by a ",".
     *
     * @param audioFile the audio file to transcribe
     * @param out the writer to write each word result to immediately; it is
     * flushed after every word and not closed
     * @return JSON array with every uttered word in the audio file
     * @throws IOException when the audio file cannot be read
     */
    public JSONArray chunkedTranscribe(File audioFile, PrintWriter out)
        throws IOException
    {
        logger.trace("started chunked transcribing of " +
            "audio file with id : {}", id);

        try (InputStream in = new FileInputStream(audioFile))
        {
            // the transcriber hands over every word through this queue
            final SynchronousQueue<WordResult> results
                = new SynchronousQueue<>();
            final ArrayList<WordResult> storedResults = new ArrayList<>();
            // kept from the original code, whose comment says the wrapper
            // is there so the given writer does not get closed, as it is
            // needed elsewhere to finish the object
            final PrintWriter printWriter = new PrintWriter(out);

            // thread which immediately takes every word result out of the
            // queue and writes it
            Thread queueManager = new Thread(new Runnable()
            {
                @Override
                public void run()
                {
                    while (!Thread.currentThread().isInterrupted())
                    {
                        try
                        {
                            //blocks until result is retrieved
                            WordResult word = results.take();
                            logger.trace("retrieved word \"{}\"",
                                word.toString());
                            storedResults.add(word);
                            JSONObject toSend = builder.buildWordObject(word);
                            // Every word gets a leading ",".
                            // NOTE(review): presumably the caller already
                            // wrote a first element (the README example
                            // starts with a session-id object) - confirm
                            // against RequestHandler.
                            printWriter.write("," + toSend.toJSONString());
                            printWriter.flush();
                        }
                        catch (InterruptedException e)
                        {
                            //make sure the thread ends
                            Thread.currentThread().interrupt();
                        }
                    }
                }
            });
            queueManager.start();
            try
            {
                transcriber.transcribeSynchronous(in, results);
            }
            finally
            {
                // stop the thread, both when the transcribing is done and
                // when it failed, so that it never keeps waiting on the
                // queue
                queueManager.interrupt();
                // The thread may still be storing and writing the last
                // word. Wait for it to end so that storedResults is
                // complete and safe to read from this thread.
                awaitEnd(queueManager);
            }

            return builder.buildSpeechToTextResult(storedResults);
        }
    }

    /**
     * Waits until the given thread has ended. When the waiting thread is
     * interrupted itself, it stops waiting and its interrupt status is
     * restored.
     *
     * @param thread the thread to wait for
     */
    private static void awaitEnd(Thread thread)
    {
        try
        {
            thread.join();
        }
        catch (InterruptedException e)
        {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Get the ID of the session
     *
     * @return the ID of the session
     */
    public String getId()
    {
        return this.id;
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/jitsi/sphinx4http/server/AudioFileManipulator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.jitsi.sphinx4http.server; 20 | 21 | import org.jitsi.sphinx4http.exceptions.InvalidDirectoryException; 22 | import org.jitsi.sphinx4http.exceptions.NotInDirectoryException; 23 | import org.jitsi.sphinx4http.exceptions.OperationFailedException; 24 | import org.jitsi.sphinx4http.util.FileManager; 25 | import org.jitsi.sphinx4http.util.StreamEater; 26 | 27 | import java.io.*; 28 | 29 | /** 30 | * Class holds methods for the tasks below related to audio files. 
The tasks
 * are completed by the external program "ffmpeg", which needs to be installed
 * on the device. more info on ffmpeg:
 * https://ffmpeg.org/download.html#get-sources
 * The ffmpeg version needs to be version >= 1.1
 *
 * Tasks:
 * - Convert a given audio file to the correct format required by Sphinx4
 * - Merge multiple audio files
 *
 * @author Nik Vaessen
 */
public class AudioFileManipulator
{
    /**
     * The path to the ffmpeg executable, version >= 1.1
     * Defaults to "/usr/bin/ffmpeg" when not in config file
     */
    public final static String PROGRAM = ServerConfiguration.
        getInstance().getFfmpegPath();

    /**
     * whether the output of the commands get logged
     */
    public static boolean OUTPUT = false;

    /**
     * The file manager
     */
    private static FileManager fileManager = FileManager.getInstance();

    /**
     * Converts the given file to .wav format with a sampling rate of
     * 16 kHz and mono audio
     * @param toConvert the file to convert
     * @param newFilePath the path where the converted file should be
     * @return the converted file
     * @throws OperationFailedException when converting fails
     */
    public static File convertToWAV(File toConvert, String newFilePath)
        throws OperationFailedException
    {
        String[] command = buildConvertCommand(
            toConvert.getAbsolutePath(), newFilePath);
        return execute(command, newFilePath);
    }

    /**
     * Merges the given files. The program relies on a .txt file containing
     * the file names to merge. All files need to be located in the
     * same folder. The new merged file will be placed in this folder.
     * The .txt file is removed again, whether the merging succeeds or not.
     * @param toMerge all the files to merge
     * @return one audio file containing the merged input files
     * @throws OperationFailedException when merging fails
     */
    public static File mergeWAVFiles(File... toMerge)
        throws OperationFailedException
    {
        //check for invalid inputs
        if(toMerge == null || toMerge.length == 0)
        {
            throw new OperationFailedException("cannot merge zero files");
        }

        //make sure all files to merge are in the same directory
        String directory = getCommonDirectory(toMerge);

        //create the .txt file
        File text;
        try
        {
            text = fileManager.getNewFile(directory, ".txt");
        }
        catch (InvalidDirectoryException e)
        {
            throw new OperationFailedException("couldn't make text file");
        }

        try
        {
            writeFileList(text, toMerge);

            //build the command and return the merged file
            String mergedFilePath;
            try
            {
                mergedFilePath = fileManager.
                    getNewFile(directory, ".wav").getPath();
            }
            catch (InvalidDirectoryException e)
            {
                throw new OperationFailedException(
                    "couldn't make merged file path");
            }

            return execute(
                buildMergeCommand(text.getPath(), mergedFilePath),
                mergedFilePath);
        }
        finally
        {
            //the list is only needed while ffmpeg runs; never leave it
            //behind, not even when one of the steps above failed
            text.delete();
        }
    }

    /**
     * Builds the command to convert an audio file to
     * .wav format with mono sound and a sampling rate of 16000 Hz.
     * Every argument is a separate array element, so paths containing
     * whitespace are passed to ffmpeg unharmed.
     * "-i source" - the file to convert
     * "-acodec pcm_s16le" - convert to raw pcm for .wav file
     * "-ac 1" - convert to one audio channel (mono sound)
     * "-ar 16000" - convert to 16000 Hz
     * "target" - the file to convert to
     * @param source the path of the file to convert
     * @param target the path of the file to convert to
     * @return the command, one element per argument
     */
    private static String[] buildConvertCommand(String source, String target)
    {
        return new String[]
        {
            PROGRAM, "-i", source,
            "-acodec", "pcm_s16le", "-ac", "1", "-ar", "16000",
            target
        };
    }

    /**
     * Builds the command to merge 2 or more .wav files.
     * Requires a .txt file with the paths of all files with the structure:
     * #mylist.txt
     * file 'path/to/file'\n
     * file 'second/file'\n
     * @param listFile the path to the .txt file
     * @param target the path to the output file
     * @return the command, one element per argument
     */
    private static String[] buildMergeCommand(String listFile, String target)
    {
        return new String[]
        {
            PROGRAM, "-f", "concat", "-i", listFile, "-c", "copy", target
        };
    }

    /**
     * Verifies that all given files are located in the same directory
     * managed by the file manager
     * @param toMerge the files to check, at least one
     * @return the directory all files are located in
     * @throws OperationFailedException when a file is not in a managed
     * directory or the files are in different directories
     */
    private static String getCommonDirectory(File[] toMerge)
        throws OperationFailedException
    {
        try
        {
            String directory = fileManager.getDirectory(toMerge[0]);
            for (int i = 1; i < toMerge.length; i++)
            {
                if (!fileManager.getDirectory(toMerge[i]).equals(directory))
                {
                    throw new NotInDirectoryException();
                }
            }
            return directory;
        }
        catch (NotInDirectoryException e)
        {
            throw new OperationFailedException("all files to merge need" +
                " to be in the same folder");
        }
    }

    /**
     * Writes the names of the files to merge to the given .txt file,
     * one "file 'name'" entry per line
     * @param text the .txt file to write to
     * @param toMerge the files whose names are written
     * @throws OperationFailedException when the .txt file cannot be written
     */
    private static void writeFileList(File text, File[] toMerge)
        throws OperationFailedException
    {
        try(PrintWriter printWriter =
                new PrintWriter(new FileOutputStream(text)))
        {
            for(File file : toMerge)
            {
                printWriter.printf("file '%s'\n", file.getName());
            }
            //a PrintWriter never throws while writing; checkError()
            //flushes and reports whether a write went wrong
            if(printWriter.checkError())
            {
                throw new OperationFailedException("Couldn't write to the " +
                    "merged .txt file");
            }
        }
        catch (FileNotFoundException e)
        {
            throw new OperationFailedException("Couldn't write to the " +
                "merged .txt file");
        }
    }

    /**
     * Runs the command and translates the low-level failures into an
     * OperationFailedException
     * @param command the command to run, one element per argument
     * @param newFilePath the path to the file the command creates
     * @return the resulting file from the command operation
     * @throws OperationFailedException when the command cannot be run, is
     * interrupted, fails or does not create the expected file
     */
    private static File execute(String[] command, String newFilePath)
        throws OperationFailedException
    {
        try
        {
            return runCommand(command, newFilePath);
        }
        catch (IOException e)
        {
            throw new OperationFailedException(e.getMessage());
        }
        catch (InterruptedException e)
        {
            //restore the flag so callers can still see the interruption
            Thread.currentThread().interrupt();
            throw new OperationFailedException(e.getMessage());
        }
    }

    /**
     * run a FFMPEG command
     * @param command the command to run, one element per argument
     * @param newFilePath the path to the file the command creates
     * @return the resulting file from the command operation
     * @throws IOException when the process cannot be executed
     * @throws InterruptedException when the process gets interrupted
     * @throws OperationFailedException when the command does not return 0
     * or the resulting file cannot be found
     */
    private static File runCommand(String[] command, String newFilePath)
        throws IOException, InterruptedException, OperationFailedException
    {
        //run the command; the arguments are passed as they are, without
        //being split on whitespace again
        final Process process = new ProcessBuilder(command).start();

        //make sure the errStream and outputStream don't get blocked,
        //which would block the command from executing
        new StreamEater(process.getErrorStream(), "error", OUTPUT);
        new StreamEater(process.getInputStream(), "input", OUTPUT);

        //get the return value of the command. if not 0, something
        //went wrong
        int retVal;
        try
        {
            retVal = process.waitFor();
        }
        catch (InterruptedException e)
        {
            //nobody waits for the result anymore, so stop ffmpeg as well
            process.destroy();
            throw e;
        }
        if(retVal != 0)
        {
            throw new OperationFailedException("operation \"" +
                describe(command) + "\" returned with value " + retVal + "!");
        }

        // Get the newly created file and return it. If it doesn't exist,
        // something went wrong;
        File file = new File(newFilePath);
        if (file.exists())
        {
            return file;
        }
        else
        {
            throw new OperationFailedException("not able to get resulting " +
                "file " + newFilePath + " from command \"" +
                describe(command) + "\"");
        }
    }

    /**
     * Joins the arguments of a command with single spaces so the command
     * can be shown in an error message
     * @param command the command, one element per argument
     * @return the command as one line of text
     */
    private static String describe(String[] command)
    {
        StringBuilder text = new StringBuilder();
        for(String argument : command)
        {
            if(text.length() > 0)
            {
                text.append(' ');
            }
            text.append(argument);
        }
        return text.toString();
    }

}
-------------------------------------------------------------------------------- /src/main/java/org/jitsi/sphinx4http/server/ServerConfiguration.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.jitsi.sphinx4http.server; 20 | 21 | import org.jitsi.sphinx4http.exceptions.ServerConfigurationException; 22 | import org.slf4j.Logger; 23 | import org.slf4j.LoggerFactory; 24 | 25 | import java.io.File; 26 | import java.io.FileInputStream; 27 | import java.io.IOException; 28 | import java.net.URL; 29 | import java.nio.file.Paths; 30 | import java.util.Properties; 31 | 32 | /** 33 | * Class handling the configuration file sphinx4http.properties 34 | * It currently supports 3 configurable options for the server: 35 | * 1. which port to use 36 | * 2. where the executable for ffmpeg is located in the system 37 | * 3.
folder wherein the server stores and processes the incoming files
 *
 * @author Nik Vaessen
 */
public class ServerConfiguration
{
    /**
     * Identifier for the port in the config file
     */
    public final static String PORT_PROPERTY = "port";

    /**
     * Identifier for the ffmpeg path in the config file
     */
    public final static String FFMPEG_PATH_PROPERTY = "ffmpeg_path";

    /**
     * Identifier for the data folder path in the config file
     */
    public final static String DATA_FOLDER_PATH_PROPERTY = "data_folder_path";

    /**
     * Identifier for the returning method of the server in the config file
     */
    public final static String CHUNKED_HTTP_RESPONSE_PROPERTY =
        "chunked_response";

    /**
     * Name of the config file
     */
    private static final String CONFIG_FILE_NAME = "sphinx4http.properties";

    /**
     * Path to the config file
     */
    private static final String CONFIG_FILE_PATH =
        "src/main/resources/" + CONFIG_FILE_NAME;

    /**
     * Logger for this class. It has to be declared before the singleton
     * instance below, because the constructor already uses it.
     */
    private static final Logger logger =
        LoggerFactory.getLogger(ServerConfiguration.class);

    /**
     * Default value for the port
     */
    private int port = 8081;

    /**
     * Default value for the ffmpeg path
     */
    private String ffmpegPath = "/usr/bin/ffmpeg";

    /**
     * Default value for the data folder path
     */
    private String dataFolderPath = "data/";

    /**
     * Default value for the chunked response returning policy
     */
    private boolean chunkedResponse = false;

    /**
     * Singleton instance of the configuration class
     */
    private static final ServerConfiguration config =
        new ServerConfiguration();

    /**
     * Stores the properties found in the file. Stays empty when the file
     * cannot be found or read.
     */
    private final Properties properties = new Properties();


    /**
     * Construct an instance of the configuration. Every setting keeps its
     * default value unless the config file is found and holds a valid
     * value for it.
     */
    private ServerConfiguration()
    {
        //get the config file
        File configFile = findConfigFile();
        if(configFile == null)
        {
            logger.warn("Could not find {}", CONFIG_FILE_NAME);
            return;
        }

        //load the Properties class
        try (FileInputStream in = new FileInputStream(configFile))
        {
            properties.load(in);
        }
        catch (IOException e)
        {
            logger.warn("Couldn't load {}", CONFIG_FILE_NAME, e);
        }

        //load properties
        loadProperties();
    }

    /**
     * Find the config file with name "sphinx4http.properties". It will first
     * try to use the getResource() method and if that fails,
     * try the location "src/main/resources/sphinx4http.properties"
     * @return the file containing the configuration, or null if not found
     */
    private File findConfigFile()
    {
        //first try resources. The leading "/" makes the lookup start at
        //the root of the classpath; without it the name is resolved
        //relative to the package of this class
        URL url = getClass().getResource("/" + CONFIG_FILE_NAME);
        if(url != null)
        {
            try
            {
                return Paths.get(url.toURI()).toFile();
            }
            catch (java.net.URISyntaxException | RuntimeException e)
            {
                //e.g. the resource is not a plain file on disk
                logger.warn("Could not use {} as a file, trying {}",
                    url, CONFIG_FILE_PATH, e);
            }
        }

        //otherwise try the path in the source folder
        File file = new File(CONFIG_FILE_PATH);
        if(file.exists())
        {
            return file;
        }

        //if it cannot be found, return null
        return null;
    }

    /**
     * Return the singleton object of this configuration class
     * @return the configuration instance
     */
    public static ServerConfiguration getInstance()
    {
        return config;
    }

    /**
     * Get the port the server should run on
     * @return the port as an integer, 8081 if not specified in the
     * config file
     */
    public int getPort()
    {
        return this.port;
    }

    /**
     * Get the path to an ffmpeg executable
     * @return the path to an ffmpeg executable
     */
    public String getFfmpegPath()
    {
        return this.ffmpegPath;
    }

    /**
     * Get the path to where the data folder should be created
     * @return the path to where the data folder should be created
     */
    public String getDataFolderPath()
    {
        return this.dataFolderPath;
    }

    /**
     * Get the chunked response returning policy
     * @return the value of the "chunked_response" setting, false if not
     * specified in the config file
     */
    public boolean isChunkedResponse()
    {
        return this.chunkedResponse;
    }

    /**
     * Load the properties read by the Properties class into instance
     * variables. Unknown properties are reported and ignored.
     */
    private void loadProperties()
    {
        for(String property: properties.stringPropertyNames())
        {
            String value = properties.getProperty(property);
            switch(property)
            {
                case PORT_PROPERTY:
                    this.port = getInteger(property, this.port, value);
                    break;
                case FFMPEG_PATH_PROPERTY:
                    this.ffmpegPath =
                        getString(property, this.ffmpegPath, value);
                    break;
                case DATA_FOLDER_PATH_PROPERTY:
                    this.dataFolderPath =
                        getString(property, this.dataFolderPath, value);
                    break;
                case CHUNKED_HTTP_RESPONSE_PROPERTY:
                    this.chunkedResponse =
                        getBoolean(property, this.chunkedResponse, value);
                    break;
                default:
                    logger.warn("property {} in config is not a valid " +
                        "configuration setting", property);
            }
        }
    }

    /**
     * Convert a value given by the Properties class into an integer
     * @param name the name of the property, used for logging
     * @param original the default value of the setting for
     *                 if the integer cannot be converted
     * @param integerToConvert the value to convert into an integer
     * @return the converted integer, or the default value when the value
     * is missing or not an integer
     */
    private int getInteger(String name, int original, String integerToConvert)
    {
        if(integerToConvert != null)
        {
            try
            {
                //surrounding whitespace is not part of the number
                return Integer.parseInt(integerToConvert.trim());
            }
            catch (NumberFormatException e)
            {
                logger.warn("Property {} in config file does not have a " +
                    "valid integer value", name, e);
                return original;
            }
        }

        logger.warn("Property {} in config file does not have a valid " +
            "integer value", name);
        return original;
    }

    /**
     * Convert a value given by the Properties class into a String
     * @param name the name of the property, used for logging
     * @param original The default value of the setting for if the String
     *                 is missing
     * @param stringToConvert the value to use
     * @return the given value, or the default value when it is missing
     */
    private String getString(String name, String original,
                             String stringToConvert)
    {
        if(stringToConvert == null)
        {
            logger.warn("Property {} in config file does not have a valid " +
                "String value", name);
            return original;
        }
        return stringToConvert;
    }

    /**
     * Convert a value given by the Properties class into a boolean. Only
     * "true" and "false", in any case, are accepted.
     * @param name the name of the property, used for logging
     * @param original The default value of the setting for if the boolean
     *                 cannot be converted
     * @param booleanToConvert the value to convert into a boolean
     * @return the converted boolean, or the default value when the value
     * is missing or neither "true" nor "false"
     */
    private boolean getBoolean(String name, boolean original,
                               String booleanToConvert)
    {
        if(booleanToConvert != null)
        {
            String trimmed = booleanToConvert.trim();
            if(trimmed.equalsIgnoreCase("true"))
            {
                return true;
            }
            if(trimmed.equalsIgnoreCase("false"))
            {
                return false;
            }
        }

        logger.warn("Property {} in config file does not have a valid" +
            " boolean value", name);
        return original;
    }

    /**
     * generic method to verify that a requested configuration setting
     * was actually set. If it wasn't set, and thus is null, it will throw
     * an error
     * @param o the object to verify
     * @param c the class belonging to the object being verified
     * @param name the name of the object in the configuration
     * @param <T> the type of the object being verified
     * @return the verified object, which will not be null
     * @throws ServerConfigurationException if the passed object is null
     */
    private <T> T verifyNotNull(Object o, Class<T> c, String name)
        throws ServerConfigurationException
    {
        if(o == null)
        {
            throw new ServerConfigurationException(name + " was not " +
                "specified in the config file");
        }
        else
        {
            return c.cast(o);
        }
    }

}
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /src/main/java/org/jitsi/sphinx4http/util/FileManager.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package org.jitsi.sphinx4http.util; 20 | 21 | import org.jitsi.sphinx4http.exceptions.InvalidDirectoryException; 22 | import org.jitsi.sphinx4http.exceptions.NotInDirectoryException; 23 | import org.jitsi.sphinx4http.server.ServerConfiguration; 24 | import org.slf4j.Logger; 25 | import org.slf4j.LoggerFactory; 26 | 27 | import java.io.File; 28 | import java.util.concurrent.atomic.AtomicInteger; 29 | 30 | /** 31 | * Manages a directory and creation of unique file names so IOExceptions 32 | * while reading and writing the audio files for speech recognition are 33 | * less likely to occur. 34 | * 35 | * @author Nik Vaessen 36 | */ 37 | public class FileManager 38 | { 39 | /** 40 | * Logger of this class 41 | */ 42 | private static final Logger logger = 43 | LoggerFactory.getLogger(FileManager.class); 44 | 45 | /** 46 | * Name of the main directory 47 | * Defaults to "data/" when not specified in config file 48 | */ 49 | private static final String MAIN_DIR = ServerConfiguration. 50 | getInstance().getDataFolderPath(); 51 | 52 | /** 53 | * Name of the directory where initially retrieved files are stored in 54 | * before they are converted to the right format 55 | */ 56 | public static final String INCOMING_DIR = MAIN_DIR + "incoming/"; 57 | 58 | /** 59 | * Name of the directory where converted files are stored 60 | */ 61 | public static final String CONVERTED_DIR = MAIN_DIR + "converted/"; 62 | 63 | /** 64 | * Name of the directory where the files are moved to once the speech- 65 | * recognition is done with the audio files and no longer needs them. 
66 | */ 67 | private static final String DISPOSED_DIR = MAIN_DIR + "disposed/"; 68 | 69 | /** 70 | * Singleton object of this FileManager class 71 | */ 72 | private static FileManager fileManager = new FileManager(); 73 | 74 | /** 75 | * The main directory 76 | */ 77 | private File mainDir; 78 | 79 | /** 80 | * The incoming file's directory 81 | */ 82 | private File incomingDir; 83 | 84 | /** 85 | * The converted file's directory 86 | */ 87 | private File convertedDir; 88 | 89 | /** 90 | * The disposed file's directory 91 | */ 92 | private File disposedDir; 93 | 94 | /** 95 | * tag that gets added the every file name. The tag gets incremented every 96 | * time a new file is created so that all file names will be unique 97 | */ 98 | private static AtomicInteger tag; 99 | 100 | /** 101 | * Sets the tag to 0 and creates the directory where the files 102 | * will be stored, as well as deleting all files in them 103 | * and making a thread which deletes all files from the disposed 104 | * directory every 5 minutes 105 | */ 106 | private FileManager() 107 | { 108 | tag = new AtomicInteger(0); 109 | checkDirectoryExistence(); 110 | cleanDirectories(); 111 | createDisposedDirectoryCleaner(); 112 | } 113 | 114 | /** 115 | * Gets the singleton object of this FileManager class 116 | * @return the singleton object representing this FileManager class 117 | */ 118 | public static FileManager getInstance() 119 | { 120 | return fileManager; 121 | } 122 | 123 | /** 124 | * Finds out in which directory, managed by this class, a file is located 125 | * @param file the file to get the directory from 126 | * @return A string of the name of the directory, equal to one of the 127 | * constant strings from this class 128 | * @throws NotInDirectoryException when the file is not in any of the 129 | * directories manged by this class 130 | */ 131 | public String getDirectory(File file) 132 | throws NotInDirectoryException 133 | { 134 | String path = file.getAbsolutePath(); 135 | 
if(path.contains(MAIN_DIR)) 136 | { 137 | if(path.contains(INCOMING_DIR)) 138 | { 139 | return INCOMING_DIR; 140 | } 141 | else if(path.contains(CONVERTED_DIR)) 142 | { 143 | return CONVERTED_DIR; 144 | } 145 | else if(path.contains(DISPOSED_DIR)) 146 | { 147 | return DISPOSED_DIR; 148 | } 149 | else 150 | { 151 | return MAIN_DIR; 152 | } 153 | } 154 | else 155 | { 156 | throw new NotInDirectoryException(); 157 | } 158 | } 159 | 160 | /** 161 | * A tag with a unique integer and the rough time and date of the methods 162 | * execution 163 | * @return the unique tag 164 | */ 165 | private static String getTag() 166 | { 167 | return tag.getAndIncrement() + "_" + TimeStrings.getNowString(); 168 | } 169 | 170 | /** 171 | * Internal method to get the File object associated with the constant 172 | * string representing a directory managed by the class 173 | * @param dir One of the constant strings representing a directory 174 | * managed by this class 175 | * @return the File object of the directory 176 | * @throws InvalidDirectoryException when the input is not one of the 177 | * constant directory strings of this class 178 | */ 179 | private File getDirectory(String dir) 180 | throws InvalidDirectoryException 181 | { 182 | if(dir.equals(MAIN_DIR)) 183 | { 184 | return mainDir; 185 | } 186 | else if(dir.equals(INCOMING_DIR)) 187 | { 188 | return incomingDir; 189 | } 190 | else if(dir.equals(CONVERTED_DIR)) 191 | { 192 | return convertedDir; 193 | } 194 | else if(dir.equals(DISPOSED_DIR)) 195 | { 196 | return disposedDir; 197 | } 198 | else 199 | { 200 | throw new InvalidDirectoryException(dir + " is not directory" + 201 | " managed by the FileManager"); 202 | } 203 | } 204 | 205 | /** 206 | * Get a new potential File object with a unique name in one of the 207 | * specified directories 208 | * @param directory the name of the directory in which the file should 209 | * be located 210 | * @return a File representing a potential new file in the specified 211 | * directory 
which is safe to write to 212 | * @throws InvalidDirectoryException when the specified directory is not 213 | * one of the directories managed by this class 214 | */ 215 | public File getNewFile(String directory) 216 | throws InvalidDirectoryException 217 | { 218 | return getNewFile(directory, ""); 219 | } 220 | 221 | /** 222 | * Get a new potential File object with a unique name in one of the 223 | * specified directories 224 | * @param directory the name of the directory in which the file should be 225 | * located 226 | * @param name part of the name of the new file trailing the unique 227 | * tag 228 | * @return A file representing a potential new file in the specified 229 | * directory which is safe to write to 230 | * @throws InvalidDirectoryException when the specified directory is not 231 | * one of the directories managed by this class 232 | */ 233 | public File getNewFile(String directory, String name) 234 | throws InvalidDirectoryException 235 | { 236 | File dir = getDirectory(directory); 237 | return new File(dir.getAbsolutePath() + File.separator + 238 | getTag() + name); 239 | } 240 | 241 | /** 242 | * Puts all the file given as arguments in the disposed file's directory. 243 | * If this fails, the files are instead deleted 244 | * @param files all files needed to be moved the disposed directory 245 | */ 246 | public void disposeFiles(File... files) 247 | { 248 | for(File file : files) 249 | { 250 | if(file.exists()) 251 | { 252 | try 253 | { 254 | boolean success = file.renameTo( 255 | new File(getNewFile(FileManager.DISPOSED_DIR, 256 | file.getName()).getAbsolutePath())); 257 | if(!success) 258 | { 259 | file.delete(); 260 | } 261 | } 262 | catch (InvalidDirectoryException e) 263 | { 264 | e.printStackTrace(); 265 | } 266 | } 267 | } 268 | } 269 | 270 | /** 271 | * If the directories do not yet exist in the file system, 272 | * they will be created. 
273 | */ 274 | private void checkDirectoryExistence() 275 | { 276 | //check if the directory "data/" exists 277 | mainDir = new File(MAIN_DIR); 278 | mainDir.mkdir(); 279 | //check if the directory "data/incoming" exists 280 | incomingDir = new File(INCOMING_DIR); 281 | incomingDir.mkdir(); 282 | //check if the directory "data/converted" exists 283 | convertedDir = new File(CONVERTED_DIR); 284 | convertedDir.mkdir(); 285 | //check if the directory "data/transcribed" exists 286 | disposedDir = new File(DISPOSED_DIR); 287 | disposedDir.mkdir(); 288 | } 289 | 290 | /** 291 | * Removes all files in all subdirectories 292 | */ 293 | private void cleanDirectories() 294 | { 295 | //delete every file in the incoming directory 296 | if(incomingDir.isDirectory()) // to prevent null pointers 297 | { 298 | for(File file : incomingDir.listFiles()) 299 | { 300 | file.delete(); 301 | } 302 | } 303 | else 304 | { 305 | new Exception("could not delete files from incoming directory") 306 | .printStackTrace(); 307 | } 308 | //delete every file in the converted directory 309 | if(convertedDir.isDirectory()) // to prevent null pointers 310 | { 311 | for(File file : convertedDir.listFiles()) 312 | { 313 | file.delete(); 314 | } 315 | } 316 | else 317 | { 318 | new Exception("could not delete files from converted directory") 319 | .printStackTrace(); 320 | } 321 | //delete every file in the disposed directory 322 | if(disposedDir.isDirectory()) // to prevent null pointers 323 | { 324 | for(File file : disposedDir.listFiles()) 325 | { 326 | file.delete(); 327 | } 328 | } 329 | else 330 | { 331 | new Exception("could not delete files from disposed directory") 332 | .printStackTrace(); 333 | } 334 | } 335 | 336 | /** 337 | * Creates a thread which removes every file from the 338 | * disposed directory every 5 minutes 339 | */ 340 | private void createDisposedDirectoryCleaner() 341 | { 342 | new Thread(new Runnable() 343 | { 344 | @Override 345 | public void run() 346 | { 347 | while(true) 
348 | { 349 | try 350 | { 351 | Thread.sleep(60 * 5 * 1000); 352 | } 353 | catch (InterruptedException e) 354 | { 355 | e.printStackTrace(); 356 | } 357 | if(disposedDir.isDirectory())//to prevent null pointer 358 | { 359 | for(File file : disposedDir.listFiles()) 360 | { 361 | if(!file.delete()) 362 | { 363 | logger.warn("could not delete {} from " + 364 | "the the disposed directory", 365 | file.getPath()); 366 | } 367 | } 368 | } 369 | } 370 | } 371 | }).start(); 372 | } 373 | } 374 | -------------------------------------------------------------------------------- /src/main/resources/edu/cmu/sphinx/api/default.config.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | speedTracker 29 | memoryTracker 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 99 | 100 | 101 | 102 | 103 | 105 | 106 | 107 | 108 | 109 | 111 | 112 | standardActiveListFactory 113 | wordActiveListFactory 114 | wordActiveListFactory 115 | standardActiveListFactory 116 | standardActiveListFactory 117 | standardActiveListFactory 118 | 119 | 120 | 121 | 123 | 124 | 125 | 126 | 127 | 129 | 130 | 131 | 132 | 133 | 135 | 136 | 138 | 139 | 140 | 141 | 143 | 144 | 145 | 147 | 149 | 150 | 151 | 152 | 153 | 155 | 156 | 157 | 158 | 159 | 160 | 162 | 163 | 164 | 166 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 195 | 196 | 197 | 198 | 199 | 200 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 218 | 219 | 220 | 221 | 222 | 223 | 225 | 226 | 227 | 
228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 245 | 246 | 247 | 248 | 249 | 250 | 252 | 253 | 254 | 255 | 256 | 258 | 259 | 260 | 261 | 262 | 263 | 265 | 266 | 267 | 268 | dataSource 269 | dataBlocker 270 | speechClassifier 271 | speechMarker 272 | preemphasizer 273 | windower 274 | fft 275 | autoCepstrum 276 | liveCMN 277 | featureExtraction 278 | featureTransform 279 | 280 | 281 | 282 | 284 | 285 | 286 | 287 | 288 | 289 | 291 | 292 | 293 | 294 | 296 | 297 | 298 | 300 | 301 | 303 | 304 | 305 | 307 | 308 | 309 | 311 | 312 | 313 | 314 | 316 | 317 | 319 | 320 | 322 | 323 | 325 | 326 | 327 | 328 | 330 | 331 | 332 | 333 | 334 | 335 | 337 | 338 | 339 | 340 | -------------------------------------------------------------------------------- /src/main/java/org/jitsi/sphinx4http/server/RequestHandler.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx4 HTTP server 3 | * 4 | * Copyright @ 2016 Atlassian Pty Ltd 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package org.jitsi.sphinx4http.server; 20 | 21 | import org.eclipse.jetty.http.HttpMethod; 22 | import org.eclipse.jetty.server.Request; 23 | import org.eclipse.jetty.server.handler.AbstractHandler; 24 | import org.jitsi.sphinx4http.exceptions.InvalidDirectoryException; 25 | import org.jitsi.sphinx4http.exceptions.OperationFailedException; 26 | import org.jitsi.sphinx4http.util.FileManager; 27 | import org.jitsi.sphinx4http.util.SessionManager; 28 | import org.json.simple.JSONArray; 29 | import org.json.simple.JSONObject; 30 | import org.slf4j.Logger; 31 | import org.slf4j.LoggerFactory; 32 | 33 | import javax.servlet.http.HttpServletRequest; 34 | import javax.servlet.http.HttpServletResponse; 35 | import java.io.File; 36 | import java.io.FileOutputStream; 37 | import java.io.IOException; 38 | import java.io.InputStream; 39 | 40 | /** 41 | * This class does most of the work. It accepts incoming HTTP requests, 42 | * see if their content is correct, converts the given audio file to the 43 | * correct format, predicts the speech in said audio file, and sends the text 44 | * back to the requester 45 | * 46 | * @author Nik Vaessen 47 | */ 48 | public class RequestHandler extends AbstractHandler 49 | { 50 | /** 51 | * The logger for this class 52 | */ 53 | private static final Logger logger = 54 | LoggerFactory.getLogger(RequestHandler.class); 55 | 56 | /** 57 | * Keyword in the http url specifying that an audio transcription 58 | * is being requested 59 | */ 60 | private static final String ACCEPTED_TARGET = "/recognize"; 61 | 62 | /** 63 | * Keyword in the http url specifying the session id 64 | */ 65 | private static final String SESSION_PARAMETER = "session-id"; 66 | 67 | /** 68 | * Name of the json value holding the session id 69 | */ 70 | private static final String JSON_SESSION_ID = "session-id"; 71 | 72 | /** 73 | * name of the json value holding the speech-to-text result 74 | */ 75 | private static final String JSON_RESULT = "result"; 76 | 77 | 
/** 78 | * The class managing creation of directories and names for the audio files 79 | * which are needed for handling the speech-to-text service 80 | */ 81 | private FileManager fileManager; 82 | 83 | /** 84 | * The class managing the sessions for every request 85 | */ 86 | private SessionManager sessionManager; 87 | 88 | /** 89 | * The configuration of the server 90 | */ 91 | private ServerConfiguration config; 92 | 93 | /** 94 | * Creates an object being able to handle incoming HTTP POST requests 95 | * with audio files wanting to be transcribed 96 | */ 97 | public RequestHandler() 98 | { 99 | fileManager = FileManager.getInstance(); 100 | sessionManager = new SessionManager(); 101 | config = ServerConfiguration.getInstance(); 102 | } 103 | 104 | /** 105 | * Handles incoming HTTP post requests. Checks for validity of the request 106 | * by checking if has the right url, if it's a POST request and if it 107 | * has an audio file as content type 108 | *

109 | * It than stores the audio file to disk, converts it and transcribed the 110 | * audio before sending the text string back 111 | * 112 | * @param target the target of the request - should be equal to 113 | * ACCEPTED_TARGET 114 | * @param baseRequest the original unwrapped request object 115 | * @param request The request either as the Request object or a wrapper of 116 | * that request. 117 | * @param response The response as the Response object or a wrapper of 118 | * that request. 119 | * @throws IOException when writing to the response object goes wrong 120 | */ 121 | @SuppressWarnings("unchecked") //for JSONObject.put() 122 | public void handle(String target, 123 | Request baseRequest, 124 | HttpServletRequest request, 125 | HttpServletResponse response) 126 | throws IOException 127 | { 128 | //log the request 129 | logger.info("New incoming request:\n" + 130 | "target: {}\n" + 131 | "method: {}\n" + 132 | "Content-Type: {}\n" + 133 | "Content-Length: {}\n" + 134 | "Time: {}\n" + 135 | "session-id: {}\n", 136 | target, baseRequest.getMethod(), baseRequest.getContentType(), 137 | baseRequest.getContentLength(), baseRequest.getHeader("Date"), 138 | baseRequest.getParameter("session-id")); 139 | 140 | //check if the address was "http://:/recognize" 141 | if (!target.startsWith(ACCEPTED_TARGET)) 142 | { 143 | sendError(HttpServletResponse.SC_BAD_REQUEST, 144 | "URL needs to tail " + ACCEPTED_TARGET, 145 | baseRequest, response); 146 | logger.info("denied request because target was " + target); 147 | return; 148 | } 149 | //check if request method is POST 150 | if (!HttpMethod.POST.asString().equals(baseRequest.getMethod())) 151 | { 152 | sendError(HttpServletResponse.SC_BAD_REQUEST, 153 | "HTTP request should be POST and include an audio file", 154 | baseRequest, response); 155 | logger.info("denied request because METHOD was not post"); 156 | return; 157 | } 158 | //check if content type is an audio file 159 | if 
(!baseRequest.getContentType().contains("audio/")) 160 | { 161 | sendError(HttpServletResponse.SC_BAD_REQUEST, 162 | "HTTP request should have content type \"audio/xxxx\"", 163 | baseRequest, response); 164 | logger.info("denied request because content type was {}", 165 | baseRequest.getContentType()); 166 | return; 167 | } 168 | 169 | //extract file 170 | File audioFile; 171 | try 172 | { 173 | audioFile = writeAudioFile(request.getInputStream(), 174 | baseRequest.getContentType()); 175 | } 176 | catch (IOException | InvalidDirectoryException e) 177 | { 178 | sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR, 179 | "Failed to execute request due to " + 180 | "failure in writing the audio file", 181 | baseRequest, response); 182 | logger.warn("Unable to write an audio file"); 183 | return; 184 | } 185 | 186 | //convert file to .wav 187 | File convertedFile; 188 | try 189 | { 190 | convertedFile = AudioFileManipulator.convertToWAV(audioFile, 191 | fileManager.getNewFile(FileManager.CONVERTED_DIR, ".wav") 192 | .getAbsolutePath()); 193 | //delete the original audio file immediately as it's not needed 194 | fileManager.disposeFiles(audioFile); 195 | } 196 | catch (OperationFailedException | InvalidDirectoryException e) 197 | { 198 | sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR, 199 | "Failed to execute request due to " + 200 | "failure in converting the audio file", 201 | baseRequest, response); 202 | fileManager.disposeFiles(audioFile); 203 | logger.warn("unable to convert an audio file to WAV"); 204 | return; 205 | } 206 | 207 | //check for session 208 | Session session; 209 | String sessionID; 210 | if ((sessionID = request.getParameter(SESSION_PARAMETER)) == null) 211 | { 212 | //make a new session 213 | session = sessionManager.createNewSession(); 214 | logger.info("Created new session with id: {} ", session.getId()); 215 | } 216 | else 217 | { 218 | logger.info("handling session with id: {}"); 219 | session = sessionManager.getSession(sessionID); 
220 | if (session == null) 221 | { 222 | sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR, 223 | "invalid session id", baseRequest, response); 224 | fileManager.disposeFiles(audioFile); 225 | logger.info("request had invalid id: {}", sessionID); 226 | return; 227 | } 228 | } 229 | 230 | //finish the rest of the request 231 | if(config.isChunkedResponse()) 232 | { 233 | transcribeRequestChunked(baseRequest, response,session, 234 | convertedFile); 235 | } 236 | else 237 | { 238 | transcribeRequest(baseRequest, response, session, convertedFile); 239 | } 240 | } 241 | 242 | /** 243 | * When a request has been verified and accepted, this method will manage 244 | * the actual transcription work. This method will transcribe the whole file 245 | * and sent the result when the transcription is completely done 246 | * @param baseRequest the baseRequest 247 | * @param response the response of the server 248 | * @param session the session belonging to the request 249 | * @param audioFile the audio file which is going to get transcribed 250 | * @throws IOException when writing the response goes wrong 251 | */ 252 | @SuppressWarnings("unchecked") //for JSONObject.put() 253 | private void transcribeRequest(Request baseRequest, 254 | HttpServletResponse response, 255 | Session session, 256 | File audioFile) 257 | throws IOException 258 | { 259 | //get the speech-to-text 260 | JSONArray speechToTextResult; 261 | try 262 | { 263 | logger.info("Started audio transcription for id: {}", 264 | session.getId()); 265 | speechToTextResult = session.transcribe(audioFile); 266 | } 267 | catch (IOException e) 268 | { 269 | sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR, 270 | "Failed to execute request due to" + 271 | "an error in transcribing the audio file", 272 | baseRequest, response); 273 | return; 274 | } 275 | 276 | //create the returning json result with the session id 277 | JSONObject result = new JSONObject(); 278 | result.put(JSON_SESSION_ID, session.getId()); 279 | 
result.put(JSON_RESULT, speechToTextResult); 280 | 281 | //return the json result 282 | response.setStatus(HttpServletResponse.SC_OK); 283 | response.setContentType("application/json"); 284 | response.getWriter().write(result.toJSONString()); 285 | baseRequest.setHandled(true); 286 | 287 | //log result 288 | logger.info("Successfully handled request with id: {}", 289 | session.getId()); 290 | logger.debug("Result of request with id {}:\n{}", session.getId(), 291 | result.toJSONString()); 292 | } 293 | 294 | /** 295 | * When a request has been verified and accepted, this method will manage 296 | * the actual transcription work. This method will transcribe the whole file 297 | * and sent the result when the transcription is completely done 298 | * @param baseRequest the baseRequest 299 | * @param response the response of the server 300 | * @param session the session belonging to the request 301 | * @param audioFile the audio file which is going to get transcribed 302 | * @throws IOException when writing the response goes wrong 303 | */ 304 | @SuppressWarnings("unchecked") //for JSONObject.put() 305 | private void transcribeRequestChunked(Request baseRequest, 306 | HttpServletResponse response, 307 | Session session, 308 | File audioFile) 309 | throws IOException 310 | { 311 | //start by sending 200 OK and the meta data JSON object 312 | response.setStatus(HttpServletResponse.SC_OK); 313 | response.setContentType("application/json"); 314 | 315 | JSONObject object = new JSONObject(); 316 | object.put(JSON_SESSION_ID, session.getId()); 317 | 318 | //and send the initial object 319 | response.getWriter().write("{\"objects\":[" + object.toJSONString()); 320 | response.getWriter().flush(); 321 | 322 | //start the transcription, which will send 323 | //JSON objects constantly 324 | JSONArray result; 325 | try 326 | { 327 | logger.info("Started audio transcription for id: {}", 328 | session.getId()); 329 | result = session.chunkedTranscribe(audioFile, 330 | 
response.getWriter()); 331 | } 332 | catch (IOException e) 333 | { 334 | logger.warn("chunked transcription with id {} failed due to " + 335 | "IO error", e); 336 | sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR, 337 | "Failed to execute request due to" + 338 | "an error in transcribing the audio file", 339 | baseRequest, response); 340 | return; 341 | } 342 | 343 | //the request is handled 344 | response.getWriter().write("]}"); 345 | response.getWriter().flush(); 346 | baseRequest.setHandled(true); 347 | 348 | //log result 349 | logger.info("Successfully handled request with id: {}", 350 | session.getId()); 351 | logger.debug("Result of chunked request with id {}:\n{}", 352 | session.getId(), result.toJSONString()); 353 | } 354 | 355 | /** 356 | * Method to finish the request with an error 357 | * @param status the http error status 358 | * @param message the message of the error 359 | * @param baseRequest the original request to reply to 360 | * @param response the HttpServletResponse object to use as an reply 361 | */ 362 | private void sendError(int status, String message, Request baseRequest, 363 | HttpServletResponse response) 364 | { 365 | response.setStatus(status); 366 | try 367 | { 368 | response.setContentType("text/plain"); 369 | response.getWriter().println(message); 370 | } 371 | catch (IOException e) 372 | { 373 | e.printStackTrace(); 374 | } 375 | baseRequest.setHandled(true); 376 | } 377 | 378 | /** 379 | * Writes the audio file given in the HTTP request to file 380 | * 381 | * @param inputStream the InputStream of the audio file object 382 | * @param contentType the content type of the HTTP request. 
Needed to give 383 | * the written file the correct file extension 384 | * @return The file object if the audio file in hte HTTP request, 385 | * written to disk 386 | * @throws IOException when reading from the InputStream goes wrong 387 | */ 388 | private File writeAudioFile(InputStream inputStream, String contentType) 389 | throws IOException, InvalidDirectoryException 390 | { 391 | String content = contentType.split("/")[0]; 392 | File audioFile = fileManager.getNewFile(FileManager.INCOMING_DIR, 393 | content); 394 | try (FileOutputStream outputStream = new FileOutputStream(audioFile)) 395 | { 396 | byte[] buffer = new byte[2048]; 397 | while (inputStream.read(buffer) != -1) 398 | { 399 | outputStream.write(buffer); 400 | } 401 | inputStream.close(); 402 | return audioFile; 403 | } 404 | catch (IOException e) 405 | { 406 | e.printStackTrace(); 407 | throw e; 408 | } 409 | } 410 | 411 | } 412 | --------------------------------------------------------------------------------