├── .idea ├── .name ├── copyright │ └── profiles_settings.xml ├── encodings.xml ├── vcs.xml ├── modules.xml ├── libraries │ ├── Maven__org_apache_commons_commons_exec_1_3.xml │ └── Maven__com_googlecode_java_diff_utils_diffutils_1_3_0.xml ├── compiler.xml ├── misc.xml └── uiDesigner.xml ├── get_pl_model.sh ├── src └── main │ └── java │ └── pl │ └── edu │ └── pjwstk │ └── kaldi │ ├── files │ ├── SegmentationList.java │ ├── LAB.java │ ├── julius │ │ ├── MLF.java │ │ ├── ConfidenceNetwork.java │ │ ├── WordList.java │ │ ├── AlignedSequence.java │ │ ├── Dictionary.java │ │ ├── LatticeNode.java │ │ ├── WordGraph.java │ │ ├── WordSequence.java │ │ └── JuliusOutput.java │ ├── RTTM.java │ ├── CTM.java │ ├── TextGrid.java │ ├── Converter.java │ ├── ClarinText.java │ └── Segmentation.java │ ├── programs │ ├── Kill.java │ ├── chmod.java │ ├── Python.java │ ├── FFMPEG.java │ ├── Sox.java │ ├── Java.java │ ├── KaldiKWS.java │ ├── NGram.java │ ├── Transcriber.java │ ├── Shout.java │ ├── Julius.java │ ├── Praat.java │ └── Essentia.java │ ├── service │ ├── tasks │ │ ├── ConvertEncodingTask.java │ │ ├── TestTask.java │ │ ├── AlignTask.java │ │ ├── NSERTask.java │ │ ├── ParallelTask.java │ │ ├── KeywordSpottingTask.java │ │ ├── SpeakerDiarizationTask.java │ │ ├── Task.java │ │ ├── DecodeTask.java │ │ └── DecodeFMLLRTask.java │ ├── ServiceTask.java │ ├── database │ │ ├── dbTasks.java │ │ └── Database.java │ └── ServiceDaemon.java │ ├── utils │ ├── PasswordObfuscator.java │ ├── ProgramLauncher.java │ ├── WAV.java │ ├── Log.java │ └── Diff.java │ ├── grammars │ ├── Akt.java │ ├── Numbers.java │ └── Grammar.java │ └── ExperimentMain.java ├── pom.xml ├── KaldiJava.iml └── README.md /.idea/.name: -------------------------------------------------------------------------------- 1 | KJExperimenting -------------------------------------------------------------------------------- /get_pl_model.sh: -------------------------------------------------------------------------------- 1 | wget 
http://mowa.clarin-pl.eu/korpusy/pl_model.tar.gz 2 | -------------------------------------------------------------------------------- /.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/files/SegmentationList.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.files; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | 6 | public class SegmentationList extends Segmentation { 7 | @Override 8 | public void read(File file) throws IOException { 9 | } 10 | 11 | @Override 12 | public void write(File file) throws IOException { 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/programs/Kill.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.programs; 2 | 3 | import pl.edu.pjwstk.kaldi.utils.Log; 4 | import pl.edu.pjwstk.kaldi.utils.ProgramLauncher; 5 | 6 | public class Kill { 7 | 8 | public static void kill(int pid) { 9 | 10 | String[] cmd = { "kill", "-9", "" + pid }; 11 | 12 | ProgramLauncher launcher = new ProgramLauncher(cmd); 13 | 
14 | Log.verbose("Killing process: " + pid); 15 | launcher.run(); 16 | Log.verbose("Done."); 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/programs/chmod.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.programs; 2 | 3 | import java.io.File; 4 | 5 | import pl.edu.pjwstk.kaldi.utils.Log; 6 | import pl.edu.pjwstk.kaldi.utils.ProgramLauncher; 7 | 8 | public class chmod { 9 | 10 | public static void run(String mode, File file) { 11 | 12 | String[] cmd = new String[] { "chmod", mode, file.getAbsolutePath() }; 13 | 14 | ProgramLauncher launcher = new ProgramLauncher(cmd); 15 | 16 | Log.verbose("chmod: " + mode + " " + file.getAbsolutePath()); 17 | launcher.run(); 18 | Log.verbose("Done."); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_commons_commons_exec_1_3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_googlecode_java_diff_utils_diffutils_1_3_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/files/LAB.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.files; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.io.PrintWriter; 6 | 7 | public class LAB extends Segmentation { 8 | 9 | public LAB() { 10 | 11 | } 12 | 13 | public LAB(Tier tier) { 14 | 15 | tiers.add(tier); 16 | 17 | } 18 | 19 | public void write(File file) throws 
IOException { 20 | PrintWriter writer = new PrintWriter(file); 21 | 22 | Tier tier = tiers.get(0); 23 | 24 | for (Segment seg : tier.segments) { 25 | 26 | long start = (long) (seg.start_time * 10000000); 27 | long end = (long) (seg.end_time * 10000000); 28 | writer.format("%d %d %s %.2f\n", start, end, seg.name, 29 | seg.confidence); 30 | } 31 | writer.close(); 32 | } 33 | 34 | @Override 35 | public void read(File file) throws IOException { 36 | throw new IOException("NYI"); 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/programs/Python.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.programs; 2 | 3 | import java.io.File; 4 | 5 | import pl.edu.pjwstk.kaldi.utils.Log; 6 | import pl.edu.pjwstk.kaldi.utils.ProgramLauncher; 7 | import pl.edu.pjwstk.kaldi.utils.Settings; 8 | 9 | public class Python { 10 | 11 | public static void run(File script, String[] args) { 12 | 13 | String[] cmd = new String[args.length + 2]; 14 | cmd[0] = Settings.python_bin.getAbsolutePath(); 15 | cmd[1] = script.getAbsolutePath(); 16 | int i = 2; 17 | for (String arg : args) { 18 | cmd[i] = arg; 19 | i++; 20 | } 21 | 22 | ProgramLauncher launcher = new ProgramLauncher(cmd); 23 | launcher.setStdoutStream(new Log.Stream()); 24 | launcher.setStderrStream(new Log.Stream("ERR>>")); 25 | 26 | Log.verbose("Running python script: " + script.getName()); 27 | launcher.run(); 28 | Log.verbose("Done."); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | pl.edu.pja.multimedia 8 | KaldiJava 9 | 1.0-SNAPSHOT 10 | 11 | 12 | org.apache.commons 13 | commons-exec 14 | 1.3 15 | 16 | 17 | com.googlecode.java-diff-utils 18 | diffutils 19 | 1.3.0 20 | 21 | 22 | 23 | 24 | 25 | 
-------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/programs/FFMPEG.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.programs; 2 | 3 | import java.io.File; 4 | 5 | import pl.edu.pjwstk.kaldi.utils.Log; 6 | import pl.edu.pjwstk.kaldi.utils.ProgramLauncher; 7 | import pl.edu.pjwstk.kaldi.utils.Settings; 8 | 9 | public class FFMPEG { 10 | 11 | public static void convertTo16k(File input, File output) { 12 | 13 | String[] cmd = new String[] { Settings.ffmpeg_bin.getAbsolutePath(), 14 | "-i", input.getAbsolutePath(), "-acodec", "pcm_s16le", "-ac", 15 | "1", "-ar", "16k", output.getAbsolutePath() }; 16 | 17 | ProgramLauncher launcher = new ProgramLauncher(cmd); 18 | 19 | Log.verbose("FFMPEG: " + input.getAbsolutePath() + " -> " 20 | + output.getAbsolutePath()); 21 | launcher.run(); 22 | Log.verbose("Done."); 23 | 24 | if (launcher.getReturnValue() != 0) 25 | throw new RuntimeException("FFMPEG Retval: " + launcher.getReturnValue()); 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /KaldiJava.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/programs/Sox.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.programs; 2 | 3 | import java.io.File; 4 | 5 | import pl.edu.pjwstk.kaldi.utils.Log; 6 | import pl.edu.pjwstk.kaldi.utils.ProgramLauncher; 7 | 8 | public class Sox { 9 | 10 | public static void convert(File src_file, File dest_file) { 11 | 12 | String[] cmd = new String[] { "sox", src_file.getAbsolutePath(), "-c", 13 | "1", dest_file.getAbsolutePath(), "norm", "-3", "rate", "-h", 14 | "16k" }; 15 
| 16 | ProgramLauncher launcher = new ProgramLauncher(cmd); 17 | 18 | Log.verbose("Coverting using SoX..."); 19 | launcher.run(); 20 | Log.verbose("Done."); 21 | 22 | } 23 | 24 | public static void extract(File src_file, File dest_file, 25 | double time_start, double time_end) { 26 | 27 | double duration = time_end - time_start; 28 | 29 | String[] cmd = new String[] { "sox", src_file.getAbsolutePath(), 30 | dest_file.getAbsolutePath(), "trim", "" + time_start, 31 | "" + duration }; 32 | 33 | ProgramLauncher launcher = new ProgramLauncher(cmd); 34 | 35 | Log.verbose("Extracting using SoX..."); 36 | launcher.run(); 37 | Log.verbose("Done."); 38 | 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # KaldiJava 2 | 3 | This project contains mostly a set of classes which allow interfacing Kaldi from within Java. This is not a replacement for Kaldi (it does require a working installation of Kaldi on the computer), but it does make life easier in certain situations. 4 | 5 | The idea is to replace the standard Bash/Perl/Python scripts that are used in the Kaldi project. A common use-case for this would be when you want to include Kaldi in some web environment pipeline. Another use may be when you simply want to make your life performing experiments easier or when you require proper unicode support. 6 | 7 | ## Installation tips 8 | 9 | This project doesn't yet contain any detailed documentation. 10 | 11 | Generally you need the following steps: 12 | 13 | 1. Install Kaldi from http://kaldi-asr.org/ 14 | 2. Install any other tool from the ones included in the classes 15 | 3. Configure the Settings.java file with proper paths (or create a settings file and load it at start) 16 | 4. Modify the main program class to do whatever you need.
17 | 18 | ## Who made this 19 | 20 | This project was started at the Polish-Japanese Academy of Information Technology in Warsaw, Poland. 21 | 22 | -------------------------------------------------------------------------------- /.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/programs/Java.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.programs; 2 | 3 | import java.io.File; 4 | import java.util.List; 5 | 6 | import pl.edu.pjwstk.kaldi.utils.Log; 7 | import pl.edu.pjwstk.kaldi.utils.ProgramLauncher; 8 | 9 | public class Java { 10 | 11 | private static String sep = ":"; 12 | 13 | public static void java(String class_name, String[] args, 14 | List cp_files, boolean async) { 15 | 16 | String class_path = ""; 17 | for (File file : cp_files) { 18 | if (!class_path.isEmpty()) 19 | class_path += sep; 20 | class_path += file.getAbsolutePath(); 21 | } 22 | 23 | String[] cmd_start = new String[] { "java", "-Dfile.encoding=UTF-8", 24 | "-cp", class_path, class_name }; 25 | String cmd[] = new String[cmd_start.length + args.length]; 26 | 27 | for (int i = 0; i < cmd_start.length ; i++) 28 | cmd[i] = cmd_start[i]; 29 | for (int i = 0; i < args.length; i++) 30 | cmd[cmd_start.length + i] = args[i]; 31 | 32 | ProgramLauncher launcher = new ProgramLauncher(cmd); 33 | launcher.setSuppressOutput(true); 34 | launcher.setAsynchronous(async); 35 | 36 | Log.verbose("Running Java: " + class_name); 37 | launcher.run(); 38 | Log.verbose("Done."); 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/files/julius/MLF.java:
-------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.files.julius; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileInputStream; 6 | import java.io.IOException; 7 | import java.io.InputStreamReader; 8 | 9 | import pl.edu.pjwstk.kaldi.utils.Settings; 10 | 11 | public class MLF { 12 | 13 | private File file; 14 | 15 | public MLF(File file) { 16 | this.file = file; 17 | } 18 | 19 | public WordSequence load(String labfile) throws IOException { 20 | BufferedReader reader = new BufferedReader(new InputStreamReader( 21 | new FileInputStream(file), Settings.julius_default_encoding)); 22 | 23 | int pos = labfile.lastIndexOf('.'); 24 | labfile = labfile.substring(0, pos) + ".lab"; 25 | 26 | String line; 27 | while ((line = reader.readLine()) != null) { 28 | 29 | if (line.startsWith("#")) 30 | continue; 31 | 32 | if (line.startsWith("\"") && line.contains(labfile)) {// TODO: weak 33 | // solution 34 | WordSequence ret = new WordSequence(); 35 | while ((line = reader.readLine()) != null) { 36 | if (line.equals(".")) { 37 | reader.close(); 38 | return ret; 39 | } 40 | ret.addWord(line); 41 | } 42 | } 43 | } 44 | 45 | reader.close(); 46 | return null; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/files/julius/ConfidenceNetwork.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.files.julius; 2 | 3 | import java.util.LinkedList; 4 | import java.util.List; 5 | import java.util.Vector; 6 | 7 | public class ConfidenceNetwork { 8 | 9 | public static class Word 10 | { 11 | public String word; 12 | public double weight; 13 | public Object object; 14 | 15 | public Word(String word, double weight) 16 | { 17 | this.word=word; 18 | this.weight=weight; 19 | 20 | object=null; 21 | } 22 | 23 | public String toString() 24 | { 25 | return 
word+":"+weight; 26 | } 27 | } 28 | 29 | public static class Section 30 | { 31 | public List words; 32 | 33 | public Section() 34 | { 35 | words=new LinkedList<>(); 36 | } 37 | 38 | public String toString() 39 | { 40 | String ret=""; 41 | 42 | for(Word w:words) 43 | { 44 | ret+="("+w+") "; 45 | } 46 | 47 | return ret; 48 | } 49 | } 50 | 51 | public Vector
sections; 52 | 53 | public ConfidenceNetwork() 54 | { 55 | sections=new Vector
(); 56 | } 57 | 58 | 59 | public double getAverageSectionWidth() 60 | { 61 | int ret=0; 62 | int count=0; 63 | 64 | for(Section s:sections) 65 | { 66 | ret+=s.words.size(); 67 | count++; 68 | } 69 | 70 | return ret/(double)count; 71 | } 72 | 73 | } 74 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/service/tasks/ConvertEncodingTask.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.service.tasks; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.security.MessageDigest; 6 | 7 | import javax.xml.xpath.XPath; 8 | import javax.xml.xpath.XPathConstants; 9 | import javax.xml.xpath.XPathExpressionException; 10 | 11 | import org.w3c.dom.Element; 12 | 13 | import pl.edu.pjwstk.kaldi.programs.FFMPEG; 14 | import pl.edu.pjwstk.kaldi.utils.Log; 15 | 16 | public class ConvertEncodingTask extends Task { 17 | 18 | private File input, output; 19 | 20 | @Override 21 | public void run() { 22 | 23 | state = State.RUNNING; 24 | 25 | try { 26 | 27 | FFMPEG.convertTo16k(input, output); 28 | 29 | state = State.SUCCEEDED; 30 | 31 | } catch (RuntimeException e) { 32 | Log.error("FFMPEG task.", e); 33 | state = State.FAILED; 34 | } 35 | } 36 | 37 | @Override 38 | public void loadSettings(XPath xpath, Element node) 39 | throws XPathExpressionException { 40 | 41 | input = new File((String) xpath.evaluate("input", node, 42 | XPathConstants.STRING)); 43 | output = new File((String) xpath.evaluate("output", node, 44 | XPathConstants.STRING)); 45 | 46 | } 47 | 48 | @Override 49 | public void updateHash(MessageDigest m) throws IOException { 50 | processFileHash(m, input); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/files/julius/WordList.java: -------------------------------------------------------------------------------- 1 | package 
pl.edu.pjwstk.kaldi.files.julius; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileInputStream; 6 | import java.io.IOException; 7 | import java.io.InputStreamReader; 8 | import java.util.HashSet; 9 | import java.util.Set; 10 | import java.util.Vector; 11 | 12 | import pl.edu.pjwstk.kaldi.utils.Settings; 13 | 14 | public class WordList { 15 | 16 | Set words; 17 | 18 | public WordList() { 19 | words = new HashSet(); 20 | } 21 | 22 | public void readFromDictionary(File file) throws IOException { 23 | words.clear(); 24 | 25 | BufferedReader reader = new BufferedReader(new InputStreamReader( 26 | new FileInputStream(file), Settings.julius_default_encoding)); 27 | 28 | String line; 29 | while ((line = reader.readLine()) != null) { 30 | if (line.trim().length() == 0) 31 | continue; 32 | 33 | String[] arr = line.split("\\s+"); 34 | 35 | words.add(arr[0]); 36 | } 37 | 38 | reader.close(); 39 | } 40 | 41 | public int countMissingWords(WordSequence sequence) { 42 | int ret = 0; 43 | 44 | for (String word : sequence.words) { 45 | if (!words.contains(word)) 46 | ret++; 47 | } 48 | 49 | return ret; 50 | } 51 | 52 | public Vector getMissingWords(WordSequence sequence) { 53 | Vector ret = new Vector(); 54 | 55 | for (String word : sequence.words) { 56 | if (!words.contains(word)) 57 | ret.add(word); 58 | } 59 | 60 | return ret; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/service/tasks/TestTask.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.service.tasks; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.nio.file.Files; 6 | import java.security.MessageDigest; 7 | 8 | import javax.xml.xpath.XPath; 9 | import javax.xml.xpath.XPathConstants; 10 | import javax.xml.xpath.XPathExpressionException; 11 | 12 | import org.w3c.dom.Element; 13 | 14 | import 
pl.edu.pjwstk.kaldi.utils.Log; 15 | import pl.edu.pjwstk.kaldi.utils.Settings; 16 | 17 | public class TestTask extends Task { 18 | 19 | private File input; 20 | private String output_name; 21 | 22 | @Override 23 | public void run() { 24 | 25 | state = State.RUNNING; 26 | Log.info("Starting test task..."); 27 | 28 | try { 29 | 30 | File output = new File(Settings.curr_task_dir, output_name); 31 | 32 | Files.copy(input.toPath(), output.toPath()); 33 | 34 | } catch (Exception e) { 35 | Log.error("Running Test Task", e); 36 | state = State.FAILED; 37 | return; 38 | } 39 | 40 | Log.info("Completed succesfully!"); 41 | state = State.SUCCEEDED; 42 | } 43 | 44 | @Override 45 | public void loadSettings(XPath xpath, Element node) 46 | throws XPathExpressionException { 47 | 48 | String input_file = (String) xpath.evaluate("input", node, 49 | XPathConstants.STRING); 50 | input = new File(input_file); 51 | 52 | output_name = (String) xpath.evaluate("output-name", node, 53 | XPathConstants.STRING); 54 | } 55 | 56 | @Override 57 | public void updateHash(MessageDigest m) throws IOException { 58 | processFileHash(m, input); 59 | m.digest(output_name.getBytes(Settings.default_encoding)); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/utils/PasswordObfuscator.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.utils; 2 | 3 | import java.nio.ByteBuffer; 4 | 5 | import javax.xml.bind.DatatypeConverter; 6 | 7 | public class PasswordObfuscator { 8 | 9 | private final static long passwordObfuscator = 2934278932478932493L; 10 | 11 | public static String obfuscatePassword(String password) { 12 | 13 | int longNumBytes = Long.SIZE / 8; 14 | 15 | ByteBuffer in = ByteBuffer.wrap(password.getBytes()); 16 | ByteBuffer out = ByteBuffer.allocate(in.capacity()); 17 | ByteBuffer obf = ByteBuffer.allocate(longNumBytes); 18 | 
obf.putLong(passwordObfuscator); 19 | obf.rewind(); 20 | 21 | while (in.remaining() >= longNumBytes) { 22 | long l = in.getLong(); 23 | out.putLong(l ^ passwordObfuscator); 24 | } 25 | 26 | while (in.hasRemaining()) { 27 | byte b = in.get(); 28 | out.put((byte) (b ^ obf.get())); 29 | } 30 | 31 | return DatatypeConverter.printBase64Binary(out.array()); 32 | 33 | } 34 | 35 | public static String deobfuscatePassword(String password) throws IllegalArgumentException { 36 | 37 | int longNumBytes = Long.SIZE / 8; 38 | 39 | ByteBuffer in = ByteBuffer.wrap(DatatypeConverter.parseBase64Binary(password)); 40 | ByteBuffer out = ByteBuffer.allocate(in.capacity()); 41 | ByteBuffer obf = ByteBuffer.allocate(longNumBytes); 42 | obf.putLong(passwordObfuscator); 43 | obf.rewind(); 44 | 45 | while (in.remaining() >= longNumBytes) { 46 | long l = in.getLong(); 47 | out.putLong(l ^ passwordObfuscator); 48 | } 49 | 50 | while (in.hasRemaining()) { 51 | byte b = in.get(); 52 | out.put((byte) (b ^ obf.get())); 53 | } 54 | 55 | return new String(out.array()); 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/files/RTTM.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.files; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileReader; 6 | import java.io.IOException; 7 | import java.io.PrintWriter; 8 | 9 | import pl.edu.pjwstk.kaldi.utils.Log; 10 | 11 | public class RTTM extends Segmentation { 12 | 13 | private double timebase = 1; 14 | 15 | public RTTM(double timebase) { 16 | this.timebase = timebase; 17 | } 18 | 19 | @Override 20 | public void read(File file) throws IOException { 21 | 22 | BufferedReader reader = new BufferedReader(new FileReader(file)); 23 | String line; 24 | 25 | while ((line = reader.readLine()) != null) { 26 | 27 | String[] tok = line.split("\\s+"); 28 | 29 | if 
(tok[0].equals("SPKR-INFO")) 30 | continue; 31 | 32 | double start = Double.parseDouble(tok[3]); 33 | double len = Double.parseDouble(tok[4]); 34 | String name = tok[7]; 35 | 36 | addSegment(0, start * timebase, (start + len) * timebase, name); 37 | } 38 | reader.close(); 39 | 40 | renameTier(0, "RTTM"); 41 | 42 | sort(); 43 | } 44 | 45 | @Override 46 | public void write(File file) throws IOException { 47 | 48 | PrintWriter writer = new PrintWriter(file); 49 | 50 | if (!tiers.isEmpty()) { 51 | if (tiers.size() > 1) { 52 | Log.warn("RTTM saving only first tier!"); 53 | } 54 | 55 | Tier tier = tiers.get(0); 56 | 57 | for (Segment seg : tier.segments) { 58 | // SPEAKER speaker 1 0.0 12.51 speaker_1.0 59 | writer.format("SPEAKER speaker 1 %f %f %s \n", 60 | seg.start_time / timebase, 61 | (seg.end_time - seg.start_time) / timebase, seg.name); 62 | } 63 | 64 | } 65 | 66 | writer.close(); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/files/CTM.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.files; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileReader; 6 | import java.io.IOException; 7 | import java.io.PrintWriter; 8 | 9 | public class CTM extends Segmentation { 10 | 11 | @Override 12 | public void read(File file) throws IOException { 13 | 14 | tiers.clear(); 15 | 16 | BufferedReader reader = new BufferedReader(new FileReader(file)); 17 | 18 | String line; 19 | String filename = ""; 20 | String name; 21 | double start, end; 22 | 23 | while ((line = reader.readLine()) != null) { 24 | String tok[] = line.split("\\s"); 25 | 26 | if (tok.length < 5 || tok.length > 6) { 27 | reader.close(); 28 | throw new IOException("wrong line in file " + file.getName() + ": " + line); 29 | } 30 | 31 | filename = tok[0]; 32 | try { 33 | start = Double.parseDouble(tok[2]); 34 | end = start + 
Double.parseDouble(tok[3]); 35 | } catch (NumberFormatException e) { 36 | reader.close(); 37 | throw new IOException("wrong line in file " + file.getName() + ": " + line); 38 | } 39 | name = tok[4]; 40 | 41 | addSegment(0, start, end, name); 42 | 43 | if (tok.length == 6) { 44 | addSegment(1, start, end, tok[5]); 45 | } 46 | } 47 | 48 | if (tiers.size() > 0) 49 | tiers.get(0).name = filename; 50 | if (tiers.size() > 1) 51 | tiers.get(1).name = "Confidence"; 52 | 53 | reader.close(); 54 | 55 | } 56 | 57 | @Override 58 | public void write(File file) throws IOException { 59 | 60 | PrintWriter writer = new PrintWriter(file); 61 | 62 | Tier tier = tiers.get(0); 63 | for (Segment segment : tier.segments) { 64 | writer.format("%s 1 %1.2f %1.2f %s %1.2f\n", tier.name, segment.start_time, segment.end_time 65 | - segment.start_time, segment.name, segment.confidence); 66 | } 67 | 68 | writer.close(); 69 | 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/service/tasks/AlignTask.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.service.tasks; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.security.MessageDigest; 6 | 7 | import javax.sound.sampled.UnsupportedAudioFileException; 8 | import javax.xml.xpath.XPath; 9 | import javax.xml.xpath.XPathConstants; 10 | import javax.xml.xpath.XPathExpressionException; 11 | 12 | import org.w3c.dom.Element; 13 | 14 | import pl.edu.pjwstk.kaldi.KaldiMain; 15 | import pl.edu.pjwstk.kaldi.utils.FileUtils; 16 | import pl.edu.pjwstk.kaldi.utils.Log; 17 | import pl.edu.pjwstk.kaldi.utils.Settings; 18 | 19 | public class AlignTask extends Task { 20 | 21 | private File input_audio; 22 | private File input_text; 23 | 24 | @Override 25 | public void run() { 26 | 27 | state = State.RUNNING; 28 | 29 | File clean_text = new File(Settings.curr_task_dir, "clean.txt"); 30 | 
File textgrid = new File(Settings.curr_task_dir, "out.TextGrid"); 31 | File labfile = new File(Settings.curr_task_dir, "out.lab"); 32 | 33 | try { 34 | FileUtils.cleanChars(input_text, clean_text, false, true, Settings.default_encoding); 35 | 36 | KaldiMain.alignFile(input_audio, clean_text, textgrid, labfile); 37 | 38 | state = State.SUCCEEDED; 39 | 40 | } catch (IOException | UnsupportedAudioFileException e) { 41 | Log.error("Decoding task.", e); 42 | state = State.FAILED; 43 | } 44 | 45 | } 46 | 47 | @Override 48 | public void loadSettings(XPath xpath, Element node) throws XPathExpressionException { 49 | 50 | input_audio = new File((String) xpath.evaluate("input-audio", node, XPathConstants.STRING)); 51 | input_text = new File((String) xpath.evaluate("input-text", node, XPathConstants.STRING)); 52 | 53 | } 54 | 55 | @Override 56 | public void updateHash(MessageDigest m) throws IOException { 57 | processFileHash(m, input_audio); 58 | processFileHash(m, input_text); 59 | 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/service/tasks/NSERTask.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.service.tasks; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.security.MessageDigest; 6 | 7 | import javax.xml.xpath.XPath; 8 | import javax.xml.xpath.XPathConstants; 9 | import javax.xml.xpath.XPathExpressionException; 10 | 11 | import org.w3c.dom.Element; 12 | 13 | import pl.edu.pjwstk.kaldi.programs.Python; 14 | import pl.edu.pjwstk.kaldi.utils.Log; 15 | import pl.edu.pjwstk.kaldi.utils.Settings; 16 | 17 | public class NSERTask extends Task { 18 | 19 | File input_file; 20 | 21 | @Override 22 | public void run() { 23 | 24 | File output_file = new File(Settings.curr_task_dir, "out.TextGrid"); 25 | File script = new File(Settings.python_scripts_dir, 26 | "NSER/pjatk/Main.py"); 27 | File 
model = new File(Settings.python_scripts_dir, 28 | "NSER/svc_model_prob.pklz"); 29 | File hcopy = new File(Settings.python_scripts_dir, "NSER/hcopy.conf"); 30 | File sd_dir = new File(Settings.python_scripts_dir, "NSER/SD"); 31 | 32 | String args[] = { "--tmp", Settings.curr_task_dir.getAbsolutePath(), 33 | "--sd", sd_dir.getAbsolutePath(), "--model", 34 | model.getAbsolutePath(), "--hcopy_conf", 35 | hcopy.getAbsolutePath(), input_file.getAbsolutePath(), 36 | output_file.getAbsolutePath() }; 37 | 38 | state = State.RUNNING; 39 | Log.info("Starting test task..."); 40 | 41 | Python.run(script, args); 42 | 43 | if (!output_file.exists()) 44 | state = State.FAILED; 45 | else 46 | state = State.SUCCEEDED; 47 | Log.info("Completed!"); 48 | 49 | } 50 | 51 | @Override 52 | public void loadSettings(XPath xpath, Element node) 53 | throws XPathExpressionException { 54 | 55 | String input_file_name = (String) xpath.evaluate("input-file", node, 56 | XPathConstants.STRING); 57 | input_file = new File(input_file_name); 58 | 59 | } 60 | 61 | @Override 62 | public void updateHash(MessageDigest m) throws IOException { 63 | processFileHash(m, input_file); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/programs/KaldiKWS.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.programs; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.IOException; 6 | import java.util.Locale; 7 | 8 | import pl.edu.pjwstk.kaldi.utils.Log; 9 | import pl.edu.pjwstk.kaldi.utils.ProgramLauncher; 10 | import pl.edu.pjwstk.kaldi.utils.Settings; 11 | 12 | public class KaldiKWS { 13 | 14 | public static void test() throws FileNotFoundException { 15 | if (!Settings.kaldi_kws_bin.exists()) 16 | throw new FileNotFoundException(Settings.kaldi_kws_bin.getAbsolutePath()); 17 | } 18 | 19 | public static void get_vocab(File 
lattice, File vocab) throws IOException { 20 | 21 | String[] cmd = new String[] { Settings.kaldi_kws_bin.getAbsolutePath(), lattice.getAbsolutePath(), 22 | vocab.getAbsolutePath() }; 23 | 24 | ProgramLauncher launcher = new ProgramLauncher(cmd); 25 | 26 | Log.verbose("KWS generating vocab: " + lattice.getAbsolutePath() + " -> " + vocab.getAbsolutePath()); 27 | launcher.run(); 28 | Log.verbose("Done."); 29 | 30 | } 31 | 32 | public static void detect(File lattice, File keywords, File dict, File out) throws IOException { 33 | 34 | String[] cmd = new String[] { Settings.kaldi_kws_bin.getAbsolutePath(), lattice.getAbsolutePath(), 35 | keywords.getAbsolutePath(), dict.getAbsolutePath() }; 36 | 37 | ProgramLauncher launcher = new ProgramLauncher(cmd); 38 | 39 | launcher.setStdoutFile(out); 40 | 41 | Log.verbose("KWS detecting keywords: " + lattice.getAbsolutePath() + " + " + keywords.getAbsolutePath() + " + " 42 | + dict.getAbsolutePath() + " -> " + out.getAbsolutePath()); 43 | launcher.run(); 44 | Log.verbose("Done."); 45 | 46 | } 47 | 48 | public static void main(String[] args) { 49 | try { 50 | Locale.setDefault(Locale.ENGLISH); 51 | 52 | Log.init("debug", true); 53 | 54 | get_vocab(new File("/home/guest/Desktop/lucas_kws/paris/130514_NWSU_120A0_O.txt"), 55 | new File("/home/guest/Desktop/lucas_kws/paris/vocab.txt")); 56 | 57 | } catch (Exception e) { 58 | e.printStackTrace(); 59 | } 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/service/tasks/ParallelTask.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.service.tasks; 2 | 3 | import java.io.IOException; 4 | import java.security.MessageDigest; 5 | import java.util.LinkedList; 6 | import java.util.List; 7 | 8 | import javax.xml.xpath.XPath; 9 | import javax.xml.xpath.XPathConstants; 10 | import javax.xml.xpath.XPathExpressionException; 11 | 12 | import 
org.w3c.dom.Element; 13 | import org.w3c.dom.NodeList; 14 | 15 | import pl.edu.pjwstk.kaldi.utils.Log; 16 | 17 | public class ParallelTask extends Task { 18 | 19 | private List tasks = new LinkedList(); 20 | 21 | @Override 22 | public void run() { 23 | 24 | state = State.RUNNING; 25 | 26 | Log.info("Parallel task starting tasks in parallel..."); 27 | List threads = new LinkedList(); 28 | for (Task task : tasks) { 29 | Thread th = new Thread(task); 30 | th.start(); 31 | threads.add(th); 32 | } 33 | 34 | Log.info("Parallel task waiting for tasks to finish..."); 35 | 36 | for (Thread th : threads) { 37 | try { 38 | th.join(); 39 | } catch (InterruptedException e) { 40 | // TODO: wait until actually finished? 41 | } 42 | } 43 | 44 | Log.info("All parallel tasks finished!"); 45 | 46 | for (Task task : tasks) { 47 | if (task.state != State.SUCCEEDED) { 48 | Log.info("A parallel task was not succesfull!"); 49 | state = State.FAILED; 50 | return; 51 | } 52 | } 53 | 54 | state = State.SUCCEEDED; 55 | Log.info("Parallel task succesful!"); 56 | } 57 | 58 | @Override 59 | public void loadSettings(XPath xpath, Element node) 60 | throws XPathExpressionException { 61 | 62 | NodeList tasks = (NodeList) xpath.evaluate("task", node, 63 | XPathConstants.NODESET); 64 | 65 | for (int i = 0; i < tasks.getLength(); i++) { 66 | Element elTask = (Element) tasks.item(i); 67 | 68 | String name = elTask.getAttribute("name"); 69 | 70 | Task task = getTask(name); 71 | 72 | task.loadSettings(xpath, elTask); 73 | } 74 | } 75 | 76 | @Override 77 | public void updateHash(MessageDigest m) throws IOException { 78 | for (Task task : tasks) { 79 | 80 | //TODO: also check names of tasks! 
81 | task.updateHash(m); 82 | } 83 | 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/programs/NGram.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.programs; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | 6 | import pl.edu.pjwstk.kaldi.utils.Log; 7 | import pl.edu.pjwstk.kaldi.utils.ProgramLauncher; 8 | import pl.edu.pjwstk.kaldi.utils.Settings; 9 | 10 | public class NGram { 11 | 12 | public static void test_mitlm() throws FileNotFoundException { 13 | if (!Settings.estimate_ngram_bin.exists()) 14 | throw new 15 | FileNotFoundException(Settings.estimate_ngram_bin.getAbsolutePath() 16 | ); 17 | } 18 | 19 | public static void test_srilm() throws FileNotFoundException { 20 | if (!Settings.ngram_count_bin.exists()) 21 | throw new FileNotFoundException(Settings.ngram_count_bin.getAbsolutePath()); 22 | } 23 | 24 | public static void estimate(File input, File vocab, File model, int order) { 25 | 26 | String[] cmd = new String[]{Settings.estimate_ngram_bin.getAbsolutePath(), "-t", input.getAbsolutePath(), 27 | "-o", "" + order, "-wv", vocab.getAbsolutePath(), "-wl", model.getAbsolutePath()}; 28 | 29 | ProgramLauncher launcher = new ProgramLauncher(cmd); 30 | 31 | launcher.setStdoutStream(new Log.Stream()); 32 | launcher.setStderrStream(new Log.Stream("ERR>>")); 33 | 34 | Log.verbose("Estimating N-Gram (MITLM)..."); 35 | launcher.run(); 36 | Log.verbose("Done."); 37 | 38 | } 39 | 40 | public static void srilm_estimate(File input, File vocab, File model, int order) { 41 | 42 | String[] cmd = new String[]{Settings.ngram_count_bin.getAbsolutePath(), "-order", "" + order, "-unk", 43 | "-map-unk", "", "-text", input.getAbsolutePath(), "-lm", model.getAbsolutePath(), "-write-vocab", 44 | vocab.getAbsolutePath(), "-wbdiscount", "-gt1min", "1", "-gt2min", "1", "-gt3min", "1"}; 45 | 46 | 
ProgramLauncher launcher = new ProgramLauncher(cmd); 47 | 48 | launcher.setStdoutStream(new Log.Stream()); 49 | launcher.setStderrStream(new Log.Stream("ERR>>")); 50 | 51 | Log.verbose("Estimating N-Gram (SRILM)..."); 52 | launcher.run(); 53 | Log.verbose("Done."); 54 | 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/grammars/Akt.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.grammars; 2 | 3 | import java.util.LinkedList; 4 | 5 | /** 6 | * Created by guest on 6/1/16. 7 | */ 8 | public class Akt { 9 | 10 | public static Grammar male_names() { 11 | Grammar ret=new Grammar(); 12 | LinkedList l=new LinkedList<>(); 13 | l.add("stanisław"); 14 | l.add("aleksander"); 15 | l.add("jan"); 16 | l.add("michał"); 17 | ret.setWordList(l); 18 | return ret; 19 | } 20 | 21 | public static Grammar female_names() { 22 | Grammar ret=new Grammar(); 23 | LinkedList l=new LinkedList<>(); 24 | l.add("teofila"); 25 | l.add("marianna"); 26 | l.add("jadwiga"); 27 | l.add("natalia"); 28 | l.add("aleksandra"); 29 | ret.setWordList(l); 30 | return ret; 31 | } 32 | 33 | public static Grammar last_names() { 34 | Grammar ret=new Grammar(); 35 | LinkedList l=new LinkedList<>(); 36 | l.add("doddek"); 37 | l.add("dąbkowska"); 38 | l.add("dudziak"); 39 | l.add("ciborowska"); 40 | l.add("kowalski"); 41 | ret.setWordList(l); 42 | return ret; 43 | } 44 | 45 | public static Grammar places() { 46 | Grammar ret=new Grammar(); 47 | LinkedList l=new LinkedList<>(); 48 | l.add("lipianka"); 49 | l.add("borawe"); 50 | l.add("warszawa"); 51 | l.add("poznań"); 52 | l.add("kraków"); 53 | ret.setWordList(l); 54 | return ret; 55 | } 56 | 57 | public static Grammar zgonu() { 58 | 59 | Grammar w_akt=new Grammar(); 60 | w_akt.setWord("akt"); 61 | 62 | Grammar num=Numbers.numbers(); 63 | num.fixend(); 64 | 65 | w_akt.attach(num); 66 | 67 | Grammar w_pl=new 
Grammar(); 68 | w_pl.setWord("miejscowość"); 69 | w_akt.attach(w_pl); 70 | 71 | w_akt.attach(places()); 72 | 73 | Grammar w_zm=new Grammar(); 74 | w_zm.setWord("zmarła"); 75 | w_akt.attach(w_zm); 76 | 77 | w_akt.attach(female_names()); 78 | w_akt.attach(last_names()); 79 | 80 | Grammar w_fat=new Grammar(); 81 | w_fat.setWord("ojciec"); 82 | w_akt.attach(w_fat); 83 | 84 | w_akt.attach(male_names()); 85 | w_akt.attach(last_names()); 86 | 87 | Grammar w_mot=new Grammar(); 88 | w_mot.setWord("matka"); 89 | w_akt.attach(w_mot); 90 | 91 | w_akt.attach(female_names()); 92 | w_akt.attach(last_names()); 93 | 94 | return w_akt; 95 | } 96 | 97 | } 98 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/programs/Transcriber.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.programs; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.IOException; 6 | import java.util.ArrayList; 7 | 8 | import pl.edu.pjwstk.kaldi.utils.FileUtils; 9 | import pl.edu.pjwstk.kaldi.utils.Log; 10 | import pl.edu.pjwstk.kaldi.utils.ProgramLauncher; 11 | import pl.edu.pjwstk.kaldi.utils.Settings; 12 | 13 | public class Transcriber { 14 | 15 | private static File transcriber_bin; 16 | private static File rules; 17 | private static File replacement; 18 | 19 | public static void init() { 20 | transcriber_bin = new File(Settings.transcriber_dir, "transcriber"); 21 | rules = new File(Settings.transcriber_dir, "transcription.rules"); 22 | replacement = new File(Settings.transcriber_dir, "replacement.rules"); 23 | } 24 | 25 | public static void test() throws FileNotFoundException { 26 | if (!Settings.transcriber_dir.exists()) 27 | throw new FileNotFoundException( 28 | Settings.transcriber_dir.getAbsolutePath()); 29 | if (!transcriber_bin.exists()) 30 | throw new FileNotFoundException(transcriber_bin.getAbsolutePath()); 31 | if 
(!rules.exists()) 32 | throw new FileNotFoundException(rules.getAbsolutePath()); 33 | if (!replacement.exists()) 34 | throw new FileNotFoundException(replacement.getAbsolutePath()); 35 | } 36 | 37 | public static void transcribe(File vocab, String vocab_enc, File dict, 38 | String dict_enc, boolean add_sent_boundaries) throws IOException { 39 | 40 | File temp_vocab = File.createTempFile("voc", ".txt"); 41 | File temp_dic = File.createTempFile("dic", ".txt"); 42 | 43 | ArrayList sent = new ArrayList(); 44 | sent.add(""); 45 | sent.add(""); 46 | sent.add(""); 47 | sent.add(""); 48 | sent.add("sil"); 49 | sent.add("SIL"); 50 | sent.add("-pau-"); 51 | 52 | FileUtils.removeLines(vocab, vocab_enc, temp_vocab, vocab_enc, sent, 53 | true); 54 | 55 | String[] cmd = new String[] { transcriber_bin.getAbsolutePath(), "-r", 56 | rules.getAbsolutePath(), "-w", replacement.getAbsolutePath(), 57 | "-i", temp_vocab.getAbsolutePath(), "-ie", vocab_enc, "-o", 58 | temp_dic.getAbsolutePath(), "-oe", dict_enc }; 59 | 60 | ProgramLauncher launcher = new ProgramLauncher(cmd); 61 | 62 | launcher.setStdoutStream(new Log.Stream()); 63 | launcher.setStderrStream(new Log.Stream("ERR>>")); 64 | 65 | Log.verbose("Transcribing..."); 66 | launcher.run(); 67 | Log.verbose("Done."); 68 | 69 | ArrayList dic = new ArrayList(); 70 | dic.add("SIL sil"); 71 | dic.add(" sil"); 72 | 73 | if (add_sent_boundaries) { 74 | dic.add(" sil"); 75 | dic.add(" sil"); 76 | } 77 | 78 | FileUtils.appendLines(temp_dic, dict_enc, dict, dict_enc, dic, true); 79 | 80 | temp_vocab.delete(); 81 | temp_dic.delete(); 82 | } 83 | 84 | } 85 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/programs/Shout.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.programs; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.lang.reflect.Field; 6 | import 
java.lang.reflect.Modifier; 7 | 8 | import pl.edu.pjwstk.kaldi.utils.Log; 9 | import pl.edu.pjwstk.kaldi.utils.ProgramLauncher; 10 | import pl.edu.pjwstk.kaldi.utils.Settings; 11 | 12 | public class Shout { 13 | 14 | private static File shout_segment = new File(Settings.shout_dir, 15 | "shout_segment"); 16 | 17 | private static File shout_cluster = new File(Settings.shout_dir, 18 | "shout_cluster"); 19 | 20 | public static void test() throws FileNotFoundException { 21 | 22 | Field fields[] = Shout.class.getDeclaredFields(); 23 | for (Field field : fields) { 24 | if (Modifier.isStatic(field.getModifiers()) 25 | && field.getType().getName().equals("java.io.File")) { 26 | 27 | try { 28 | 29 | File file = (File) field.get(null); 30 | 31 | if (file == null || !file.exists()) 32 | throw new FileNotFoundException("" + file); 33 | 34 | } catch (IllegalArgumentException | IllegalAccessException e) { 35 | Log.error("Internal error", e); 36 | } 37 | } 38 | } 39 | } 40 | 41 | public static void shout_segment(File audio_file, File seg_model, 42 | File out_mo) throws RuntimeException { 43 | 44 | String[] cmd = new String[] { shout_segment.getAbsolutePath(), "-a", 45 | audio_file.getAbsolutePath(), "-ams", 46 | seg_model.getAbsolutePath(), "-mo", out_mo.getAbsolutePath() }; 47 | 48 | ProgramLauncher launcher = new ProgramLauncher(cmd); 49 | launcher.setStdoutStream(new Log.Stream()); 50 | launcher.setStderrStream(new Log.Stream("ERR>>")); 51 | 52 | Log.verbose("shout_segment: " + audio_file.getName() + "->" 53 | + out_mo.getName()); 54 | launcher.run(); 55 | Log.verbose("Done."); 56 | 57 | if (launcher.getReturnValue() != 0) 58 | throw new RuntimeException("Retval: " + launcher.getReturnValue()); 59 | } 60 | 61 | public static void shout_cluster(File audio_file, File seg_out, 62 | File out_mo, int max_clusters) throws RuntimeException { 63 | 64 | String[] cmd = null; 65 | 66 | if (max_clusters > 0) { 67 | cmd = new String[] { shout_cluster.getAbsolutePath(), "-a", 68 | 
audio_file.getAbsolutePath(), "-mi", 69 | out_mo.getAbsolutePath(), "-mo", seg_out.getAbsolutePath(), 70 | "-l", audio_file.getName(), "-mc", "" + max_clusters }; 71 | } else { 72 | cmd = new String[] { shout_cluster.getAbsolutePath(), "-a", 73 | audio_file.getAbsolutePath(), "-mi", 74 | seg_out.getAbsolutePath(), "-mo", out_mo.getAbsolutePath(), 75 | "-l", audio_file.getName() }; 76 | } 77 | 78 | ProgramLauncher launcher = new ProgramLauncher(cmd); 79 | launcher.setStdoutStream(new Log.Stream()); 80 | launcher.setStderrStream(new Log.Stream("ERR>>")); 81 | 82 | Log.verbose("shout_cluster: " + audio_file.getName() + "->" 83 | + out_mo.getName()); 84 | launcher.run(); 85 | Log.verbose("Done."); 86 | 87 | if (launcher.getReturnValue() != 0) 88 | throw new RuntimeException("Retval: " + launcher.getReturnValue()); 89 | } 90 | 91 | } 92 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/files/julius/AlignedSequence.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.files.julius; 2 | 3 | import java.util.Vector; 4 | 5 | import pl.edu.pjwstk.kaldi.files.Segmentation; 6 | import pl.edu.pjwstk.kaldi.files.SegmentationList; 7 | 8 | public class AlignedSequence { 9 | 10 | public static class AlignedWord { 11 | public String word; 12 | public int start, end; 13 | public double score; 14 | public boolean correct; 15 | } 16 | 17 | int max_len = 0; 18 | 19 | public Vector sequence; 20 | 
21 | public AlignedSequence() { 22 | sequence = new Vector(); 23 | } 24 | 25 | public void addWord(String line) { 26 | if (!line.startsWith("[")) 27 | return; 28 | 29 | line = line.replace('[', ' '); 30 | line = line.replace(']', ' '); 31 | 32 | String[] args = line.split("\\s+"); 33 | 34 | AlignedWord word = new AlignedWord(); 35 | 36 | word.start = Integer.parseInt(args[1]); 37 | word.end = Integer.parseInt(args[2]); 38 | word.score = Double.parseDouble(args[3]); 39 | word.word = args[4]; 40 | word.correct = true; 41 | 42 | if (word.end > max_len) 43 | max_len = word.end; 44 | 45 | if (word.word.equals("") || word.word.equals("")) 46 | return; 47 | 48 | sequence.add(word); 49 | } 50 | 51 | public int getLength() { 52 | return max_len; 53 | } 54 | 55 | public String toString() { 56 | String ret = ""; 57 | for (AlignedWord w : sequence) { 58 | ret += w.word + " "; 59 | } 60 | return ret; 61 | } 62 | 63 | public AlignedSequence getCurrectSegments(boolean only_correct) { 64 | AlignedSequence ret = new AlignedSequence(); 65 | 66 | boolean last = sequence.get(0).correct; 67 | String words = ""; 68 | int start = sequence.get(0).start; 69 | int end = sequence.get(0).end; 70 | AlignedWord seq; 71 | 72 | for (AlignedWord word : sequence) { 73 | if (word.correct == last) { 74 | words += word.word + " "; 75 | end = word.end; 76 | } else { 77 | if (last || !only_correct) { 78 | seq = new AlignedWord(); 79 | seq.start = start; 80 | seq.end = end; 81 | seq.correct = last; 82 | seq.word = words; 83 | ret.sequence.add(seq); 84 | if (ret.max_len < seq.end) 85 | ret.max_len = seq.end; 86 | } 87 | 88 | start = word.start; 89 | end = word.end; 90 | words = word.word + " "; 91 | last = word.correct; 92 | } 93 | } 94 | 95 | if (last || !only_correct) { 96 | seq = new AlignedWord(); 97 | seq.start = start; 98 | seq.end = end; 99 | seq.correct = last; 100 | seq.word = words; 101 | ret.sequence.add(seq); 102 | if (ret.max_len < seq.end) 103 | ret.max_len = seq.end; 104 | } 105 | 106 | 
return ret; 107 | } 108 | 109 | public Segmentation toSegmentation(double win_off) { 110 | SegmentationList ret = new SegmentationList(); 111 | 112 | ret.renameTier(0, "Julius"); 113 | 114 | for (AlignedWord word : sequence) { 115 | ret.addSegment(0, word.start * win_off, word.end * win_off, 116 | word.word, word.score); 117 | } 118 | 119 | return ret; 120 | } 121 | 122 | } 123 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/service/ServiceTask.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.service; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.util.Locale; 6 | 7 | import pl.edu.pjwstk.kaldi.programs.KaldiKWS; 8 | import pl.edu.pjwstk.kaldi.programs.KaldiScripts; 9 | import pl.edu.pjwstk.kaldi.programs.KaldiUtils; 10 | import pl.edu.pjwstk.kaldi.programs.NGram; 11 | import pl.edu.pjwstk.kaldi.programs.Shout; 12 | import pl.edu.pjwstk.kaldi.programs.Transcriber; 13 | import pl.edu.pjwstk.kaldi.service.database.dbTasks; 14 | import pl.edu.pjwstk.kaldi.service.database.dbTasks.dbStatus; 15 | import pl.edu.pjwstk.kaldi.service.tasks.Task; 16 | import pl.edu.pjwstk.kaldi.utils.Log; 17 | import pl.edu.pjwstk.kaldi.utils.ParseOptions; 18 | import pl.edu.pjwstk.kaldi.utils.Settings; 19 | 20 | public class ServiceTask { 21 | 22 | public static void main(String[] args) { 23 | 24 | dbTasks.Task db_task = null; 25 | 26 | try { 27 | 28 | Locale.setDefault(Locale.ENGLISH); 29 | 30 | ParseOptions po = new ParseOptions("Kaldi Task", "Task runner for Kaldi Service."); 31 | 32 | po.addArgument(Integer.class, "task_id", "Database ID of the given task."); 33 | 34 | po.addArgument("settings", 's', File.class, "Load program settings from a file", null); 35 | po.addArgument("restart", 'r', Boolean.class, "Restart if task already exists!", "false"); 36 | 37 | if (!po.parse(args)) 38 | return; 39 | 40 | if 
(po.getArgument("settings") != null) { 41 | Settings.loadSettings((File) po.getArgument("settings")); 42 | } 43 | 44 | int id = (Integer) po.getArgument(0); 45 | 46 | db_task = dbTasks.getByID(id); 47 | 48 | try { 49 | int pid = Integer.parseInt(new File("/proc/self").getCanonicalFile().getName()); 50 | dbTasks.changePID(db_task, pid); 51 | } catch (NumberFormatException | IOException | SecurityException e) { 52 | throw new RuntimeException("Cannot get PID!", e); 53 | } 54 | 55 | File task_file = new File(db_task.task_file); 56 | File task_dir = new File(task_file.getParent(), "task"); 57 | 58 | if (task_dir.exists()) { 59 | if ((Boolean) po.getArgument("restart")) { 60 | File newname = File.createTempFile("task", "bak", task_dir.getParentFile()); 61 | newname.delete(); 62 | task_dir.renameTo(newname); 63 | } else 64 | throw new RuntimeException("Task dir already exists!"); 65 | } 66 | 67 | task_dir.mkdirs(); 68 | 69 | Settings.curr_task_dir = task_dir; 70 | Settings.log_dir = Settings.curr_task_dir; 71 | Settings.temp_dir = new File(Settings.curr_task_dir, "tmp"); 72 | Settings.temp_dir2 = new File(Settings.curr_task_dir, "tmp2"); 73 | 74 | Log.initFile("KaldiTask", true); 75 | 76 | KaldiUtils.init(); 77 | KaldiUtils.test(); 78 | KaldiScripts.init(Settings.curr_task_dir); 79 | KaldiScripts.test(); 80 | Shout.test(); 81 | Transcriber.init(); 82 | Transcriber.test(); 83 | NGram.test_srilm(); 84 | KaldiKWS.test(); 85 | 86 | Task.run(task_file); 87 | 88 | dbTasks.changeStatus(db_task, dbStatus.done); 89 | 90 | } catch (Exception e) { 91 | Log.error("Main.", e); 92 | if (db_task != null) 93 | dbTasks.changeStatus(db_task, dbStatus.dead); 94 | } 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/grammars/Numbers.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.grammars; 2 | 3 | import java.util.LinkedList; 4 | import 
java.util.Map; 5 | import java.util.TreeMap; 6 | 7 | /** 8 | * Created by guest on 5/23/16. 9 | */ 10 | public class Numbers { 11 | 12 | 13 | public static Grammar numbers() { 14 | 15 | Grammar zero=new Grammar(); 16 | zero.setWord("zero"); 17 | 18 | Map join_link=new TreeMap(); 19 | join_link.put(0,0); 20 | join_link.put(1,1); 21 | 22 | Grammar n1_9=new Grammar(); 23 | LinkedList l1_9=new LinkedList<>(); 24 | l1_9.add("jeden"); 25 | l1_9.add("dwa"); 26 | l1_9.add("trzy"); 27 | l1_9.add("cztery"); 28 | l1_9.add("pięć"); 29 | l1_9.add("sześć"); 30 | l1_9.add("siedem"); 31 | l1_9.add("osiem"); 32 | l1_9.add("dziewięć"); 33 | n1_9.setWordList(l1_9); 34 | 35 | Grammar n1_99=n1_9.clone(); 36 | 37 | Grammar n10_19=new Grammar(); 38 | LinkedList l10_19=new LinkedList<>(); 39 | l10_19.add("dziesięć"); 40 | l10_19.add("jedenaście"); 41 | l10_19.add("dwanaście"); 42 | l10_19.add("trzynaście"); 43 | l10_19.add("czternaście"); 44 | l10_19.add("piętnaście"); 45 | l10_19.add("szesnaście"); 46 | l10_19.add("siedemnaście"); 47 | l10_19.add("osiemnaście"); 48 | l10_19.add("dziewiętnaście"); 49 | n10_19.setWordList(l10_19); 50 | 51 | n1_99.merge(n10_19,join_link); 52 | 53 | Grammar n20_90=new Grammar(); 54 | LinkedList l20_90=new LinkedList<>(); 55 | l20_90.add("dwadzieścia"); 56 | l20_90.add("trzydzieści"); 57 | l20_90.add("czterdzieści"); 58 | l20_90.add("piećdziesiąt"); 59 | l20_90.add("sześcdziesiąt"); 60 | l20_90.add("siedemdziesiąt"); 61 | l20_90.add("osiemdziesiąt"); 62 | l20_90.add("dziewiedziesiąt"); 63 | n20_90.setWordList(l20_90); 64 | 65 | Grammar n2x_9x=n20_90.clone(); 66 | n2x_9x.attach(n1_9,1); 67 | n2x_9x.end_nodes.add(1); 68 | 69 | 70 | Map join_link2=new TreeMap(); 71 | join_link2.put(0,0); 72 | join_link2.put(2,1); 73 | 74 | n1_99.merge(n2x_9x,join_link2); 75 | 76 | Grammar n100_900=new Grammar(); 77 | LinkedList l100_900=new LinkedList<>(); 78 | l100_900.add(""); 79 | l100_900.add("sto"); 80 | l100_900.add("dwieście"); 81 | l100_900.add("trzysta"); 82 | 
l100_900.add("czterysta"); 83 | l100_900.add("pięćset"); 84 | l100_900.add("sześćset"); 85 | l100_900.add("siedemset"); 86 | l100_900.add("osiemset"); 87 | l100_900.add("dziewięćset"); 88 | n100_900.setWordList(l100_900); 89 | 90 | Grammar n1xx_9xx=n100_900.clone(); 91 | n1xx_9xx.attach(n1_99,1); 92 | n1xx_9xx.end_nodes.add(1); 93 | 94 | Grammar n0_999=zero.clone(); 95 | n0_999.merge(n1xx_9xx,join_link2); 96 | 97 | return n0_999; 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/files/julius/Dictionary.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.files.julius; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileInputStream; 6 | import java.io.IOException; 7 | import java.io.InputStreamReader; 8 | import java.io.PrintWriter; 9 | import java.util.HashMap; 10 | import java.util.HashSet; 11 | import java.util.LinkedList; 12 | import java.util.Map; 13 | import java.util.Set; 14 | 15 | import pl.edu.pjwstk.kaldi.utils.Settings; 16 | 17 | public class Dictionary { 18 | 19 | class Transcriptions { 20 | public LinkedList transcription; 21 | 22 | public Transcriptions(String word) { 23 | transcription = new LinkedList(); 24 | transcription.add(word); 25 | } 26 | } 27 | 28 | Map dictionary; 29 | 30 | public Dictionary() { 31 | dictionary = new HashMap(); 32 | } 33 | 34 | public void load(File file) throws IOException, RuntimeException { 35 | BufferedReader reader = new BufferedReader(new InputStreamReader( 36 | new FileInputStream(file), Settings.julius_default_encoding)); 37 | 38 | dictionary.clear(); 39 | 40 | String line, word, trans; 41 | int ret; 42 | 43 | while ((line = reader.readLine()) != null) { 44 | if (line.length() == 0) 45 | continue; 46 | 47 | ret = line.indexOf(' '); 48 | 49 | if (ret < 0) { 50 | reader.close(); 51 | throw new RuntimeException("Error parsing line: " + 
line); 52 | } 53 | 54 | word = line.substring(0, ret); 55 | trans = line.substring(ret + 1); 56 | 57 | if (dictionary.containsKey(word)) { 58 | dictionary.get(word).transcription.add(trans); 59 | } else 60 | dictionary.put(word, new Transcriptions(trans)); 61 | } 62 | 63 | reader.close(); 64 | } 65 | 66 | public void generateSubDictionary(File wordlist, File subdic) 67 | throws IOException, RuntimeException { 68 | BufferedReader reader = new BufferedReader( 69 | new InputStreamReader(new FileInputStream(wordlist), 70 | Settings.julius_default_encoding)); 71 | PrintWriter writer = new PrintWriter(subdic, 72 | Settings.julius_default_encoding); 73 | 74 | Set added = new HashSet(); 75 | 76 | writer.println(" sil"); 77 | writer.println(" sil"); 78 | writer.println(" sil"); 79 | 80 | added.add(""); 81 | added.add(""); 82 | added.add(""); 83 | 84 | boolean error = false; 85 | String line; 86 | String arr[]; 87 | Transcriptions trans; 88 | 89 | while ((line = reader.readLine()) != null) { 90 | arr = line.split("\\s+"); 91 | for (String word : arr) { 92 | word = word.trim(); 93 | if (word.length() == 0) 94 | continue; 95 | 96 | if (added.contains(word)) 97 | continue; 98 | 99 | added.add(word); 100 | 101 | if (!dictionary.containsKey(word)) { 102 | error = true; 103 | System.out.println("ERROR: missing word in dictionary: " 104 | + word); 105 | } else { 106 | trans = dictionary.get(word); 107 | for (String str_trans : trans.transcription) 108 | writer.println(word + " " + str_trans); 109 | } 110 | 111 | } 112 | } 113 | 114 | reader.close(); 115 | writer.close(); 116 | 117 | if (error) { 118 | throw new RuntimeException("Didn't find all words!"); 119 | } 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/files/julius/LatticeNode.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.files.julius; 2 | import java.util.Vector; 
3 | 4 | public class LatticeNode 5 | { 6 | public int id; 7 | public int time_start, time_end; 8 | public Vector left,right; 9 | public Vector left_score,right_score; 10 | public double lscore, f, f_prev, g_head, g_prev; 11 | public double forward_score, backword_score, AMavg, cmscore, graphcm; 12 | public String headphone, tailphone; 13 | public String word; 14 | 15 | public Object object; 16 | 17 | /*********** 18 | * Example: 19 | * 20 | * 19: [414..448] left=15,10 right=22 left_lscore=-39.554115,-51.541363 21 | * right_lscore=-57.233547 lscore_tmp=-57.233547 wid=4411 name="dobra" lname="dobra" 22 | * f=-85518.468750 f_prev=-85517.171875 g_head=-66810.640625 g_prev=-65217.273438 23 | * forward_score=-3157.783691 backword_score=-916.954895 AMavg=-45.524776 cmscore=0.079091 24 | * graphcm=0.003168 headphone=m-d+o tailphone=r-a+tS 25 | **********/ 26 | public LatticeNode(String line) throws NumberFormatException, IndexOutOfBoundsException 27 | { 28 | left=new Vector(); 29 | right=new Vector(); 30 | left_score=new Vector(); 31 | right_score=new Vector(); 32 | 33 | String [] arr = line.split("\\s+"); 34 | String [] arr2; 35 | 36 | if(arr.length<2) 37 | throw new RuntimeException("Error parsing line [not enough tokens]: "+line); 38 | 39 | id=Integer.parseInt(arr[0].substring(0,arr[0].length()-1)); 40 | 41 | arr2=arr[1].split("\\.\\."); 42 | time_start=Integer.parseInt(arr2[0].substring(1)); 43 | time_end=Integer.parseInt(arr2[1].substring(0,arr2[1].length()-1)); 44 | 45 | String key,value; 46 | for(int el=2; el"); 97 | writer.println("size = " + tiers.size()); 98 | writer.println("item []:"); 99 | 100 | for (int i = 0; i < tiers.size(); i++) { 101 | Tier tier = tiers.get(i); 102 | 103 | writer.println("\titem [" + (i + 1) + "]:"); 104 | 105 | writer.println("\t\tclass = \"IntervalTier\""); 106 | writer.println("\t\tname = \"" + tier.name + "\""); 107 | 108 | writer.println("\t\txmin = " + tier.min()); 109 | writer.println("\t\txmax = " + tier.max()); 110 | 
writer.println("\t\tintervals: size = " + tier.segments.size()); 111 | 112 | for (int j = 0; j < tier.segments.size(); j++) { 113 | Segment segment = tier.segments.get(j); 114 | 115 | writer.println("\t\tintervals [" + (j + 1) + "]:"); 116 | writer.println("\t\t\txmin = " + segment.start_time); 117 | writer.println("\t\t\txmax = " + segment.end_time); 118 | writer.println("\t\t\ttext = \"" + segment.name + "\""); 119 | } 120 | } 121 | 122 | writer.close(); 123 | 124 | } 125 | } 126 |
--------------------------------------------------------------------------------
/src/main/java/pl/edu/pjwstk/kaldi/utils/ProgramLauncher.java:
--------------------------------------------------------------------------------
package pl.edu.pjwstk.kaldi.utils;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.List;
import java.util.Map;

import org.apache.commons.exec.CommandLine;
import org.apache.commons.exec.DefaultExecutor;
import org.apache.commons.exec.ExecuteException;
import org.apache.commons.exec.ExecuteResultHandler;
import org.apache.commons.exec.PumpStreamHandler;
import org.apache.commons.exec.environment.EnvironmentUtils;

/**
 * Runs an external program through commons-exec and records its exit code.
 *
 * Configure the instance with the setters, call {@link #run()}, then read the
 * outcome with {@link #getReturnValue()}.
 */
public class ProgramLauncher implements Runnable {

    private CommandLine cmd;
    private DefaultExecutor exec = new DefaultExecutor();
    private OutputStream ostr = System.out;
    private OutputStream estr = System.err;
    private int retVal = -1;
    private boolean suppressOutput = false;
    private boolean running = false;
    private boolean async = false;
    private String addPath = null;
    private String lib_env = null;

    /**
     * @param cmd_arr
     *            executable followed by its arguments; element 0 is the
     *            program, every further element is added as one argument
     */
    public ProgramLauncher(String[] cmd_arr) {
        cmd = new CommandLine(cmd_arr[0]);
        for (int i = 1; i < cmd_arr.length; i++)
            cmd.addArgument(cmd_arr[i]);
    }

    /**
     * Sets the LD_LIBRARY_PATH passed to the child process.
     *
     * @param libraries
     *            library locations, joined with the platform path separator
     */
    public void setLibraries(List<File> libraries) {
        // Build the value separately so that the first entry is not preceded
        // by a separator (the variable-name prefix must not count as content).
        StringBuilder value = new StringBuilder();
        for (File lib : libraries) {
            if (value.length() > 0)
                value.append(File.pathSeparator);
            value.append(lib.getAbsolutePath());
        }
        lib_env = "LD_LIBRARY_PATH=" + value;
    }

    /**
     * Adds a directory that is appended to the child's PATH.
     *
     * @param path
     *            directory to append; may be called repeatedly
     */
    public void addPath(File path) {
        // Starting from null must not go through "+=", which would produce
        // the literal text "null" in front of the first entry.
        if (addPath == null)
            addPath = path.getAbsolutePath();
        else
            addPath += File.pathSeparator + path.getAbsolutePath();
    }

    /** Redirects the child's standard output to the given stream. */
    public void setStdoutStream(OutputStream ostr) {
        this.ostr = ostr;
    }

    /** Redirects the child's standard error to the given stream. */
    public void setStderrStream(OutputStream estr) {
        this.estr = estr;
    }

    /**
     * Redirects the child's standard output to a file.
     *
     * @throws FileNotFoundException
     *             if the file cannot be opened for writing
     */
    public void setStdoutFile(File stdout) throws FileNotFoundException {
        setStdoutStream(new FileOutputStream(stdout));
    }

    /** Sets the working directory of the child process. */
    public void setCwd(File cwd) {
        exec.setWorkingDirectory(cwd);
    }

    /**
     * When true, the configured stdout/stderr streams are not installed.
     */
    public void setSuppressOutput(boolean suppressOutput) {
        // NOTE(review): no alternative stream handler is installed in that
        // case, so the executor's own default handler stays active - confirm
        // that this really silences the child.
        this.suppressOutput = suppressOutput;
    }

    /**
     * When true, {@link #run()} starts the program and returns without
     * waiting for it.
     */
    public void setAsynchronous(boolean async) {
        this.async = async;
    }

    /**
     * Launches the program. In synchronous mode the exit code is stored for
     * {@link #getReturnValue()}; a failing exit code reported through
     * {@link ExecuteException} is recorded instead of being discarded, and a
     * launch failure is logged and recorded as -1.
     */
    @Override
    public void run() {

        running = true;

        try {
            if (!suppressOutput) {
                PumpStreamHandler stream = new PumpStreamHandler(ostr, estr);
                exec.setStreamHandler(stream);
            }

            if (lib_env != null || addPath != null) {
                @SuppressWarnings("rawtypes")
                Map env = EnvironmentUtils.getProcEnvironment();
                if (lib_env != null)
                    EnvironmentUtils.addVariableToEnvironment(env, lib_env);
                if (addPath != null) {
                    // The variable may be spelled differently depending on the platform.
                    String path = (String) env.get("PATH");
                    if (path == null)
                        path = (String) env.get("path");
                    if (path == null)
                        path = (String) env.get("Path");
                    if (path == null)
                        path = "";
                    if (path.length() > 0 && !path.endsWith(File.pathSeparator))
                        path += File.pathSeparator;
                    path = "PATH=" + path + addPath;
                    EnvironmentUtils.addVariableToEnvironment(env, path);
                }

                retVal = 0;

                if (async)
                    exec.execute(cmd, env, blankHandler);
                else
                    retVal = exec.execute(cmd, env);

            } else {

                retVal = 0;

                if (async)
                    exec.execute(cmd, blankHandler);
                else
                    retVal = exec.execute(cmd);
            }

        } catch (ExecuteException e) {
            // Not logged (callers inspect the return value), but the exit
            // code has to be kept: leaving retVal at 0 would report success.
            retVal = e.getExitValue();
        } catch (Exception e) {
            retVal = -1;
            Log.error("Running program: " + cmd, e);
        } finally {
            running = false;
        }

    }

    /**
     * @return exit code of the last synchronous run; -1 before the first run
     *         or when the program could not be launched; 0 right after an
     *         asynchronous start
     */
    public int getReturnValue() {
        return retVal;
    }

    /**
     * @return true only while {@link #run()} itself is executing; an
     *         asynchronously started process is not tracked
     */
    public boolean isRunning() {
        return running;
    }

    /** Result handler for asynchronous runs; ignores both outcomes. */
    ExecuteResultHandler blankHandler = new ExecuteResultHandler() {
        public void onProcessFailed(ExecuteException e) {
        }

        public void onProcessComplete(int exitValue) {
        }
    };
}

--------------------------------------------------------------------------------
/src/main/java/pl/edu/pjwstk/kaldi/service/tasks/KeywordSpottingTask.java:
--------------------------------------------------------------------------------
package pl.edu.pjwstk.kaldi.service.tasks;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.security.MessageDigest;
import java.util.Vector;

import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;

import org.w3c.dom.Element;

import pl.edu.pjwstk.kaldi.files.TextGrid;
import pl.edu.pjwstk.kaldi.programs.KaldiKWS;
import pl.edu.pjwstk.kaldi.programs.KaldiUtils;
import pl.edu.pjwstk.kaldi.programs.Transcriber;
import pl.edu.pjwstk.kaldi.utils.FileUtils;
import pl.edu.pjwstk.kaldi.utils.Log;
import pl.edu.pjwstk.kaldi.utils.Settings;

/**
 * Task that searches the "aligned_lattice" file of the current task directory
 * for a list of keywords and writes the hits to "out.TextGrid".
 */
public class KeywordSpottingTask extends Task {

    private File input_keywords;
    private File words_table;

    /**
     * Runs the keyword-spotting pipeline. Fails immediately when the lattice
     * is not readable; any exception from a later step also marks the task as
     * failed.
     */
    @Override
    public void run() {

        state = State.RUNNING;

        try {
            File lattice = new File(Settings.curr_task_dir, "aligned_lattice");
            File lattice_int = new File(Settings.curr_task_dir, "kws_lattice.int");
            File lattice_txt = new File(Settings.curr_task_dir, "kws_lattice.txt");
            File kw_clean = new File(Settings.curr_task_dir, "kws_clean_words");
            File lat_vocab = new File(Settings.curr_task_dir, "kws_lat_vocab");
            File vocab = new File(Settings.curr_task_dir, "kws_vocab");
            File dict = new File(Settings.curr_task_dir, "kws_dict");
            File kws_out = new File(Settings.curr_task_dir, "kws.txt");
            File tg_out = new File(Settings.curr_task_dir, "out.TextGrid");

            if (!lattice.canRead()) {
                Log.error("Cannot read lattice for task: " + Settings.curr_task_dir);
                Log.error("Keyword spotting HAS to be run after decoding the file first!");
                state = State.FAILED;
                return;
            }

            cleanKeywords(input_keywords, kw_clean, Settings.default_encoding);

            KaldiUtils.lattice_copy("ark", lattice, "ark,t", lattice_int, true);

            KaldiUtils.int2sym("3", words_table, lattice_int, lattice_txt);

            KaldiKWS.get_vocab(lattice_txt, lat_vocab);

            FileUtils.mergeFiles(new File[] { lat_vocab, kw_clean }, vocab, Settings.default_encoding, true);

            FileUtils.sort_uniq(vocab, vocab, Settings.default_encoding);

            Transcriber.transcribe(vocab, Settings.default_encoding, dict, Settings.default_encoding, false);

            KaldiKWS.detect(lattice_txt, kw_clean, dict, kws_out);

            convertKWSToTG(kws_out, tg_out);

            state = State.SUCCEEDED;

        } catch (Exception e) {
            Log.error("KWS task.", e);
            state = State.FAILED;
        }

    }

    /**
     * Normalizes the keyword list: lower-cases each line, turns '-', '_' and
     * ',' into spaces, drops every character that is not whitespace, a word
     * character, a dot or a Polish letter, and writes one word per line.
     *
     * @param input
     *            keyword file, read with the default encoding from Settings
     * @param output
     *            destination file
     * @param encoding
     *            encoding of the destination file
     */
    private static void cleanKeywords(File input, File output, String encoding) throws IOException {

        Vector<String> keywords = FileUtils.readLines(input, Settings.default_encoding);

        // try-with-resources: the writer is closed even if a line fails.
        try (PrintWriter writer = new PrintWriter(output, encoding)) {

            for (String kw : keywords) {
                kw = kw.toLowerCase().trim();

                kw = kw.replaceAll("[-_,]", " ");

                kw = kw.replaceAll("[^\\s\\w\\.ĄĆĘŁŃÓŚŹŻąćęłńóśźż]", "");

                if (kw.isEmpty())
                    continue;

                for (String w : kw.split("\\s+")) {
                    // A line that now starts with whitespace yields an empty
                    // first token; it must not become an empty keyword.
                    if (!w.isEmpty())
                        writer.println(w);
                }
            }
        }

    }

    /**
     * Converts keyword-spotting output into a TextGrid. Each non-blank line
     * needs at least four whitespace-separated fields: field 0 and field 3
     * form the label, field 1 is the start and field 2 the length.
     *
     * @throws IOException
     *             if a line has too few fields or non-numeric times
     */
    private static void convertKWSToTG(File kws, File tg) throws IOException {
        Vector<String> kw_lines = FileUtils.readLines(kws, Settings.default_encoding);

        TextGrid gridfile = new TextGrid();

        for (String line : kw_lines) {
            String trimmed = line.trim();
            if (trimmed.isEmpty())
                continue;

            String[] ss = trimmed.split("\\s+");
            if (ss.length < 4)
                throw new IOException("Malformed line in " + kws.getName() + ": " + line);

            double start;
            double len;
            try {
                start = Double.parseDouble(ss[1]);
                len = Double.parseDouble(ss[2]);
            } catch (NumberFormatException e) {
                throw new IOException("Malformed line in " + kws.getName() + ": " + line, e);
            }
            gridfile.addSegment(0, start, start + len, ss[0] + " <" + ss[3] + ">");
        }

        gridfile.write(tg);
    }

    /** Reads the "input-keywords" and "words-table" paths from the task node. */
    @Override
    public void loadSettings(XPath xpath, Element node) throws XPathExpressionException {
        input_keywords = new File((String) xpath.evaluate("input-keywords", node, XPathConstants.STRING));
        words_table = new File((String) xpath.evaluate("words-table", node, XPathConstants.STRING));
    }

    /** Adds the contents of the keyword file to the task hash. */
    @Override
    public void updateHash(MessageDigest m) throws IOException {
        // NOTE(review): words_table is not part of the hash - confirm that is intended.
        processFileHash(m, input_keywords);
    }

}

--------------------------------------------------------------------------------
/src/main/java/pl/edu/pjwstk/kaldi/service/tasks/SpeakerDiarizationTask.java:
--------------------------------------------------------------------------------
package pl.edu.pjwstk.kaldi.service.tasks;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.security.MessageDigest;
import java.util.Locale;

import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;

import org.w3c.dom.Element;

import pl.edu.pjwstk.kaldi.programs.KaldiScripts;
import pl.edu.pjwstk.kaldi.programs.KaldiUtils;
import pl.edu.pjwstk.kaldi.programs.Lium;
import pl.edu.pjwstk.kaldi.programs.Shout;
import pl.edu.pjwstk.kaldi.programs.Sox;
import pl.edu.pjwstk.kaldi.utils.Log;
import pl.edu.pjwstk.kaldi.utils.Settings;

/**
 * Task that runs speaker diarization on one input file with either the
 * "shout" or the "lium" method.
 */
public class SpeakerDiarizationTask extends Task {

    private static enum Method {
        shout, lium
    }

    private File input_file;
    private Method method;

    private int max_clusters;

    /**
     * Checks that the input is readable, then dispatches to the selected
     * method. Exceptions from the method mark the task as failed.
     */
    @Override
    public void run() {

        state = State.RUNNING;

        if (!input_file.canRead()) {
            Log.error("File cannot be read: " + input_file.getAbsolutePath());
            state = State.FAILED;
            return;
        }

        try {

            switch (method) {
            case shout:
                shout();
                break;
            case lium:
                lium();
                break;
            default:
                Log.info("Speaker diarization method not implemented!");
                Log.info("Method: " + method.toString());
                // Nothing was run, so the task must not be reported as successful.
                state = State.FAILED;
                return;
            }

            state = State.SUCCEEDED;

        } catch (RuntimeException | FileNotFoundException e) {
            Log.error("Speaker diarization task.", e);
            state = State.FAILED;
        }
    }

    /**
     * Converts the input with Sox and runs the Shout segmentation and
     * clustering tools on the result.
     *
     * @throws FileNotFoundException
     *             if the "shout.sad" model is not readable
     */
    private void shout() throws RuntimeException, FileNotFoundException {

        File raw_file = new File(Settings.curr_task_dir, "file.raw");
        File seg_out = new File(Settings.curr_task_dir, "seg.out");
        File out_mo = new File(Settings.curr_task_dir, "out.mo");

        File seg_model = new File(Settings.shout_models, "shout.sad");

        if (!seg_model.canRead()) {
            throw new FileNotFoundException(seg_model.getAbsolutePath());
        }

        Sox.convert(input_file, raw_file);
        // NOTE(review): seg_out is passed to the clustering step but is not
        // handed to the segmentation step - verify against Shout's signatures.
        Shout.shout_segment(raw_file, seg_model, out_mo);
        Shout.shout_cluster(raw_file, seg_out, out_mo, max_clusters);

    }

    /** Checks the Lium installation and runs it on the input file. */
    private void lium() throws RuntimeException, FileNotFoundException {

        Lium.test();

        Lium.diarize(input_file);
    }

    /**
     * Reads "input-file", "max-clusters" and "method" from the task node.
     *
     * @throws XPathExpressionException
     *             if "max-clusters" is not an integer or "method" is not one
     *             of the known methods
     */
    @Override
    public void loadSettings(XPath xpath, Element node)
            throws XPathExpressionException {

        input_file = new File((String) xpath.evaluate("input-file", node,
                XPathConstants.STRING));

        String max_clust_string = (String) xpath.evaluate("max-clusters", node,
                XPathConstants.STRING);

        try {
            // Element text may carry surrounding whitespace, which parseInt rejects.
            max_clusters = Integer.parseInt(max_clust_string.trim());
        } catch (NumberFormatException e) {
            throw new XPathExpressionException(e);
        }

        String str_method = (String) xpath.evaluate("method", node,
                XPathConstants.STRING);

        try {
            method = Method.valueOf(str_method.trim());
        } catch (IllegalArgumentException e) {
            StringBuilder methods = new StringBuilder();
            for (Method m : Method.values()) {
                if (methods.length() > 0)
                    methods.append(",");
                methods.append(m.toString());
            }

            throw new XPathExpressionException(
                    "Method type unknown! Available methods: " + methods
                            + " (got: \"" + str_method + "\")");
        }
    }

    /** Manual test entry point; uses hard-coded local paths. */
    public static void main(String[] args) {
        try {

            Locale.setDefault(Locale.ENGLISH);

            Log.init("SpeakerDiarizationUnitTest", true);

            KaldiUtils.init();
            KaldiUtils.test();
            KaldiScripts.init();
            KaldiScripts.test();
            Shout.test();

            SpeakerDiarizationTask task = new SpeakerDiarizationTask();

            Settings.curr_task_dir = new File(Settings.tasks_dir,
                    "SpeakerDiarizartionUnitTest");

            Settings.curr_task_dir.mkdirs();

            task.input_file = new File("/home/guest/Desktop/TEMP/speaker.wav");
            task.method = Method.lium;
            task.max_clusters = 5;

            task.run();

        } catch (Exception e) {
            Log.error("Error running task.", e);
        }
    }

    /** Adds the method name and the contents of the input file to the hash. */
    @Override
    public void updateHash(MessageDigest m) throws IOException {

        // NOTE(review): max_clusters is not part of the hash - confirm that is intended.
        String methodname = method.name();

        m.update(methodname.getBytes(Settings.default_encoding));
        processFileHash(m, input_file);

    }
}

--------------------------------------------------------------------------------
/src/main/java/pl/edu/pjwstk/kaldi/files/Converter.java:
--------------------------------------------------------------------------------
package pl.edu.pjwstk.kaldi.files;

import java.io.File;
import java.io.FileNotFoundException;
import java.util.Locale;

import javax.xml.parsers.ParserConfigurationException;

import pl.edu.pjwstk.kaldi.utils.Log;
import pl.edu.pjwstk.kaldi.utils.ParseOptions;

/**
 * Command-line tool that reads a segmentation in one file format and writes
 * it in another.
 */
public class Converter {

    public static enum Format {
        CTM, EAF, LAB, RTTM, TextGrid
    }

    /**
     * Parses the command line, determines both formats (explicitly or from
     * the file extensions) and performs the conversion. Errors are logged.
     */
    public static void main(String[] args) {

        Locale.setDefault(Locale.ENGLISH);

        String fmt_str = "";
        for (Format fmt : Format.values())
            fmt_str += fmt.name() + " ";

        ParseOptions po = new ParseOptions(
                "Converter",
                "Converts between several file formats.\nSupported formats: "
                        + fmt_str
                        + "\nIf no types are given, program tries to guess from the file extenation only!");

        po.addArgument(File.class, "from", "source file");
        po.addArgument(File.class, "to", "destination file");

        po.addArgument("from-type", 'f', String.class, "Sets the source type",
                null);

        po.addArgument("to-type", 't', String.class, "Sets the destination type",
                null);

        po.addArgument("rttm-tb", 'r', Double.class,
                "Sets the RTTM time-base.", "1.0");

        // NOTE(review): 't' is also the short option of "to-type" above;
        // verify how ParseOptions resolves the clash.
        po.addArgument("tier", 't', Integer.class,
                "Use specific tier only, or <0 for all tiers.", "-1");

        if (!po.parse(args))
            return;

        try {
            Log.init("Converter", true);
        } catch (SecurityException | FileNotFoundException e1) {
            // The logger itself failed to start, so stderr is all that is left.
            e1.printStackTrace();
            return;
        }

        try {

            Format from_fmt, to_fmt;

            String from_str = (String) po.getArgument("from-type");
            String to_str = (String) po.getArgument("to-type");
            double timebase = (Double) po.getArgument("rttm-tb");
            File from_file = (File) po.getArgument(0);
            File to_file = (File) po.getArgument(1);

            if (from_str != null) {
                try {
                    from_fmt = Format.valueOf(from_str);
                } catch (IllegalArgumentException e) {
                    throw new RuntimeException("Type " + from_str
                            + " doesn't exist!");
                }
            } else {
                from_fmt = guessFormat(from_file);
                if (from_fmt == null) {
                    throw new RuntimeException(
                            "Couldn't guess format for file: "
                                    + from_file.getName()
                                    + "\nEnter it manually!");
                }
            }

            if (to_str != null) {
                try {
                    to_fmt = Format.valueOf(to_str);
                } catch (IllegalArgumentException e) {
                    // Report the destination type that was actually rejected.
                    throw new RuntimeException("Type " + to_str
                            + " doesn't exist!");
                }
            } else {
                to_fmt = guessFormat(to_file);
                if (to_fmt == null) {
                    throw new RuntimeException(
                            "Couldn't guess format for file: "
                                    + to_file.getName()
                                    + "\nEnter it manually!");
                }
            }

            Log.info("Converting " + from_file.getName() + " ["
                    + from_fmt.name() + "] -> " + to_file.getName() + " ["
                    + to_fmt.name() + "] t=" + timebase);

            Segmentation from_seg = fromFmt(from_fmt, timebase);
            from_seg.read(from_file);
            Segmentation to_seg = fromFmt(to_fmt, timebase);

            int t = (Integer) po.getArgument("tier");
            if (t < 0) {
                to_seg.tiers.addAll(from_seg.tiers);
            } else {
                if (t >= from_seg.tiers.size())
                    throw new RuntimeException("Tier " + t + " doesn't exist! File has "
                            + from_seg.tiers.size() + " tier(s).");
                to_seg.tiers.add(from_seg.tiers.get(t));
            }

            to_seg.write(to_file);

        } catch (Exception e) {
            Log.error("Main error", e);
        }

    }

    /**
     * Guesses the format from the file extension, ignoring case.
     *
     * @return the format, or null if there is no extension or it is unknown
     */
    private static Format guessFormat(File file) {

        String name = file.getName();
        int pos = name.lastIndexOf('.');
        if (pos < 0)
            return null;

        String ext = name.substring(pos + 1);

        if (ext.equalsIgnoreCase("TextGrid"))
            return Format.TextGrid;

        if (ext.equalsIgnoreCase("lab"))
            return Format.LAB;

        if (ext.equalsIgnoreCase("rttm"))
            return Format.RTTM;

        if (ext.equalsIgnoreCase("ctm"))
            return Format.CTM;

        if (ext.equalsIgnoreCase("eaf"))
            return Format.EAF;

        return null;
    }

    /**
     * Creates an empty segmentation object for the given format.
     *
     * @param timebase
     *            time base, used by the RTTM format only
     */
    public static Segmentation fromFmt(Format fmt, double timebase)
            throws ParserConfigurationException {
        switch (fmt) {
        case CTM:
            return new CTM();
        case EAF:
            return new EAF();
        case LAB:
            return new LAB();
        case RTTM:
            return new RTTM(timebase);
        case TextGrid:
            return new TextGrid();
        default:
            return null;
        }
    }
}

--------------------------------------------------------------------------------
/src/main/java/pl/edu/pjwstk/kaldi/service/database/dbTasks.java:
--------------------------------------------------------------------------------
package pl.edu.pjwstk.kaldi.service.database;

import java.sql.SQLException;
import java.sql.Timestamp;
import java.text.DateFormat;

import pl.edu.pjwstk.kaldi.service.database.Database.Pair;

/**
 * Queries and updates for the rows of the "Task" table.
 */
public class dbTasks {

    public static enum dbStatus {
        queued, running, done, dead, copyof
    }

    private static DateFormat df = DateFormat.getDateTimeInstance();

    /** One task row. */
    public static class Task {

        public int _id;
        public dbStatus status = dbStatus.queued;
        public String task_file;
        public int pid;
        public String hash;
        public Timestamp time;
        public int login_id;
        public String host;
        public int copyid;

        @Override
        public String toString() {
            // A row without a timestamp must not make toString() throw.
            String when = (time == null) ? "null" : df.format(time);
            return "(" + _id + ") " + status + " -- " + task_file + " -- "
                    + pid + " -- " + hash + " -- " + when + " -- "
                    + login_id + " -- " + host;
        }
    }

    /**
     * @return the first task with status "queued" when ordered by time, or
     *         null if there is none
     */
    public static Task getOldestQueued() throws RuntimeException {

        try {

            String options = Database.whereAnd(new Pair[] { new Pair("status",
                    "queued") });
            options += " " + Database.order("time", true);
            options += " " + Database.limit(1);

            Object[] ret = Database.get(Task.class, options);

            if (ret.length == 0)
                return null;
            else
                return (Task) ret[0];

        } catch (InstantiationException | IllegalAccessException | SQLException
                | RuntimeException e) {
            throw new RuntimeException(e);
        }
    }

    /** @return all tasks with status "running", ordered by time */
    public static Task[] getAllRunning() throws RuntimeException {
        try {
            String options = Database.whereAnd(new Pair[] { new Pair("status",
                    "running") });
            options += " " + Database.order("time", true);

            Object[] obj = Database.get(Task.class, options);

            Task[] ret = new Task[obj.length];
            for (int i = 0; i < ret.length; i++)
                ret[i] = (Task) obj[i];

            return ret;

        } catch (InstantiationException | IllegalAccessException | SQLException
                | RuntimeException e) {
            throw new RuntimeException(e);
        }
    }

    /** Stores a new status for the given task. */
    public static void changeStatus(Task task, dbStatus status)
            throws RuntimeException {
        try {
            Database.update("Task", "status", status.name(), "_id", task._id);
        } catch (SQLException e) {
            throw new RuntimeException(e);
        }
    }

    /** Stores a new process id for the given task. */
    public static void changePID(Task task, int pid) throws RuntimeException {
        try {
            Database.updateInt("Task", "pid", pid, "_id", task._id);
        } catch (SQLException e) {
            throw new RuntimeException(e);
        }
    }

    /** Stores the hash of the given task. */
    public static void setHash(Task task, String hash) throws RuntimeException {
        try {
            Database.update("Task", "hash", hash, "_id", task._id);
        } catch (SQLException e) {
            throw new RuntimeException(e);
        }

    }

    /** Stores the "copyid" column of the given task. */
    public static void setCopy(Task task, int copy) throws RuntimeException {
        try {
            Database.updateInt("Task", "copyid", copy, "_id", task._id);
        } catch (SQLException e) {
            throw new RuntimeException(e);
        }

    }

    /**
     * Looks up a task by hash.
     *
     * @return the first matching task when ordered by time, or null
     */
    public static Task getByHash(String hash) throws RuntimeException {
        try {

            // NOTE(review): the third Pair argument presumably negates the
            // condition (status is neither "copyof" nor "dead") - confirm in Database.Pair.
            String options = Database
                    .whereAnd(new Pair[] { new Pair("status", "copyof", true),
                            new Pair("status", "dead", true),
                            new Pair("hash", hash) });
            options += " " + Database.order("time", true);
            options += " " + Database.limit(1);

            Object[] ret = Database.get(Task.class, options);

            if (ret.length == 0)
                return null;
            else
                return (Task) ret[0];

        } catch (InstantiationException | IllegalAccessException | SQLException
                | RuntimeException e) {
            throw new RuntimeException(e);
        }
    }

    /** @return the task with the given id, or null if there is none */
    public static Task getByID(int id) throws RuntimeException {
        try {

            String options = Database.whereAnd(new Pair[] { new Pair("_id", ""
                    + id, false, false) });

            Object[] ret = Database.get(Task.class, options);

            if (ret.length == 0)
                return null;
            else
                return (Task) ret[0];

        } catch (InstantiationException | IllegalAccessException | SQLException
                | RuntimeException e) {
            throw new RuntimeException(e);
        }
    }

    /** Manual test: prints the oldest queued task and all running tasks. */
    public static void main(String[] args) {

        try {
            Task t = getOldestQueued();
            System.out.println("1>" + t);

            Task[] t2 = getAllRunning();
            for (Task t3 : t2) {
                System.out.println("2>" + t3);
            }

        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

--------------------------------------------------------------------------------
/src/main/java/pl/edu/pjwstk/kaldi/programs/Julius.java:
--------------------------------------------------------------------------------
package pl.edu.pjwstk.kaldi.programs;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Locale;
import java.util.Vector;

import pl.edu.pjwstk.kaldi.files.Segmentation;
import pl.edu.pjwstk.kaldi.files.TextGrid;
import pl.edu.pjwstk.kaldi.files.julius.JuliusOutput;
import pl.edu.pjwstk.kaldi.utils.FileUtils;
import pl.edu.pjwstk.kaldi.utils.Log;
import pl.edu.pjwstk.kaldi.utils.ProgramLauncher;
import pl.edu.pjwstk.kaldi.utils.Settings;

/**
 * Wrappers around the Julius binaries configured in Settings.
 */
public class Julius {

    /**
     * Checks that both configured binaries exist.
     *
     * @throws FileNotFoundException
     *             naming the first missing binary
     */
    public static void test() throws FileNotFoundException {
        if (!Settings.julius_bin.exists())
            throw new FileNotFoundException(Settings.julius_bin.getAbsolutePath());
        if (!Settings.julius_mklm_bin.exists())
            throw new FileNotFoundException(Settings.julius_mklm_bin.getAbsolutePath());
        // TODO: check config files, etc...
    }

    /**
     * Runs Julius with the options -C, -filelist, -v and -d.
     *
     * @throws RuntimeException
     *             if the launcher reports a non-zero return value
     */
    public static void julius(File conf, File filelist, File dic, File binlm) throws RuntimeException {

        String[] cmd = new String[] { Settings.julius_bin.getAbsolutePath(), "-C", conf.getAbsolutePath(), "-filelist",
                filelist.getAbsolutePath(), "-v", dic.getAbsolutePath(), "-d", binlm.getAbsolutePath() };

        ProgramLauncher launcher = new ProgramLauncher(cmd);
        launcher.setStdoutStream(new Log.Stream());
        launcher.setStderrStream(new Log.Stream("ERR>>"));

        Log.verbose("julius: " + filelist.getName());
        launcher.run();
        Log.verbose("Done.");

        if (launcher.getReturnValue() != 0)
            throw new RuntimeException("Retval: " + launcher.getReturnValue());
    }

    /**
     * Runs the mkbingram tool with "-nrl" on the given model.
     *
     * @throws RuntimeException
     *             if the launcher reports a non-zero return value
     */
    public static void mkbingram(File model_bkwd, File binlm) throws RuntimeException {
        String[] cmd = new String[] { Settings.julius_mklm_bin.getAbsolutePath(), "-nrl", model_bkwd.getAbsolutePath(),
                binlm.getAbsolutePath() };

        ProgramLauncher launcher = new ProgramLauncher(cmd);
        launcher.setStdoutStream(new Log.Stream());
        launcher.setStderrStream(new Log.Stream("ERR>>"));

        Log.verbose("mkbingram: " + model_bkwd.getName() + " -> " + binlm.getName());
        launcher.run();
        Log.verbose("Done.");

        if (launcher.getReturnValue() != 0)
            throw new RuntimeException("Retval: " + launcher.getReturnValue());
    }

    /**
     * Aligns a sound file with its text: builds a language model from the
     * reversed text, runs Julius and merges the parsed outputs into one
     * segmentation.
     *
     * @throws RuntimeException
     *             if a tool fails or Julius produced no output
     */
    public static Segmentation align(File sound, File text) throws IOException, RuntimeException {

        File files[] = new File[] { sound };
        File conf = new File("julius_model/julius.jconf");

        File scp = new File(Settings.temp_dir, "julius.scp");
        File vocab = new File(Settings.temp_dir, "julius.voc");
        File dict = new File(Settings.temp_dir, "julius.dic");
        File model = new File(Settings.temp_dir, "julius.lm");
        File text_b = new File(Settings.temp_dir, "julius_rev.txt");
        File binlm = new File(Settings.temp_dir, "julius.jlm");

        // FileUtils.makeVocab(text, vocab);

        FileUtils.reverse(text, text_b);

        NGram.srilm_estimate(text_b, vocab, model, 3);

        mkbingram(model, binlm);

        Transcriber.transcribe(vocab, Settings.default_encoding, dict, Settings.default_encoding, true);
        FileUtils.makeSCPFile(scp, files, false);

        Log.verbose("Running julius...");
        julius(conf, scp, dict, binlm);

        Log.verbose("Parsing julius output...");
        // The output is expected next to the sound file with ".out" in place
        // of the extension. Only the file name is searched for the dot, so a
        // dot in a directory name or a missing extension cannot break the path.
        File abs_sound = sound.getAbsoluteFile();
        String soundname = abs_sound.getName();
        int dot = soundname.lastIndexOf('.');
        if (dot >= 0)
            soundname = soundname.substring(0, dot);
        File outfile = new File(abs_sound.getParentFile(), soundname + ".out");

        Vector<JuliusOutput> julouts = JuliusOutput.loadFromJulius(outfile);

        if (julouts.isEmpty())
            throw new RuntimeException("Julius didn't provide any outputs!");

        Segmentation ret = julouts.get(0).aligned.toSegmentation(Settings.julius_win_offset);

        // Every further output is appended at the current end of the first tier.
        for (int i = 1; i < julouts.size(); i++) {
            double offset = ret.tiers.get(0).max();
            ret.appendSegmenation(julouts.get(i).aligned.toSegmentation(Settings.julius_win_offset), offset);
        }

        return ret;
    }

    /**
     * Unit tests.
     *
     * @param args
     */
    public static void main(String[] args) {
        try {

            Locale.setDefault(Locale.ENGLISH);

            Log.init("JuliusUnitTest", false);

            Transcriber.init();
            Transcriber.test();

            Segmentation seg = align(new File("/home/guest/Desktop/Respeaking/test/kopacz.wav"),
                    new File("/home/guest/Desktop/Respeaking/test/kopacz.txt"));

            TextGrid grid = new TextGrid(seg);

            grid.write(new File("/home/guest/Desktop/Respeaking/test/out.TextGrid"));

            Log.info("Julius Test complete!");

        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

--------------------------------------------------------------------------------
/src/main/java/pl/edu/pjwstk/kaldi/service/tasks/Task.java:
--------------------------------------------------------------------------------
package pl.edu.pjwstk.kaldi.service.tasks;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import pl.edu.pjwstk.kaldi.utils.Log;
import pl.edu.pjwstk.kaldi.utils.Settings;

/**
 * Base class of all tasks that can be listed in a task configuration file
 * (elements matching "/tasks/task", selected by their "name" attribute).
 */
public abstract class Task implements Runnable {

    public static enum State {
        INITIALIZED, RUNNING, FAILED, SUCCEEDED
    }

    public State state = State.INITIALIZED;

    /** Reads the task's settings from its configuration element. */
    public abstract void loadSettings(XPath xpath, Element node) throws XPathExpressionException;

    /** Feeds everything that identifies this task's input into the digest. */
    public abstract void updateHash(MessageDigest m) throws IOException;

    /**
     * Creates the task registered under the given name.
     *
     * @return a new task, or null (after logging an error) for an unknown name
     */
    protected static Task getTask(String name) {

        if (name.equals("test")) {
            return new TestTask();
        }

        if (name.equals("decode")) {
            return new DecodeTask();
        }

        if (name.equals("align")) {
            return new AlignTask();
        }

        if (name.equals("speaker-diarization")) {
            return new SpeakerDiarizationTask();
        }

        if (name.equals("convert-encoding")) {
            return new ConvertEncodingTask();
        }

        if (name.equals("nser")) {
            return new NSERTask();
        }

        if (name.equals("kws")) {
            return new KeywordSpottingTask();
        }

        Log.error("Unknown task: " + name);
        return null;
    }

    /**
     * Runs all tasks of a configuration file in document order.
     *
     * @throws RuntimeException
     *             if a task name is unknown or a task does not end in state
     *             SUCCEEDED
     */
    public static void run(File task_config)
            throws SAXException, IOException, ParserConfigurationException, XPathExpressionException, RuntimeException {

        Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(task_config);

        XPath xpath = XPathFactory.newInstance().newXPath();

        NodeList tasks = (NodeList) xpath.evaluate("/tasks/task", doc, XPathConstants.NODESET);

        for (int i = 0; i < tasks.getLength(); i++) {
            Element elTask = (Element) tasks.item(i);

            String name = elTask.getAttribute("name");

            Task task = getTask(name);

            // getTask() signals an unknown name with null; fail with a clear
            // message instead of a NullPointerException on the next line.
            if (task == null)
                throw new RuntimeException("Unknown task: " + name);

            task.loadSettings(xpath, elTask);

            task.run();

            if (task.state != State.SUCCEEDED) {
                throw new RuntimeException("Failed to complete task!");
            }
        }
    }

    /**
     * Computes an MD5 hash over the names and inputs of all known tasks in a
     * configuration file. Unknown task names are skipped.
     *
     * @return the hash as 32 lower-case hexadecimal digits
     */
    public static String getHash(File task_config) throws SAXException, IOException, ParserConfigurationException,
            XPathExpressionException, NoSuchAlgorithmException {
        Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(task_config);

        XPath xpath = XPathFactory.newInstance().newXPath();

        NodeList tasks = (NodeList) xpath.evaluate("/tasks/task", doc, XPathConstants.NODESET);

        MessageDigest m = MessageDigest.getInstance("MD5");

        for (int i = 0; i < tasks.getLength(); i++) {
            Element elTask = (Element) tasks.item(i);

            String name = elTask.getAttribute("name");

            Task task = getTask(name);

            if (task == null)
                continue;

            m.update(name.getBytes(Settings.default_encoding));

            task.loadSettings(xpath, elTask);

            try {
                task.updateHash(m);
            } catch (IOException e) {
                // IGNORE MISSING FILES IN TASKS THAT HAVEN'T CREATED THEM
                // YET...
            }
        }

        byte[] d = m.digest();
        BigInteger bigInt = new BigInteger(1, d);
        // Zero-padded to the full 32 hex digits of an MD5 digest.
        return String.format("%032x", bigInt);
    }

    /**
     * Feeds the complete contents of a file into the digest.
     *
     * @throws IOException
     *             if the file cannot be opened or read
     */
    protected static void processFileHash(MessageDigest m, File f) throws IOException {

        try (FileInputStream stream = new FileInputStream(f); FileChannel chan = stream.getChannel()) {

            ByteBuffer bb = ByteBuffer.allocate(8192);

            while (chan.read(bb) >= 0) {
                // flip() exposes exactly the bytes just read; without it the
                // digest would consume the unread remainder of the buffer.
                bb.flip();
                m.update(bb);
                bb.clear();
            }
        }
    }

    /** Manual test: prints the hash of a hard-coded configuration file. */
    public static void main(String[] args) {
        try {
            System.out.println(getHash(new File("/var/www/html/mowa/tasks/task_20150501_225120/config.xml")));
        } catch (XPathExpressionException | NoSuchAlgorithmException | SAXException | IOException
                | ParserConfigurationException e) {
            e.printStackTrace();
        }
    }
}

--------------------------------------------------------------------------------
/src/main/java/pl/edu/pjwstk/kaldi/programs/Praat.java:
--------------------------------------------------------------------------------
package pl.edu.pjwstk.kaldi.programs;

import java.io.BufferedReader;
import java.io.EOFException;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Locale;
import java.util.Vector;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.w3c.dom.Document;
import org.w3c.dom.Element;

import pl.edu.pjwstk.kaldi.utils.Log;
import pl.edu.pjwstk.kaldi.utils.ProgramLauncher;
import pl.edu.pjwstk.kaldi.utils.Settings;

/**
 * Wrapper around the Praat binary configured in Settings, used to extract
 * pitch information from a sound file.
 */
public class Praat {

    /** One pitch frame that has at least one candidate. */
    public static class PitchMark {
        public double time;
        public double intensity;
        public double frequency;
        public double strength;
    }

    /**
     * Writes a Praat script ("pitch.script" in the working directory), runs
     * it and parses the resulting short-text pitch file.
     *
     * @param wav_file
     *            sound file to analyse
     * @param pitch_file
     *            short-text pitch file the script writes and this method reads
     * @param pitch_wav
     *            WAV file the script writes ("To Sound (hum)")
     * @return one mark per frame that has candidates; only the first
     *         candidate of a frame is kept, frames without candidates are
     *         omitted
     * @throws IOException
     *             if a file cannot be written or read, or the pitch file ends
     *             early
     * @throws RuntimeException
     *             if the launcher reports a non-zero return value
     */
    public static Vector<PitchMark> pitch(File wav_file, File pitch_file,
            File pitch_wav) throws IOException {

        File script = new File("pitch.script");

        try (PrintWriter writer = new PrintWriter(script)) {
            writer.println("Read from file: \"" + wav_file.getAbsolutePath() + "\"");
            writer.println("To Pitch: 0, 75, 600");
            writer.println("Save as short text file: \""
                    + pitch_file.getAbsolutePath() + "\"");
            writer.println("To Sound (hum)");
            writer.println("Save as WAV file: \"" + pitch_wav.getAbsolutePath()
                    + "\"");
        }

        String[] cmd = new String[] { Settings.praat_bin.getAbsolutePath(),
                script.getAbsolutePath() };

        ProgramLauncher launcher = new ProgramLauncher(cmd);

        Log.verbose("Running Praat to compute pitch...");
        launcher.run();
        Log.verbose("Done.");

        // Same check as the other program wrappers: do not go on to parse a
        // missing or stale pitch file after a failed run.
        if (launcher.getReturnValue() != 0)
            throw new RuntimeException("Retval: " + launcher.getReturnValue());

        Vector<PitchMark> ret = new Vector<PitchMark>();

        try (BufferedReader reader = new BufferedReader(new FileReader(pitch_file))) {

            nextLine(reader);// header line #1
            nextLine(reader);// header line #2
            nextLine(reader);// empty line
            nextLine(reader);// min time
            nextLine(reader);// max time

            int num = Integer.parseInt(nextLine(reader).trim());// marks num
            double dx = Double.parseDouble(nextLine(reader));// dx
            double x1 = Double.parseDouble(nextLine(reader));// x1

            nextLine(reader);// max freq
            nextLine(reader);// max n candidates

            for (int i = 0; i < num; i++) {
                PitchMark mark = new PitchMark();

                mark.time = i * dx + x1;

                mark.intensity = Double.parseDouble(nextLine(reader));// intensity

                int numc = Integer.parseInt(nextLine(reader).trim());// num candidates

                if (numc == 0)
                    continue;

                mark.frequency = Double.parseDouble(nextLine(reader));// frequency
                mark.strength = Double.parseDouble(nextLine(reader));// strength

                // Skip the remaining candidates of this frame.
                for (int j = 1; j < numc; j++) {
                    nextLine(reader);// frequency
                    nextLine(reader);// strength
                }

                ret.add(mark);
            }
        }

        return ret;
    }

    /**
     * Reads one line and fails with a clear error when the file ends early.
     */
    private static String nextLine(BufferedReader reader) throws IOException {
        String line = reader.readLine();
        if (line == null)
            throw new EOFException("Unexpected end of pitch file");
        return line;
    }

    /**
     * Writes the pitch marks as an indented XML document with the root
     * element "audio-segment" and one "pitch" element per mark (attributes
     * "t", "i", "c"; the frequency is the element text).
     *
     * @param audio_id
     *            value of the root element's "id" attribute
     */
    public static void savePitchMarksToXML(String audio_id,
            Vector<PitchMark> pitch, File xml)
            throws TransformerFactoryConfigurationError,
            ParserConfigurationException, TransformerException {

        Document doc = DocumentBuilderFactory.newInstance()
                .newDocumentBuilder().newDocument();

        Element elRoot = doc.createElement("audio-segment");
        elRoot.setAttribute("id", audio_id);
        doc.appendChild(elRoot);

        for (PitchMark p : pitch) {
            // Fixed locale: the decimal separator must not depend on the
            // default locale of the machine writing the file.
            Element elPitch = doc.createElement("pitch");
            elPitch.setAttribute("t", String.format(Locale.ENGLISH, "%2.3f", p.time));
            elPitch.setAttribute("i", String.format(Locale.ENGLISH, "%2.3f", p.intensity));
            elPitch.setAttribute("c", String.format(Locale.ENGLISH, "%2.3f", p.strength));
            elPitch.setTextContent(String.format(Locale.ENGLISH, "%2.3f", p.frequency));
            elRoot.appendChild(elPitch);
        }

        Transformer trans = TransformerFactory.newInstance().newTransformer();

        trans.setOutputProperty("{http://xml.apache.org/xslt}indent-amount",
                "4");
        trans.setOutputProperty(OutputKeys.METHOD, "xml");
        trans.setOutputProperty(OutputKeys.INDENT, "yes");

        Source source = new DOMSource(doc);
        Result result = new StreamResult(xml);
        trans.transform(source, result);
    }

}

-------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/service/tasks/DecodeTask.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.service.tasks; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.security.MessageDigest; 6 | 7 | import javax.xml.xpath.XPath; 8 | import javax.xml.xpath.XPathConstants; 9 | import javax.xml.xpath.XPathExpressionException; 10 | 11 | import org.w3c.dom.Element; 12 | 13 | import pl.edu.pjwstk.kaldi.files.CTM; 14 | import pl.edu.pjwstk.kaldi.files.TextGrid; 15 | import pl.edu.pjwstk.kaldi.programs.KaldiUtils; 16 | import pl.edu.pjwstk.kaldi.utils.FileUtils; 17 | import pl.edu.pjwstk.kaldi.utils.Log; 18 | import pl.edu.pjwstk.kaldi.utils.Settings; 19 | 20 | public class DecodeTask extends Task { 21 | 22 | private File input_file; 23 | private File mfcc_config; 24 | private File mdl_file; 25 | private File hclg_file; 26 | private File
words_table; 27 | private File phones_table; 28 | private File word_boundaries; 29 | 30 | private File lda_matrix = null; 31 | 32 | @Override 33 | public void run() { 34 | 35 | state = State.RUNNING; 36 | 37 | boolean fail = false; 38 | File files[] = { input_file, mfcc_config, mdl_file, hclg_file, words_table, phones_table, word_boundaries, 39 | lda_matrix }; 40 | for (File f : files) 41 | if (f != null && !f.exists()) { 42 | Log.error("Missing file: " + f.getAbsolutePath()); 43 | fail = true; 44 | } 45 | 46 | if (fail) { 47 | Log.error("Some files are missing!"); 48 | state = State.FAILED; 49 | return; 50 | } 51 | 52 | File scp_file = new File(Settings.curr_task_dir, "wav.scp"); 53 | File mfcc = new File(Settings.curr_task_dir, "mfcc"); 54 | File cmvn_stats = new File(Settings.curr_task_dir, "cmvn_stats"); 55 | File cmvn = new File(Settings.curr_task_dir, "cmvn"); 56 | File deltas = new File(Settings.curr_task_dir, "deltas"); 57 | File splice = new File(Settings.curr_task_dir, "splice"); 58 | File trans = new File(Settings.curr_task_dir, "trans"); 59 | File lattice = new File(Settings.curr_task_dir, "lattice"); 60 | File words = new File(Settings.curr_task_dir, "words"); 61 | File alignment = new File(Settings.curr_task_dir, "alignment"); 62 | File words_int = new File(Settings.curr_task_dir, "words.int"); 63 | File words_txt = new File(Settings.curr_task_dir, "words.txt"); 64 | File aligned_lattice = new File(Settings.curr_task_dir, "aligned_lattice"); 65 | File ctm_int = new File(Settings.curr_task_dir, "ctm.int"); 66 | File ctm_txt = new File(Settings.curr_task_dir, "ctm.txt"); 67 | File tg_out = new File(Settings.curr_task_dir, "out.TextGrid"); 68 | 69 | try { 70 | 71 | FileUtils.makeSCPFile(scp_file, new File[] { input_file }, true); 72 | 73 | KaldiUtils.compute_mfcc_feats(mfcc_config, scp_file, mfcc); 74 | 75 | KaldiUtils.compute_cmvn_stats(mfcc, cmvn_stats); 76 | 77 | KaldiUtils.apply_cmvn(cmvn_stats, mfcc, cmvn); 78 | 79 | File data; 80 | 81 | if 
(lda_matrix != null) { 82 | 83 | KaldiUtils.splice_feats(cmvn, splice); 84 | 85 | KaldiUtils.transform_feats(lda_matrix, false, splice, trans); 86 | 87 | data = trans; 88 | 89 | } else { 90 | 91 | KaldiUtils.add_deltas(cmvn, deltas); 92 | 93 | data = deltas; 94 | } 95 | 96 | KaldiUtils.gmm_latgen_faster(mdl_file, hclg_file, data, lattice, words, alignment); 97 | 98 | KaldiUtils.copy_int_vector("ark", words, "ark,t", words_int); 99 | 100 | KaldiUtils.int2sym("2-", words_table, words_int, words_txt); 101 | 102 | KaldiUtils.lattice_align_words(word_boundaries, mdl_file, lattice, aligned_lattice); 103 | 104 | KaldiUtils.lattice_to_ctm_conf(aligned_lattice, ctm_int); 105 | 106 | KaldiUtils.int2sym("5", words_table, ctm_int, ctm_txt); 107 | 108 | CTM ctm = new CTM(); 109 | 110 | ctm.read(ctm_txt); 111 | 112 | TextGrid tg = new TextGrid(ctm); 113 | 114 | tg.write(tg_out); 115 | 116 | state = State.SUCCEEDED; 117 | 118 | } catch (Exception e) { 119 | Log.error("Decoding task.", e); 120 | state = State.FAILED; 121 | } 122 | } 123 | 124 | @Override 125 | public void loadSettings(XPath xpath, Element node) throws XPathExpressionException { 126 | 127 | input_file = new File((String) xpath.evaluate("input-file", node, XPathConstants.STRING)); 128 | mfcc_config = new File((String) xpath.evaluate("mfcc-config", node, XPathConstants.STRING)); 129 | mdl_file = new File((String) xpath.evaluate("mdl", node, XPathConstants.STRING)); 130 | hclg_file = new File((String) xpath.evaluate("hclg", node, XPathConstants.STRING)); 131 | words_table = new File((String) xpath.evaluate("words-table", node, XPathConstants.STRING)); 132 | phones_table = new File((String) xpath.evaluate("phones-table", node, XPathConstants.STRING)); 133 | word_boundaries = new File((String) xpath.evaluate("word-boundaries", node, XPathConstants.STRING)); 134 | 135 | String str = (String) xpath.evaluate("lda-matrix", node, XPathConstants.STRING); 136 | if (str != null && !str.isEmpty()) 137 | lda_matrix = new 
File(str); 138 | 139 | } 140 | 141 | @Override 142 | public void updateHash(MessageDigest m) throws IOException { 143 | processFileHash(m, input_file); 144 | } 145 | 146 | } 147 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/files/julius/WordGraph.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.files.julius; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | import java.util.Stack; 6 | import java.util.Vector; 7 | 8 | import pl.edu.pjwstk.kaldi.utils.Log; 9 | 10 | public class WordGraph { 11 | 12 | public Map lattice; 13 | public Map nodes; 14 | 15 | public WordGraph() { 16 | lattice = new HashMap(); 17 | nodes = new HashMap(); 18 | } 19 | 20 | public int getNodeNum() 21 | { 22 | return lattice.size(); 23 | } 24 | 25 | public int getLength() { 26 | int len = 0; 27 | for (LatticeNode node : lattice.values()) { 28 | if (node.time_end > len) 29 | len = node.time_end; 30 | } 31 | return len; 32 | } 33 | 34 | public LatticeNode getFirst() throws RuntimeException { 35 | Vector found = new Vector(); 36 | for (LatticeNode node : lattice.values()) { 37 | if (node.left.size() == 0) 38 | found.add(node); 39 | } 40 | 41 | if (found.size() == 0) 42 | throw new RuntimeException("Cannot find first node! Loop maybe?"); 43 | 44 | if (found.size() == 1) 45 | return found.get(0); 46 | 47 | Log.info("WARNING: found more than 1 first node! 
Returning one with highest score..."); 48 | 49 | double score = -999999999; 50 | LatticeNode ret = found.get(0); 51 | for (LatticeNode node : found) { 52 | if (score < node.lscore) { 53 | ret = node; 54 | score = node.lscore; 55 | } 56 | } 57 | 58 | return ret; 59 | } 60 | 61 | public Hypo findBestScore() { 62 | LatticeNode node = getFirst(); 63 | Hypo ret = new Hypo(); 64 | 65 | while (node.right.size() > 0) { 66 | ret.add(node); 67 | 68 | LatticeNode next = lattice.get(node.right.get(0)); 69 | double score = next.lscore; 70 | for (int i = 1; i < node.right.size(); i++) { 71 | LatticeNode test = lattice.get(node.right.get(i)); 72 | if (test.lscore > score) { 73 | score = test.lscore; 74 | next = test; 75 | } 76 | } 77 | node = next; 78 | } 79 | 80 | return ret; 81 | } 82 | 83 | public static class Hypo { 84 | public Vector sequence; 85 | public LatticeNode tail; 86 | public WordSequence words; 87 | 88 | public Hypo() { 89 | words = new WordSequence(); 90 | sequence = new Vector(); 91 | tail = null; 92 | } 93 | 94 | public Hypo(LatticeNode node) { 95 | words = new WordSequence(); 96 | sequence = new Vector(); 97 | sequence.add(node); 98 | words.addWord(node.word); 99 | tail = node; 100 | } 101 | 102 | public Hypo(Hypo hypo, LatticeNode node) { 103 | sequence = new Vector(); 104 | sequence.addAll(hypo.sequence); 105 | words = new WordSequence(); 106 | words.copy(hypo.words); 107 | words.addWord(node.word); 108 | sequence.add(node); 109 | tail = node; 110 | } 111 | 112 | public void add(LatticeNode node) { 113 | sequence.add(node); 114 | words.addWord(node.word); 115 | tail = node; 116 | } 117 | } 118 | 119 | class HypoList { 120 | public Map hypos = new HashMap(); 121 | } 122 | 123 | public Hypo findOracleSequence(WordSequence correct_sequence) { 124 | Vector found_hypos = new Vector(); 125 | 126 | Stack stack = new Stack(); 127 | Map vite = new HashMap(); 128 | // TODO: not 100% of viterbi criterion applies to Levenshtein 129 | // the criterion assumes that hypos 
occupying the same node and of same 130 | // length can be compared 131 | 132 | Hypo first = new Hypo(getFirst()); 133 | 134 | first.words.setupLevenshtein(correct_sequence); 135 | 136 | stack.push(first); 137 | 138 | while (!stack.empty()) { 139 | Hypo hypo = stack.pop(); 140 | 141 | HypoList list = vite.get(hypo.tail); 142 | if (list != null) { 143 | Hypo other = list.hypos.get(hypo.sequence.size()); 144 | if (other != null) { 145 | hypo.words.updateLevenshtein(); 146 | other.words.updateLevenshtein(); 147 | if (hypo.words.error_sum >= other.words.error_sum) 148 | continue; 149 | } 150 | } else { 151 | list = new HypoList(); 152 | vite.put(hypo.tail, list); 153 | } 154 | 155 | list.hypos.put(hypo.sequence.size(), hypo); 156 | 157 | if (hypo.tail.right.size() == 0) { 158 | found_hypos.add(hypo); 159 | } else { 160 | for (Integer i : hypo.tail.right) { 161 | LatticeNode child = lattice.get(i); 162 | stack.push(new Hypo(hypo, child)); 163 | } 164 | } 165 | } 166 | 167 | int best_score = 999999999; 168 | Hypo ret = null; 169 | 170 | Log.info("Hypo num: " + found_hypos.size()); 171 | 172 | for (Hypo h : found_hypos) { 173 | h.words.updateLevenshtein(); 174 | if (h.words.error_sum < best_score) { 175 | best_score = h.words.error_sum; 176 | ret = h; 177 | } 178 | } 179 | 180 | return ret; 181 | } 182 | 183 | public static Object[] graphNodesFromLattice( 184 | Vector lattice_nodes) { 185 | Object[] nodes = new Object[lattice_nodes.size()]; 186 | 187 | int i = 0; 188 | for (LatticeNode n : lattice_nodes) 189 | nodes[i++] = n.object; 190 | 191 | return nodes; 192 | } 193 | 194 | } 195 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/service/database/Database.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.service.database; 2 | 3 | import java.lang.reflect.Field; 4 | import java.sql.Connection; 5 | import java.sql.DriverManager; 6 | 
import java.sql.ResultSet; 7 | import java.sql.SQLException; 8 | import java.sql.Timestamp; 9 | import java.util.ArrayList; 10 | import java.util.HashMap; 11 | import java.util.Map.Entry; 12 | import java.util.Properties; 13 | 14 | import pl.edu.pjwstk.kaldi.utils.Settings; 15 | 16 | public class Database { 17 | 18 | private static Connection conn = null; 19 | 20 | private static Properties db_props = new Properties(); 21 | static { 22 | db_props.put("user", Settings.db_username); 23 | db_props.put("password", Settings.db_password); 24 | } 25 | 26 | public static Connection get() throws SQLException { 27 | if (conn == null || !conn.isValid(1)) { 28 | conn = DriverManager.getConnection("jdbc:mysql://localhost/", 29 | db_props); 30 | } 31 | return conn; 32 | } 33 | 34 | @SuppressWarnings({ "unchecked", "rawtypes" }) 35 | public static Object[] get(Class type, String options) 36 | throws InstantiationException, IllegalAccessException, 37 | SQLException, RuntimeException { 38 | 39 | String tablename = type.getSimpleName(); 40 | 41 | HashMap fields = new HashMap(); 42 | for (Field f : type.getFields()) { 43 | fields.put(f.getName(), f); 44 | } 45 | 46 | String fieldlist = ""; 47 | for (String f : fields.keySet()) { 48 | if (!fieldlist.isEmpty()) 49 | fieldlist += ","; 50 | fieldlist += "`" + f + "`"; 51 | } 52 | 53 | String query = "SELECT " + fieldlist + " FROM `" + Settings.db_name 54 | + "`.`" + tablename + "`"; 55 | if (!options.isEmpty()) 56 | query += " " + options; 57 | 58 | // System.out.println(query); 59 | 60 | Connection db = get(); 61 | 62 | ResultSet rs = db.createStatement().executeQuery(query); 63 | 64 | ArrayList ret = new ArrayList(); 65 | while (rs.next()) { 66 | Object obj = type.newInstance(); 67 | 68 | for (Entry e : fields.entrySet()) { 69 | Field f = e.getValue(); 70 | String n = e.getKey(); 71 | 72 | String t = f.getType().getName(); 73 | if (t.equals("int")) { 74 | int value = rs.getInt(n); 75 | f.setInt(obj, value); 76 | } else if 
(t.equals("long")) { 77 | long value = rs.getLong(n); 78 | f.setLong(obj, value); 79 | 80 | } else if (t.equals("java.sql.Timestamp")) { 81 | Timestamp value = rs.getTimestamp(n); 82 | f.set(obj, value); 83 | } else if (t.equals("java.lang.String")) { 84 | String value = rs.getString(n); 85 | f.set(obj, value); 86 | } else if (f.getType().isEnum()) { 87 | String value = rs.getString(n); 88 | 89 | f.set(obj, Enum.valueOf((Class) f.getType(), value)); 90 | } else { 91 | throw new RuntimeException( 92 | "Unknown or unimplemented type: " + t); 93 | } 94 | 95 | } 96 | 97 | ret.add(obj); 98 | } 99 | 100 | return ret.toArray(); 101 | } 102 | 103 | public static class Pair { 104 | 105 | public String key; 106 | public String value; 107 | public boolean not; 108 | public boolean quoted; 109 | 110 | public Pair(String key, String value) { 111 | this.key = key; 112 | this.value = value; 113 | this.not = false; 114 | this.quoted = true; 115 | } 116 | 117 | public Pair(String key, String value, boolean not) { 118 | this.key = key; 119 | this.value = value; 120 | this.not = not; 121 | this.quoted = true; 122 | } 123 | 124 | public Pair(String key, String value, boolean not, boolean quoted) { 125 | this.key = key; 126 | this.value = value; 127 | this.not = not; 128 | this.quoted = quoted; 129 | } 130 | } 131 | 132 | public static String whereAnd(Pair values[]) { 133 | String ret = ""; 134 | String sign; 135 | 136 | for (Pair p : values) { 137 | 138 | if (p.not) 139 | sign = "!="; 140 | else 141 | sign = "="; 142 | 143 | if (!ret.isEmpty()) 144 | ret += " AND "; 145 | if (p.quoted) 146 | ret += "`" + p.key + "` " + sign + " '" + p.value + "'"; 147 | else 148 | ret += "`" + p.key + "` " + sign + " " + p.value; 149 | } 150 | return "WHERE " + ret; 151 | } 152 | 153 | public static String limit(int num) { 154 | return "LIMIT " + num; 155 | } 156 | 157 | public static String order(String column, boolean asc) { 158 | if (asc) 159 | return "ORDER BY `" + column + "` ASC"; 160 | else 161 
| return "ORDER BY `" + column + "` DESC"; 162 | } 163 | 164 | public static void update(String table, String column, String value, 165 | String id_col, int id_val) throws SQLException { 166 | String query = "UPDATE `" + Settings.db_name + "`.`" + table 167 | + "` SET `" + column + "` = '" + value + "' WHERE `" + id_col 168 | + "`=" + id_val; 169 | 170 | Connection db = get(); 171 | db.createStatement().execute(query); 172 | } 173 | 174 | public static void updateInt(String table, String column, int value, 175 | String id_col, int id_val) throws SQLException { 176 | String query = "UPDATE `" + Settings.db_name + "`.`" + table 177 | + "` SET `" + column + "` = " + value + " WHERE `" + id_col 178 | + "`=" + id_val; 179 | 180 | Connection db = get(); 181 | db.createStatement().execute(query); 182 | } 183 | 184 | } 185 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/files/julius/WordSequence.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.files.julius; 2 | 3 | import java.util.StringTokenizer; 4 | import java.util.Vector; 5 | 6 | public class WordSequence { 7 | 8 | public Vector words; 9 | 10 | public WordSequence() { 11 | words = new Vector(); 12 | } 13 | 14 | public WordSequence(String text) { 15 | this(); 16 | 17 | StringTokenizer strtok = new StringTokenizer(text); 18 | while (strtok.hasMoreTokens()) 19 | addWord(strtok.nextToken()); 20 | } 21 | 22 | public void copy(WordSequence clone) { 23 | words.addAll(clone.words); 24 | original = clone.original; 25 | orig_num = clone.orig_num; 26 | levenshtein_row = clone.levenshtein_row; 27 | insertions = clone.insertions; 28 | deletions = clone.deletions; 29 | substitutions = clone.substitutions; 30 | error_sum = clone.error_sum; 31 | 32 | lev_old = new Error[clone.lev_old.length]; 33 | for (int i = 0; i < lev_old.length; i++) { 34 | lev_old[i] = new Error(); 35 | 
lev_old[i].copy(clone.lev_old[i]); 36 | } 37 | 38 | lev_new = new Error[clone.lev_new.length]; 39 | for (int i = 0; i < lev_new.length; i++) 40 | lev_new[i] = new Error(); 41 | } 42 | 43 | public void addWord(String word) { 44 | String test = word.trim().toLowerCase(); 45 | if (test.equals("") || test.equals("")) 46 | return; 47 | 48 | words.add(word); 49 | } 50 | 51 | public int insertions, deletions, substitutions, error_sum; 52 | 53 | public String getResults() { 54 | String ret = ""; 55 | 56 | int count = original.words.size(); 57 | 58 | ret += "=======\n"; 59 | ret += "Token count: " + count + "\n"; 60 | ret += "Errors: " + error_sum + "\n"; 61 | ret += "Insertions: " + insertions + "\n"; 62 | ret += "Deletions: " + deletions + "\n"; 63 | ret += "Substitutions: " + substitutions + "\n"; 64 | ret += "------\n"; 65 | ret += "Correctness: " + (count - (substitutions + deletions)) 66 | / (double) count * 100.0 + "\n"; 67 | ret += "Accuracy: " + (count - error_sum) / (double) count * 100.0 68 | + "\n"; 69 | ret += "======="; 70 | 71 | return ret; 72 | } 73 | 74 | public String toString() { 75 | String ret = ""; 76 | for (String w : words) { 77 | ret += w + " "; 78 | } 79 | return ret; 80 | } 81 | 82 | class Error { 83 | public int ins = 0, del = 0, sub = 0, sum = 0; 84 | 85 | public void copy(Error err) { 86 | ins = err.ins; 87 | del = err.del; 88 | sub = err.sub; 89 | sum = err.sum; 90 | } 91 | 92 | public String toString() { 93 | return ins + "," + del + "," + sub + "=" + sum; 94 | } 95 | } 96 | 97 | enum ErrorType { 98 | SUBSTITUTION, DELETION, INSERTION 99 | } 100 | 101 | private WordSequence original; 102 | private int orig_num; 103 | private Error[] lev_old, lev_new; 104 | private int levenshtein_row; 105 | 106 | public void setupLevenshtein(WordSequence original) { 107 | this.original = original; 108 | orig_num = original.words.size(); 109 | lev_old = new Error[orig_num + 1]; 110 | lev_new = new Error[orig_num + 1]; 111 | levenshtein_row = 1; 112 | for (int 
i = 0; i <= orig_num; i++) { 113 | lev_new[i] = new Error(); 114 | lev_old[i] = new Error(); 115 | lev_old[i].del = i; 116 | lev_old[i].sum = i; 117 | } 118 | 119 | deletions = orig_num; 120 | substitutions = 0; 121 | insertions = 0; 122 | 123 | updateLevenshtein(); 124 | } 125 | 126 | public void updateLevenshtein() { 127 | if (words.size() < levenshtein_row) 128 | return; 129 | 130 | int sub, ins, del; 131 | boolean words_match; 132 | 133 | for (; levenshtein_row <= words.size(); levenshtein_row++) { 134 | String test_word = words.get(levenshtein_row - 1); 135 | 136 | lev_new[0].copy(lev_old[0]); 137 | lev_new[0].ins += 2; 138 | lev_new[0].sum += 2; 139 | for (int j = 1; j <= orig_num; j++) { 140 | String orig_word = original.words.get(j - 1); 141 | if (test_word.equals(orig_word)) 142 | words_match = true; 143 | else 144 | words_match = false; 145 | 146 | ins = lev_old[j].sum + 2; 147 | del = lev_new[j - 1].sum + 1; 148 | sub = lev_old[j - 1].sum; 149 | if (!words_match) 150 | sub++; 151 | 152 | ErrorType type; 153 | 154 | if (sub < ins) { 155 | if (sub < del) 156 | type = ErrorType.SUBSTITUTION; 157 | else 158 | type = ErrorType.DELETION; 159 | } else { 160 | if (ins < del) 161 | type = ErrorType.INSERTION; 162 | else 163 | type = ErrorType.DELETION; 164 | } 165 | 166 | switch (type) { 167 | case SUBSTITUTION: 168 | lev_new[j].copy(lev_old[j - 1]); 169 | if (!words_match) { 170 | lev_new[j].sub++; 171 | lev_new[j].sum++; 172 | } 173 | break; 174 | case DELETION: 175 | lev_new[j].copy(lev_new[j - 1]); 176 | lev_new[j].del++; 177 | lev_new[j].sum++; 178 | break; 179 | case INSERTION: 180 | lev_new[j].copy(lev_old[j]); 181 | lev_new[j].ins += 2; 182 | lev_new[j].sum += 2; 183 | break; 184 | } 185 | } 186 | 187 | Error[] t = lev_new; 188 | lev_new = lev_old; 189 | lev_old = t; 190 | } 191 | 192 | // insertions are weighted double to minimize their presence (in order 193 | // to improve accuracy)? 
194 | insertions = lev_old[orig_num].ins / 2; 195 | substitutions = lev_old[orig_num].sub; 196 | deletions = lev_old[orig_num].del; 197 | error_sum = insertions + substitutions + deletions; 198 | } 199 | 200 | } 201 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/utils/WAV.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.utils; 2 | 3 | import java.io.ByteArrayInputStream; 4 | import java.io.File; 5 | import java.io.FileInputStream; 6 | import java.io.FileOutputStream; 7 | import java.io.IOException; 8 | import java.nio.ByteBuffer; 9 | import java.nio.ByteOrder; 10 | 11 | import javax.sound.sampled.AudioFileFormat; 12 | import javax.sound.sampled.AudioFormat; 13 | import javax.sound.sampled.AudioInputStream; 14 | import javax.sound.sampled.AudioSystem; 15 | import javax.sound.sampled.UnsupportedAudioFileException; 16 | 17 | public class WAV { 18 | 19 | public static float getLength(File file) throws UnsupportedAudioFileException, IOException { 20 | 21 | AudioFileFormat af = AudioSystem.getAudioFileFormat(file); 22 | 23 | return af.getFrameLength() / af.getFormat().getFrameRate(); 24 | } 25 | 26 | public static float getLengthOld(File file) throws IOException { 27 | byte[] buffer = new byte[44]; 28 | ByteBuffer header = ByteBuffer.wrap(buffer); 29 | header.order(ByteOrder.LITTLE_ENDIAN); 30 | 31 | FileInputStream fis = new FileInputStream(file); 32 | 33 | fis.read(buffer, 0, 44); 34 | 35 | if (!(new String(buffer, 0, 4).equals("RIFF"))) { 36 | fis.close(); 37 | throw new IOException("RIFF header missing!"); 38 | } 39 | 40 | if (!(new String(buffer, 8, 4).equals("WAVE"))) { 41 | fis.close(); 42 | 43 | throw new IOException("WAVE header missing!"); 44 | } 45 | 46 | if (!(new String(buffer, 12, 4).equals("fmt "))) { 47 | fis.close(); 48 | throw new IOException("fmt header missing!"); 49 | } 50 | 51 | if (!(new String(buffer, 36, 
4).equals("data"))) { 52 | fis.close(); 53 | throw new IOException("data header missing!"); 54 | } 55 | 56 | int src_len = header.getInt(40); 57 | int byte_rate = header.getInt(28); 58 | 59 | fis.close(); 60 | 61 | return src_len / (float) byte_rate; 62 | } 63 | 64 | public static void extract(File source, File dest, double time_start, double time_end) 65 | throws UnsupportedAudioFileException, IOException { 66 | 67 | AudioInputStream input = AudioSystem.getAudioInputStream(source); 68 | 69 | AudioFormat af = input.getFormat(); 70 | 71 | if (af.getChannels() != 1) 72 | throw new IOException("channel count != 1"); 73 | 74 | int ss = af.getSampleSizeInBits() / 8; 75 | int sl = (int) (input.getFrameLength() * ss); 76 | float sr = af.getSampleRate(); 77 | 78 | int beg = (int) (time_start * sr) * ss; 79 | if (beg > sl) 80 | beg = sl; 81 | beg = (int) (beg / ss) * ss; 82 | int end = (int) (time_end * sr) * ss; 83 | if (end > sl) 84 | end = sl; 85 | end = (int) (end / ss) * ss; 86 | int len = end - beg; 87 | 88 | int fl = len / af.getFrameSize(); 89 | 90 | byte buf[] = new byte[len]; 91 | 92 | input.skip(beg); 93 | input.read(buf, 0, len); 94 | 95 | AudioInputStream output = new AudioInputStream(new ByteArrayInputStream(buf), af, fl); 96 | 97 | AudioSystem.write(output, AudioFileFormat.Type.WAVE, dest); 98 | 99 | } 100 | 101 | public static void extractOld(File source, File dest, double time_start, double time_end) throws IOException { 102 | 103 | byte[] buffer = new byte[44]; 104 | ByteBuffer header = ByteBuffer.wrap(buffer); 105 | header.order(ByteOrder.LITTLE_ENDIAN); 106 | 107 | FileInputStream fis = new FileInputStream(source); 108 | 109 | fis.read(buffer, 0, 44); 110 | 111 | if (!(new String(buffer, 0, 4).equals("RIFF"))) { 112 | fis.close(); 113 | throw new IOException("RIFF header missing!"); 114 | } 115 | 116 | if (!(new String(buffer, 8, 4).equals("WAVE"))) { 117 | fis.close(); 118 | 119 | throw new IOException("WAVE header missing!"); 120 | } 121 | 122 | if 
(!(new String(buffer, 12, 4).equals("fmt "))) { 123 | fis.close(); 124 | throw new IOException("fmt header missing!"); 125 | } 126 | 127 | if (!(new String(buffer, 36, 4).equals("data"))) { 128 | fis.close(); 129 | throw new IOException("data header missing!"); 130 | } 131 | 132 | int src_len = header.getInt(40); 133 | int byte_rate = header.getInt(28); 134 | short ba = header.getShort(32); 135 | 136 | int start_offset = (int) (byte_rate * time_start); 137 | int len = (int) ((time_end - time_start) * byte_rate); 138 | 139 | start_offset = (int) (start_offset / ba) * ba; 140 | len = (int) (len / ba) * ba; 141 | 142 | if (start_offset + len > src_len) 143 | len = src_len - start_offset; 144 | 145 | header.putInt(4, len + 36); 146 | header.putInt(40, len); 147 | 148 | FileOutputStream fos = new FileOutputStream(dest); 149 | 150 | fos.write(header.array(), 0, 44); 151 | 152 | fis.skip(start_offset); 153 | 154 | byte[] data = new byte[len]; 155 | 156 | int ret = fis.read(data); 157 | 158 | fis.close(); 159 | 160 | if (ret != len) { 161 | fos.close(); 162 | throw new IOException("Data not read properly!"); 163 | } 164 | 165 | fos.write(data); 166 | 167 | fos.close(); 168 | 169 | } 170 | 171 | public static void main(String[] args) { 172 | try { 173 | 174 | System.out.println(getLength(new File("/home/guest/Desktop/Respeaking/ses0037/file002.wav"))); 175 | 176 | extract(new File("/home/guest/Desktop/Respeaking/ses0037/file002.wav"), 177 | new File("/home/guest/Desktop/Respeaking/ses0037/out.wav"), 284.69, 288.8636779785156); 178 | 179 | extract(new File("/home/guest/Desktop/Respeaking/ses0037/file002.wav"), 180 | new File("/home/guest/Desktop/Respeaking/ses0037/out2.wav"), 284.69, 300); 181 | 182 | } catch (Exception e) { 183 | e.printStackTrace(); 184 | } 185 | } 186 | } 187 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/files/julius/JuliusOutput.java: 
package pl.edu.pjwstk.kaldi.files.julius;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import pl.edu.pjwstk.kaldi.files.Segmentation;
import pl.edu.pjwstk.kaldi.files.TextGrid;
import pl.edu.pjwstk.kaldi.files.julius.ConfidenceNetwork.Section;
import pl.edu.pjwstk.kaldi.files.julius.ConfidenceNetwork.Word;
import pl.edu.pjwstk.kaldi.utils.Settings;

/**
 * One sentence read from a Julius output file: the recognised word sequence
 * plus whichever of the confusion network, word graph and word alignment
 * sections followed it in the file.
 */
public class JuliusOutput {

    /** Prefix of the line that opens every sentence in the output file. */
    private static final String SENTENCE_PREFIX = "sentence1:";

    /** Matches one parenthesised "word:weight" entry of a confusion network line. */
    private static final Pattern CONF_NET_ENTRY = Pattern.compile("\\s*\\(([^\\)]*)\\)");

    public ConfidenceNetwork confidence_network;
    public WordGraph word_graph;
    public WordSequence sentence;
    public AlignedSequence aligned;

    /**
     * Sum of the lengths of all sentences that precede this one in the file,
     * as accumulated by {@link #loadFromJulius(File)}.
     */
    public int time_offset;

    public JuliusOutput() {
        confidence_network = new ConfidenceNetwork();
        word_graph = new WordGraph();
        aligned = new AlignedSequence();
        time_offset = 0;
    }

    /**
     * Reads every sentence from a Julius output file.
     *
     * @param file file to read, decoded with {@code Settings.default_encoding}
     * @return the sentences in file order; never empty
     * @throws IOException if the file cannot be opened or read
     * @throws RuntimeException if the first line does not start with
     *         "sentence1:" or a section line cannot be parsed
     */
    public static Vector<JuliusOutput> loadFromJulius(File file) throws IOException, RuntimeException {
        Vector<JuliusOutput> ret = new Vector<JuliusOutput>();

        FileInputStream stream = new FileInputStream(file);
        try {
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(stream, Settings.default_encoding));

            String line = reader.readLine();

            if (line == null || !line.startsWith(SENTENCE_PREFIX)) {
                throw new RuntimeException(
                        "Error reading file. Expected each sentence to start with \"sentence1:\"!");
            }

            int off = 0;

            // Loop on the pending header line rather than on reader.ready():
            // a header that happens to be the last line of the file must
            // still produce a sentence.
            while (line != null) {
                JuliusOutput sentence = new JuliusOutput();
                sentence.time_offset = off;
                sentence.setSentence(line);
                line = sentence.parseFile(reader);
                if (sentence.word_graph.getNodeNum() > 0)
                    off += sentence.word_graph.getLength();
                else if (sentence.aligned.sequence.size() > 0)
                    off += sentence.aligned.getLength();
                ret.add(sentence);
            }
        } finally {
            // Closing the underlying stream releases the file handle even
            // when parsing aborts with an exception.
            stream.close();
        }

        return ret;
    }

    /**
     * Replaces {@link #sentence} with the words found on a sentence header line.
     *
     * @param line header line; its first 10 characters (the length of
     *        "sentence1:") are discarded and the rest is split on whitespace
     */
    public void setSentence(String line) {
        sentence = new WordSequence();

        String[] words = line.substring(SENTENCE_PREFIX.length()).trim().split("\\s+");
        for (String word : words) {
            // NOTE(review): the original compared against the empty string
            // twice; that looks like two distinct marker literals that were
            // lost in transit - confirm against the upstream source.
            if (word.isEmpty())
                continue;

            sentence.addWord(word);
        }
    }

    /**
     * Consumes the lines that belong to the current sentence and fills
     * {@link #confidence_network}, {@link #word_graph} and {@link #aligned}.
     *
     * @param reader reader positioned just after the sentence header line
     * @return the header line of the next sentence, or {@code null} once the
     *         end of the file has been reached
     * @throws IOException if reading fails
     * @throws RuntimeException if a confusion network entry is malformed or a
     *         word graph node id occurs twice
     */
    private String parseFile(BufferedReader reader) throws IOException, RuntimeException {

        confidence_network.sections.clear();
        word_graph.lattice.clear();

        int line_num = 0;
        boolean read_conf_net = false;
        boolean read_word_graph = false;
        boolean read_word_alignment = false;
        String line;
        while ((line = reader.readLine()) != null) {
            line_num++;

            // Section markers only toggle state; they carry no data.
            if (line.contains("begin confusion network")) {
                read_conf_net = true;
                continue;
            }

            if (line.contains("end confusion network")) {
                read_conf_net = false;
                continue;
            }

            if (line.contains("begin wordgraph data")) {
                read_word_graph = true;
                continue;
            }

            if (line.contains("end wordgraph data")) {
                read_word_graph = false;
                continue;
            }

            if (line.contains("word alignment")) {
                read_word_alignment = true;
                continue;
            }

            if (line.contains("end forced alignment")) {
                read_word_alignment = false;
                continue;
            }

            if (line.startsWith(SENTENCE_PREFIX))
                return line;

            // The flags are independent on purpose: a line is handed to every
            // section that is currently open.
            if (read_conf_net) {
                confidence_network.sections.add(parseConfusionSection(line, line_num));
            }

            if (read_word_graph) {
                LatticeNode node = new LatticeNode(line);

                if (word_graph.lattice.containsKey(node.id)) {
                    throw new RuntimeException("Cannot parse line " + line_num + " [duplicate word node]");
                }

                word_graph.lattice.put(node.id, node);
            }

            if (read_word_alignment) {
                aligned.addWord(line);
            }
        }

        return null;
    }

    /** Builds one confusion network section from its "(word:weight)" entries. */
    private static Section parseConfusionSection(String line, int line_num) {
        Section section = new Section();

        Matcher matcher = CONF_NET_ENTRY.matcher(line);
        while (matcher.find()) {
            String[] word_opt = matcher.group(1).split(":");
            if (word_opt.length != 2) {
                throw new RuntimeException("Cannot parse line " + line_num + " [wrong colon]: " + line);
            }

            double weight;
            try {
                weight = Double.parseDouble(word_opt[1]);
            } catch (NumberFormatException e) {
                // Keep the cause so the offending token shows up in the trace.
                throw new RuntimeException("Cannot parse line [wrong weight]: " + line_num, e);
            }

            section.words.add(new Word(word_opt[0], weight));
        }

        return section;
    }

    /** Ad-hoc manual check: converts a hard-coded Julius output file to a TextGrid. */
    public static void main(String[] args) {

        try {

            Vector<JuliusOutput> julouts = JuliusOutput
                    .loadFromJulius(new File("/home/guest/Desktop/Respeaking/kopacz.out"));

            Segmentation ret = julouts.get(0).aligned.toSegmentation(Settings.julius_win_offset);

            for (int i = 1; i < julouts.size(); i++) {
                // Each further sentence is appended after the end of what has
                // been collected so far.
                double offset = ret.tiers.get(0).max();
                ret.appendSegmenation(julouts.get(i).aligned.toSegmentation(Settings.julius_win_offset), offset);
            }

            TextGrid grid = new TextGrid(ret);

            grid.write(new File("/home/guest/Desktop/Respeaking/out.TextGrid"));

        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
-------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/files/ClarinText.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.files; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.io.PrintWriter; 6 | import java.util.List; 7 | import java.util.Vector; 8 | import java.util.regex.Matcher; 9 | import java.util.regex.Pattern; 10 | 11 | import javax.xml.parsers.DocumentBuilderFactory; 12 | import javax.xml.parsers.ParserConfigurationException; 13 | import javax.xml.transform.OutputKeys; 14 | import javax.xml.transform.Result; 15 | import javax.xml.transform.Source; 16 | import javax.xml.transform.Transformer; 17 | import javax.xml.transform.TransformerException; 18 | import javax.xml.transform.TransformerFactory; 19 | import javax.xml.transform.TransformerFactoryConfigurationError; 20 | import javax.xml.transform.dom.DOMSource; 21 | import javax.xml.transform.stream.StreamResult; 22 | 23 | import org.w3c.dom.Document; 24 | import org.w3c.dom.Element; 25 | 26 | import pl.edu.pjwstk.kaldi.files.Segmentation; 27 | import pl.edu.pjwstk.kaldi.files.Segmentation.Segment; 28 | import pl.edu.pjwstk.kaldi.utils.Log; 29 | 30 | public class ClarinText { 31 | 32 | public class ClarinPhoneme { 33 | String phoneme; 34 | double start_time; 35 | double end_time; 36 | } 37 | 38 | public class ClarinWord { 39 | 40 | String id; 41 | String word; 42 | 43 | boolean recognizable = true; 44 | 45 | double start_time; 46 | double end_time; 47 | Vector phoneme; 48 | } 49 | 50 | String id; 51 | ClarinWord[] words; 52 | 53 | Vector words_xml; 54 | 55 | public String toString() { 56 | String ret = ""; 57 | for (ClarinWord word : words) { 58 | if (word.recognizable) { 59 | if (ret.isEmpty()) 60 | ret += word.word; 61 | else 62 | ret += " " + word.word; 63 | } 64 | } 65 | return ret; 66 | } 67 | 68 | public void saveText(File file) throws 
IOException { 69 | PrintWriter writer = new PrintWriter(file); 70 | boolean first = true; 71 | for (int i = 0; i < words.length; i++) { 72 | if (words[i].recognizable) { 73 | if (!first) 74 | writer.print(" "); 75 | else 76 | first = false; 77 | writer.print(words[i].word); 78 | } 79 | } 80 | writer.println(); 81 | writer.close(); 82 | } 83 | 84 | public void saveXML(File file) throws IOException, 85 | ParserConfigurationException, TransformerFactoryConfigurationError, 86 | TransformerException { 87 | 88 | if (words_xml == null) 89 | return; 90 | 91 | Document doc = DocumentBuilderFactory.newInstance() 92 | .newDocumentBuilder().newDocument(); 93 | 94 | Element elRoot = doc.createElement("audio-segment"); 95 | elRoot.setAttribute("id", id); 96 | doc.appendChild(elRoot); 97 | 98 | for (ClarinWord word : words_xml) { 99 | Element elWord = doc.createElement("word"); 100 | elWord.setAttribute("id", word.id); 101 | elWord.setAttribute("beg", String.format("%2.3f", word.start_time)); 102 | elWord.setAttribute("end", String.format("%2.3f", word.end_time)); 103 | elWord.setAttribute("word", word.word); 104 | elRoot.appendChild(elWord); 105 | 106 | if (word.phoneme != null) { 107 | for (ClarinPhoneme phone : word.phoneme) { 108 | Element elPhoneme = doc.createElement("phoneme"); 109 | elPhoneme.setAttribute("beg", 110 | String.format("%2.3f", phone.start_time)); 111 | elPhoneme.setAttribute("end", 112 | "" + String.format("%2.3f", phone.end_time)); 113 | elPhoneme.setTextContent(phone.phoneme); 114 | elWord.appendChild(elPhoneme); 115 | } 116 | } 117 | } 118 | 119 | Transformer trans = TransformerFactory.newInstance().newTransformer(); 120 | 121 | trans.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", 122 | "4"); 123 | trans.setOutputProperty(OutputKeys.METHOD, "xml"); 124 | trans.setOutputProperty(OutputKeys.INDENT, "yes"); 125 | 126 | Source source = new DOMSource(doc); 127 | Result result = new StreamResult(file); 128 | trans.transform(source, result); 129 
| 130 | } 131 | 132 | public int size() { 133 | return words.length; 134 | } 135 | 136 | public void checkWords() { 137 | Pattern p = Pattern.compile("[a-zA-ZąĄćĆęĘłŁńŃoÓśŚźŹżŻ]+"); 138 | for (ClarinWord w : words) { 139 | Matcher m = p.matcher(w.word); 140 | if (!m.matches()) 141 | w.recognizable = false; 142 | else 143 | w.recognizable = true; 144 | } 145 | } 146 | 147 | public void processSegmentation(Segmentation segmentation) 148 | throws RuntimeException { 149 | 150 | words_xml = new Vector(); 151 | 152 | List wseg = segmentation.tiers.get(0).segments; 153 | List pseg = segmentation.tiers.get(1).segments; 154 | 155 | int i = 0; 156 | for (Segment w : wseg) { 157 | 158 | while (i < words.length && !words[i].recognizable) 159 | i++; 160 | 161 | if (i >= words.length) 162 | break; 163 | 164 | while (i < words.length && !w.name.equals(words[i].word)) { 165 | Log.warn("Deletion! Fixing..."); 166 | i++; 167 | } 168 | 169 | if (i >= words.length) 170 | break; 171 | 172 | words[i].start_time = w.start_time; 173 | words[i].end_time = w.end_time; 174 | words[i].phoneme = new Vector(); 175 | words_xml.add(words[i]); 176 | 177 | for (Segment p : pseg) { 178 | if (p.start_time - w.start_time > -0.001 179 | && p.end_time - w.end_time < 0.001) { 180 | ClarinPhoneme ph = new ClarinPhoneme(); 181 | ph.phoneme = p.name; 182 | ph.start_time = p.start_time; 183 | ph.end_time = p.end_time; 184 | words[i].phoneme.add(ph); 185 | } 186 | 187 | double dist = p.start_time - w.end_time; 188 | if (dist < 0.001 && dist > -0.001 && p.name.equals("sil")) { 189 | ClarinWord sil = new ClarinWord(); 190 | sil.start_time = p.start_time; 191 | sil.end_time = p.end_time; 192 | sil.id = "n/a"; 193 | sil.word = "SIL"; 194 | sil.phoneme = null; 195 | words_xml.add(sil); 196 | } 197 | } 198 | 199 | i++; 200 | } 201 | } 202 | } 203 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/programs/Essentia.java: 
-------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.programs; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileOutputStream; 6 | import java.io.FileReader; 7 | import java.io.IOException; 8 | import java.nio.ByteBuffer; 9 | import java.nio.ByteOrder; 10 | import java.util.Vector; 11 | 12 | import javax.xml.parsers.DocumentBuilderFactory; 13 | import javax.xml.parsers.ParserConfigurationException; 14 | import javax.xml.transform.OutputKeys; 15 | import javax.xml.transform.Result; 16 | import javax.xml.transform.Source; 17 | import javax.xml.transform.Transformer; 18 | import javax.xml.transform.TransformerException; 19 | import javax.xml.transform.TransformerFactory; 20 | import javax.xml.transform.TransformerFactoryConfigurationError; 21 | import javax.xml.transform.dom.DOMSource; 22 | import javax.xml.transform.stream.StreamResult; 23 | 24 | import org.w3c.dom.Document; 25 | import org.w3c.dom.Element; 26 | 27 | import pl.edu.pjwstk.kaldi.utils.Log; 28 | import pl.edu.pjwstk.kaldi.utils.ProgramLauncher; 29 | import pl.edu.pjwstk.kaldi.utils.Settings; 30 | 31 | public class Essentia { 32 | 33 | public static void pitch(File input, File output) { 34 | 35 | String[] cmd = new String[] { 36 | Settings.essentia_pitch_bin.getAbsolutePath(), 37 | input.getAbsolutePath(), output.getAbsolutePath() }; 38 | 39 | ProgramLauncher launcher = new ProgramLauncher(cmd); 40 | 41 | Log.verbose("Calculating Pitch using Essentia: " 42 | + input.getAbsolutePath() + " " + output.getAbsolutePath()); 43 | launcher.run(); 44 | Log.verbose("Done."); 45 | } 46 | 47 | public static class PitchMark { 48 | public double time; 49 | public double pitch; 50 | public double confidence; 51 | 52 | public PitchMark(double t, double p, double c) { 53 | time = t; 54 | pitch = p; 55 | confidence = c; 56 | } 57 | } 58 | 59 | public static Vector loadPitchYaml(File file) throws IOException { 60 | Vector ret = 
new Vector(); 61 | 62 | BufferedReader reader = new BufferedReader(new FileReader(file)); 63 | 64 | String line; 65 | 66 | double time_step = 0.01; 67 | double time_off = 0.125; 68 | 69 | while ((line = reader.readLine()) != null) { 70 | line = line.trim(); 71 | if (line.startsWith("pitch:")) { 72 | int beg = line.indexOf('['); 73 | if (beg < 0) { 74 | reader.close(); 75 | throw new IOException("Parsing error"); 76 | } 77 | int end = line.indexOf(']'); 78 | if (end < 0) { 79 | reader.close(); 80 | throw new IOException("Parsing error"); 81 | } 82 | 83 | line = line.substring(beg + 1, end).trim(); 84 | 85 | String tok[] = line.split("\\s*,\\s*"); 86 | 87 | int i = 0; 88 | for (String num : tok) { 89 | double val = Double.parseDouble(num); 90 | if (ret.size() <= i) { 91 | ret.addElement(new PitchMark(time_off + i * time_step, 92 | val, 1)); 93 | } else { 94 | ret.get(i).pitch = val; 95 | } 96 | i++; 97 | } 98 | 99 | } 100 | 101 | if (line.startsWith("pitch_confidence:")) { 102 | int beg = line.indexOf('['); 103 | if (beg < 0) { 104 | reader.close(); 105 | throw new IOException("Parsing error"); 106 | } 107 | int end = line.indexOf(']'); 108 | if (end < 0) { 109 | reader.close(); 110 | throw new IOException("Parsing error"); 111 | } 112 | 113 | line = line.substring(beg + 1, end).trim(); 114 | 115 | String tok[] = line.split("\\s*,\\s*"); 116 | 117 | int i = 0; 118 | for (String num : tok) { 119 | double val = Double.parseDouble(num); 120 | if (ret.size() <= i) { 121 | ret.addElement(new PitchMark(time_off + i * time_step, 122 | 0, val)); 123 | } else { 124 | ret.get(i).confidence = val; 125 | } 126 | i++; 127 | } 128 | 129 | } 130 | } 131 | 132 | reader.close(); 133 | 134 | return ret; 135 | } 136 | 137 | public static void savePitchMarksToXML(String audio_id, 138 | Vector pitch, File xml) 139 | throws TransformerFactoryConfigurationError, 140 | ParserConfigurationException, TransformerException { 141 | 142 | Document doc = DocumentBuilderFactory.newInstance() 143 | 
.newDocumentBuilder().newDocument(); 144 | 145 | Element elRoot = doc.createElement("audio-segment"); 146 | elRoot.setAttribute("id", audio_id); 147 | doc.appendChild(elRoot); 148 | 149 | for (PitchMark p : pitch) { 150 | Element elPitch = doc.createElement("pitch"); 151 | elPitch.setAttribute("t", String.format("%2.3f", p.time)); 152 | elPitch.setAttribute("c", String.format("%2.3f", p.confidence)); 153 | elPitch.setTextContent(String.format("%2.3f", p.pitch)); 154 | elRoot.appendChild(elPitch); 155 | } 156 | 157 | Transformer trans = TransformerFactory.newInstance().newTransformer(); 158 | 159 | trans.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", 160 | "4"); 161 | trans.setOutputProperty(OutputKeys.METHOD, "xml"); 162 | trans.setOutputProperty(OutputKeys.INDENT, "yes"); 163 | 164 | Source source = new DOMSource(doc); 165 | Result result = new StreamResult(xml); 166 | trans.transform(source, result); 167 | } 168 | 169 | public static void generateSignalFromPitchMarks(Vector pitch, 170 | File sound_file) throws IOException { 171 | double win_dur = 0.025; 172 | double Fs = 16000; 173 | double T = pitch.lastElement().time + win_dur / 2; 174 | 175 | ByteBuffer sound_buf = ByteBuffer.allocate((int) (2.0 * T * Fs)); 176 | 177 | sound_buf.order(ByteOrder.LITTLE_ENDIAN); 178 | 179 | for (int i = 0; i < pitch.size() - 1; i++) { 180 | PitchMark a = pitch.get(i); 181 | PitchMark b = pitch.get(i + 1); 182 | 183 | if (b.time - a.time > 0.1) 184 | continue; 185 | 186 | int s = (int) (a.time * Fs); 187 | int num = (int) ((b.time - a.time) * Fs); 188 | double f = a.pitch; 189 | double df = (b.pitch - a.pitch) / num; 190 | for (int j = s; j < s + num; j++) { 191 | short sval; 192 | 193 | sval = (short) (Short.MAX_VALUE / 2.0 * Math.sin(2 * Math.PI 194 | * f * j / Fs)); 195 | sound_buf.putShort(2 * j, sval); 196 | 197 | j++; 198 | sval = (short) (Short.MAX_VALUE / 2.0 * Math.sin(2 * Math.PI 199 | * f * j / Fs)); 200 | sound_buf.putShort(2 * j, sval); 201 | 202 | f 
/**
 * A small word-level grammar stored as a list of arcs between numbered
 * nodes. Node 0 is the start node; {@code node_count} is the number of
 * nodes in use, so the highest node id is {@code node_count-1}.
 *
 * Every arc carries the id of a word; ids are indices into
 * {@code word_list} and {@code word_map} is the reverse lookup. The grammar
 * can be written out as a text FST plus a symbol table with
 * {@link #save(File, File)}.
 */
public class Grammar {

    /** A directed arc from one node to another, labelled with a word id. */
    static class Arc {
        public int from,to,id;
        public Arc(int from, int to, int id) {
            this.from=from;
            this.to=to;
            this.id=id;
        }
    }

    // Default symbol table every grammar starts with: a single symbol with id 0.
    // NOTE(review): the symbol appears as an empty string here; presumably it is
    // the epsilon label - confirm against the original file.
    static Map<String,Integer> eps_map;
    static List<String> eps_list;
    static {
        eps_map=new TreeMap<String,Integer>();
        eps_map.put("",0);
        eps_list=new LinkedList<String>();
        eps_list.add("");

    }

    List<Arc> arcs=new LinkedList<Arc>();
    int node_count=1;
    Set<Integer> end_nodes=new HashSet<Integer>();
    Map<String,Integer> word_map=new HashMap<String,Integer>(eps_map);
    List<String> word_list=new ArrayList<String>(eps_list);

    /** Returns the id of the highest-numbered node. */
    public int getLastNode()
    {
        return node_count-1;
    }

    /**
     * Adds all symbols of another grammar to this one.
     *
     * Symbols are added one by one in the other grammar's id order, so
     * {@code word_list} and {@code word_map} stay consistent; on a freshly
     * created grammar the resulting ids are identical to the other
     * grammar's ids.
     *
     * @param other grammar whose symbols are copied
     */
    public void copySymbols(Grammar other) {
        // Appending other.word_list wholesale would duplicate symbols that
        // are already present and shift the list out of sync with the map.
        for(String word:other.word_list)
            getWordID(word);
    }

    /** Returns the id of the word, registering it as a new symbol if needed. */
    private int getWordID(String word)
    {
        if(word_map.containsKey(word))
            return word_map.get(word);
        else {
            word_list.add(word);
            word_map.put(word,word_list.size()-1);
            return word_list.size()-1;
        }
    }

    /**
     * Turns an empty grammar into one accepting exactly the given word
     * (a single arc from node 0 to end node 1).
     */
    public void setWord(String word) {

        assert(arcs.isEmpty());

        int wid=getWordID(word);
        arcs.add(new Arc(0,1,wid));
        end_nodes.add(1);
        node_count=2;
    }

    /**
     * Turns an empty grammar into one accepting any single word of the
     * list (parallel arcs from node 0 to end node 1).
     */
    public void setWordList(List<String> words) {

        assert(arcs.isEmpty());

        for (String word:words) {
            int wid=getWordID(word);
            arcs.add(new Arc(0,1,wid));
        }
        end_nodes.add(1);
        node_count=2;
    }

    /**
     * Turns an empty grammar into one accepting exactly the given sequence
     * of words (a chain of arcs 0-1-2-...-n, with n as the end node).
     */
    public void setWordSequence(List<String> words) {

        assert(arcs.isEmpty());

        int node=0;
        for (String word:words) {
            int wid=getWordID(word);
            arcs.add(new Arc(node,node+1,wid));
            node++;
        }
        end_nodes.add(node);
        // nodes 0..node are in use, hence node+1 nodes in total
        node_count=node+1;
    }


    /**
     * Maps every word id of the other grammar to the id the same word has
     * in this grammar; words unknown to this grammar are registered.
     *
     * @param other grammar whose ids are translated
     * @return map from the other grammar's word ids to this grammar's ids
     */
    public Map<Integer,Integer> getWordListMapping(Grammar other) {

        Map<Integer,Integer> ret=new TreeMap<Integer,Integer>();
        int id=0;
        for(String word:other.word_list) {
            ret.put(id,getWordID(word));
            id++;
        }
        return ret;
    }

    /** Attaches the other grammar at the highest-numbered node of this one. */
    public void attach(Grammar other) {
        attach(other,node_count-1);
    }

    /**
     * Copies the arcs of the other grammar into this one, with the other
     * grammar's start node (0) identified with {@code node} and all its
     * remaining nodes renumbered behind this grammar's existing nodes.
     * {@code node} stops being an end node; the other grammar's end nodes
     * become end nodes of this grammar.
     *
     * @param other grammar to attach
     * @param node node of this grammar the other grammar starts from
     */
    public void attach(Grammar other, int node) {

        Map<Integer,Integer> id_map=getWordListMapping(other);

        int offset=node_count-1;

        for(Arc arc:other.arcs) {
            int from=arc.from;
            if(from==0)
                from=node;
            else
                from+=offset;
            if(from>=node_count)
                node_count=from+1;

            int to=arc.to;
            if(to==0)
                to=node;
            else
                to+=offset;
            if(to>=node_count)
                node_count=to+1;

            int id=id_map.get(arc.id);

            arcs.add(new Arc(from,to,id));
        }

        end_nodes.remove(node);

        for(Integer id:other.end_nodes)
            end_nodes.add(id+offset);
    }

    /**
     * Copies the arcs of the other grammar into this one. Nodes of the
     * other grammar listed as keys in {@code links} are identified with the
     * mapped node of this grammar; all other nodes are renumbered behind
     * this grammar's existing nodes. The linked nodes of this grammar stop
     * being end nodes; the other grammar's end nodes (translated the same
     * way) become end nodes of this grammar.
     *
     * @param other grammar to merge in
     * @param links map from node ids of {@code other} to node ids of this grammar
     */
    public void merge(Grammar other, Map<Integer,Integer> links)
    {
        Map<Integer,Integer> id_map=getWordListMapping(other);

        int offset=node_count-1;

        for(Arc arc:other.arcs) {
            int from=arc.from;
            if(links.containsKey(from))
                from=links.get(from);
            else
                from+=offset;
            if(from>=node_count)
                node_count=from+1;

            int to=arc.to;
            if(links.containsKey(to))
                to=links.get(to);
            else
                to+=offset;
            if(to>=node_count)
                node_count=to+1;

            int id=id_map.get(arc.id);

            arcs.add(new Arc(from,to,id));
        }

        for(Map.Entry<Integer,Integer> e:links.entrySet())
            end_nodes.remove(e.getValue());

        for(Integer id:other.end_nodes) {
            if(links.containsKey(id))
                end_nodes.add(links.get(id));
            else
                end_nodes.add(id + offset);
        }
    }

    /**
     * Returns a copy of this grammar with its own arc list, end-node set
     * and symbol table. The Arc objects themselves are shared.
     */
    @Override
    public Grammar clone() {
        Grammar ret=new Grammar();
        ret.arcs.addAll(arcs);
        ret.node_count=node_count;
        ret.end_nodes.addAll(end_nodes);
        ret.word_map=new HashMap<String,Integer>(word_map);
        ret.word_list=new ArrayList<String>(word_list);
        return ret;
    }

    /**
     * Adds a new node, connects every current end node to it with an arc
     * labelled with word id 0, and makes it the only end node.
     *
     * @return id of the new end node
     */
    public int fixend()
    {
        int end=node_count;
        node_count++;
        for(Integer id:end_nodes)
        {
            arcs.add(new Arc(id,end,0));
        }
        end_nodes.clear();
        end_nodes.add(end);
        return end;
    }


    /**
     * Writes the grammar as text: one {@code from to word word} line per
     * arc followed by one line per end node. When {@code wordlist} is not
     * null, the symbol table is written there as {@code word id} lines.
     *
     * @param fst destination of the arc list
     * @param wordlist destination of the symbol table, or null to skip it
     * @throws IOException if a file cannot be opened for writing
     */
    public void save(File fst, File wordlist) throws IOException {

        // try-with-resources so the files are closed even if writing fails
        try (PrintWriter writer = new PrintWriter(fst)) {

            for(Arc arc:arcs) {
                String w=word_list.get(arc.id);
                writer.println(arc.from+" "+arc.to+" "+w+" "+w);
            }

            for(Integer id:end_nodes) {
                writer.println(id);
            }
        }

        if(wordlist!=null) {
            try (PrintWriter writer = new PrintWriter(wordlist)) {

                int id = 0;
                for (String w : word_list) {
                    writer.println(w + " " + id);
                    id++;
                }
            }
        }
    }

}
pl.edu.pjwstk.kaldi.programs.KaldiScripts; 10 | import pl.edu.pjwstk.kaldi.programs.KaldiUtils; 11 | import pl.edu.pjwstk.kaldi.programs.Transcriber; 12 | import pl.edu.pjwstk.kaldi.utils.FileUtils; 13 | import pl.edu.pjwstk.kaldi.utils.Log; 14 | import pl.edu.pjwstk.kaldi.utils.Settings; 15 | import pl.edu.pjwstk.kaldi.utils.WAV; 16 | 17 | import javax.xml.soap.Text; 18 | import java.io.File; 19 | import java.io.PrintWriter; 20 | import java.util.*; 21 | 22 | public class ExperimentMain { 23 | 24 | public static void main(String[] args) { 25 | 26 | try { 27 | File work_dir = new File("work"); 28 | 29 | Log.init("Experiment", false); 30 | Transcriber.init(); 31 | KaldiUtils.init(); 32 | KaldiUtils.test(); 33 | KaldiScripts.init(work_dir); 34 | KaldiScripts.test(); 35 | 36 | Log.info("Starting experiment..."); 37 | 38 | File lang_dir = KaldiScripts.lang_dir; 39 | File temp_dir = KaldiScripts.temp_dir; 40 | 41 | Grammar grammar = Akt.zgonu(); 42 | 43 | File fst_txt = new File(lang_dir, "grammar.txt"); 44 | File syms = new File(lang_dir, "grammar.syms"); 45 | grammar.save(fst_txt, syms); 46 | 47 | File fst_bin = new File(lang_dir, "grammar.bin"); 48 | KaldiUtils.fstcompile(syms, syms, false, false, fst_txt, fst_bin); 49 | 50 | File fst_sort = new File(lang_dir, "grammar.sort"); 51 | KaldiUtils.fstarcsort("ilabel", fst_bin, fst_sort); 52 | 53 | File fst_det = new File(lang_dir, "grammar.det"); 54 | KaldiUtils.fstdeterminizestar(fst_sort, fst_det, false); 55 | 56 | File fst_min = new File(lang_dir, "grammar_min"); 57 | KaldiUtils.fstminimizeencoded(fst_det, fst_min); 58 | 59 | Log.info("Generating random sequences from grammar..."); 60 | Log.disableOutput(); 61 | Random rand = new Random((new Date()).getTime()); 62 | PrintWriter rand_samples = new PrintWriter(new File(work_dir, "random_grammar.txt")); 63 | for (int i = 0; i < 100; i++) { 64 | File fst_rand = new File(lang_dir, "rand.fst"); 65 | KaldiUtils.fstrandgen(fst_min, fst_rand, rand.nextInt()); 66 | 67 | File 
rand_txt = new File(lang_dir, "rand.txt"); 68 | KaldiUtils.fstprint(fst_rand, rand_txt, syms, syms); 69 | 70 | Vector words = FileUtils.cut(rand_txt, 2); 71 | rand_samples.println("" + words); 72 | } 73 | Log.enableOutput(); 74 | 75 | 76 | File fst_dot = new File(work_dir, "grammar.dot"); 77 | KaldiUtils.fstdraw(fst_min, fst_dot, syms, syms, true); 78 | 79 | File v_temp = new File(lang_dir, "vocab.tmp"); 80 | File vocab = new File(lang_dir, "vocab"); 81 | 82 | HashSet skip = new HashSet(); 83 | skip.add(""); 84 | 85 | FileUtils.getSymsFromList(syms, v_temp, skip); 86 | 87 | Vector lines = new Vector(); 88 | lines.add(""); 89 | lines.add("SIL"); 90 | 91 | FileUtils.appendLines(v_temp, "UTF-8", vocab, "UTF-8", lines, true); 92 | 93 | File dict_raw = new File(lang_dir, "dict_raw"); 94 | File dict = new File(lang_dir, "dict"); 95 | Transcriber.transcribe(vocab, Settings.default_encoding, dict_raw, Settings.default_encoding, false); 96 | FileUtils.sort_uniq(dict_raw, dict, Settings.default_encoding); 97 | 98 | KaldiScripts.prepare_lang(KaldiScripts.lang_dir, dict, vocab); 99 | 100 | 101 | File fst_txt2 = new File(lang_dir, "grammar_2.txt"); 102 | KaldiUtils.fstprint(fst_min, fst_txt2, syms, syms); 103 | 104 | File fst_final = new File(lang_dir, "G.fst"); 105 | File words = new File(lang_dir, "words.txt"); 106 | KaldiUtils.fstcompile(words, words, false, false, fst_txt2, fst_final); 107 | 108 | 109 | 110 | 111 | File input_txt = new File("/home/guest/Desktop/GENEA/audio/metryka_zgonu.txt"); 112 | Vector input_tok = FileUtils.readTokens(input_txt, "UTF-8"); 113 | Grammar input_lin = new Grammar(); 114 | input_lin.setWordSequence(input_tok); 115 | File input_fst = new File(lang_dir, "input.fst"); 116 | input_lin.save(input_fst, null); 117 | 118 | File input_bin = new File(lang_dir, "input.bin"); 119 | KaldiUtils.fstcompile(words, words, false, false, input_fst, input_bin); 120 | 121 | File output_bin = new File(lang_dir, "output.bin"); 122 | 
KaldiUtils.fstcompose(input_bin, fst_final, output_bin); 123 | 124 | if (input_bin.length() == output_bin.length()) { 125 | Log.info("Files are the same. Seems like everything is ok."); 126 | } else { 127 | Log.warn("Files are NOT the same! There may be a problem!"); 128 | } 129 | 130 | 131 | 132 | File Ldisamb = new File(lang_dir, "L_disambig.fst"); 133 | File HCLG = new File(lang_dir, "HCLG.fst"); 134 | KaldiScripts.makeHCLG(fst_final, Ldisamb, HCLG); 135 | 136 | File wav = new File("/home/guest/Desktop/GENEA/audio/metryka_zgonu_16k.wav"); 137 | KaldiScripts.decode(wav, false); 138 | 139 | File lattice = new File(temp_dir, "aligned"); 140 | File lat_fst_txt = new File(temp_dir, "lat_fst.txt"); 141 | KaldiUtils.lattice_to_fst(lattice, lat_fst_txt); 142 | 143 | File lat_fst_tmp = new File(temp_dir, "lat_fst_tmp.txt"); 144 | FileUtils.tail(lat_fst_txt, lat_fst_tmp, 1); 145 | 146 | KaldiUtils.int2sym("3-4", words, lat_fst_tmp, lat_fst_txt); 147 | 148 | File lat_fst = new File(temp_dir, "lat.fst"); 149 | KaldiUtils.fstcompile(words, words, false, false, lat_fst_txt, lat_fst); 150 | 151 | File lat_dot = new File(work_dir, "lattice.dot"); 152 | KaldiUtils.fstdraw(lat_fst, lat_dot, words, words, true); 153 | 154 | File words_ctm = new File(temp_dir, "words.ctm"); 155 | CTM ctm = new CTM(); 156 | ctm.read(words_ctm); 157 | 158 | File out_tg = new File(work_dir, "out.TextGrid"); 159 | TextGrid textgrid = new TextGrid(ctm); 160 | textgrid.write(out_tg); 161 | 162 | Segmentation align_seg = KaldiScripts.align(wav, input_txt, false); 163 | TextGrid tg_align=new TextGrid(align_seg); 164 | tg_align.write(new File(work_dir,"align.TextGrid")); 165 | 166 | Log.info("Done!"); 167 | 168 | } catch (Exception e) { 169 | e.printStackTrace(); 170 | } 171 | 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/utils/Log.java: -------------------------------------------------------------------------------- 
1 | package pl.edu.pjwstk.kaldi.utils; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | import java.io.IOException; 7 | import java.io.OutputStream; 8 | import java.io.PrintWriter; 9 | import java.text.SimpleDateFormat; 10 | import java.util.Date; 11 | import java.util.logging.Handler; 12 | import java.util.logging.Level; 13 | import java.util.logging.LogRecord; 14 | import java.util.logging.Logger; 15 | 16 | public class Log { 17 | 18 | public static class Stream extends OutputStream { 19 | 20 | private StringBuffer str = new StringBuffer(); 21 | 22 | private String prefix = "OUT>>"; 23 | 24 | public Stream() { 25 | } 26 | 27 | public Stream(String prefix) { 28 | this.prefix = prefix; 29 | } 30 | 31 | @Override 32 | public void write(int b) throws IOException { 33 | if (b == '\n' || b == '\r') { 34 | Log.verbose(prefix + str.toString()); 35 | str.setLength(0); 36 | } else 37 | str.append((char) b); 38 | } 39 | 40 | } 41 | 42 | private static class FileLog extends Handler { 43 | 44 | private PrintWriter fileWriter; 45 | 46 | public FileLog(String prog_name, boolean append) 47 | throws FileNotFoundException { 48 | 49 | this(new File(Settings.log_dir, prog_name + "_main.log"), append); 50 | } 51 | 52 | public FileLog(File logFile, boolean append) 53 | throws FileNotFoundException { 54 | 55 | Settings.log_dir.mkdirs(); 56 | 57 | fileWriter = new PrintWriter(new FileOutputStream(logFile, append)); 58 | } 59 | 60 | @Override 61 | public void close() throws SecurityException { 62 | fileWriter.close(); 63 | 64 | } 65 | 66 | @Override 67 | public void flush() { 68 | fileWriter.flush(); 69 | } 70 | 71 | private SimpleDateFormat sdf = new SimpleDateFormat( 72 | "[yyyy-MM-dd HH:mm:ss]"); 73 | 74 | @Override 75 | public void publish(LogRecord lr) { 76 | 77 | String loglevel = lr.getLevel().getName(); 78 | if (lr.getLevel() == Level.SEVERE) 79 | loglevel = "ERROR"; 80 | 81 | String line = sdf.format(new 
Date(lr.getMillis())) + " <" 82 | + loglevel + "> " + lr.getMessage(); 83 | 84 | fileWriter.println(line); 85 | fileWriter.flush(); 86 | 87 | Throwable th = lr.getThrown(); 88 | if (th != null) { 89 | fileWriter.println("Exception: " + th.toString()); 90 | 91 | fileWriter.println("Stack trace:"); 92 | 93 | for (StackTraceElement el : th.getStackTrace()) { 94 | fileWriter.println("**" + el.getFileName() + ":" 95 | + el.getLineNumber() + " in " + el.getClassName() 96 | + ":" + el.getMethodName()); 97 | } 98 | 99 | fileWriter.flush(); 100 | } 101 | 102 | } 103 | } 104 | 105 | public static class SimpleConsoleLog extends Handler { 106 | 107 | @Override 108 | public void close() throws SecurityException { 109 | } 110 | 111 | @Override 112 | public void flush() { 113 | System.out.flush(); 114 | System.err.flush(); 115 | } 116 | 117 | private SimpleDateFormat sdf = new SimpleDateFormat( 118 | "[yyyy-MM-dd HH:mm:ss]"); 119 | 120 | @Override 121 | public void publish(LogRecord lr) { 122 | 123 | String loglevel = lr.getLevel().getName(); 124 | if (lr.getLevel() == Level.SEVERE) 125 | loglevel = "ERROR"; 126 | 127 | String line = sdf.format(new Date(lr.getMillis())) + " <" 128 | + loglevel + "> " + lr.getMessage(); 129 | 130 | if (lr.getLevel() == Level.SEVERE) 131 | System.err.println(line); 132 | else 133 | System.out.println(line); 134 | 135 | Throwable th = lr.getThrown(); 136 | if (th != null) { 137 | th.printStackTrace(); 138 | } 139 | 140 | } 141 | 142 | } 143 | 144 | private static Logger logger = null; 145 | private static boolean suppress_output = false; 146 | 147 | public static void init(String prog_name, boolean append) 148 | throws SecurityException, FileNotFoundException { 149 | 150 | initFile(prog_name, append); 151 | 152 | logger.addHandler(new SimpleConsoleLog()); 153 | } 154 | 155 | public static void initFile(String prog_name, boolean append) 156 | throws SecurityException, FileNotFoundException { 157 | 158 | logger = 
Logger.getLogger(Log.class.getPackage().getName()); 159 | 160 | logger.setUseParentHandlers(false); 161 | 162 | logger.addHandler(new FileLog(prog_name, append)); 163 | 164 | logger.setLevel(Level.ALL); 165 | } 166 | 167 | public static void setLevel(Level level) { 168 | if (logger != null) 169 | logger.setLevel(level); 170 | } 171 | 172 | public static void disableOutput() { 173 | suppress_output=true; 174 | } 175 | 176 | public static void enableOutput() { 177 | suppress_output=false; 178 | } 179 | 180 | public static void verbose(String message) { 181 | 182 | if(suppress_output) return; 183 | 184 | if (logger == null) { 185 | System.out.println("V:" + message); 186 | return; 187 | } 188 | 189 | logger.log(Level.FINE, message); 190 | } 191 | 192 | public static void info(String message) { 193 | 194 | if(suppress_output) return; 195 | 196 | if (logger == null) { 197 | System.out.println("I:" + message); 198 | return; 199 | } 200 | 201 | logger.log(Level.INFO, message); 202 | } 203 | 204 | public static void warn(String message) { 205 | 206 | if(suppress_output) return; 207 | 208 | if (logger == null) { 209 | System.out.println("W:" + message); 210 | return; 211 | } 212 | 213 | logger.log(Level.WARNING, message); 214 | } 215 | 216 | public static void error(String message) { 217 | 218 | if(suppress_output) return; 219 | 220 | if (logger == null) { 221 | System.out.println("E:" + message); 222 | return; 223 | } 224 | 225 | logger.log(Level.SEVERE, message); 226 | } 227 | 228 | public static void error(String message, Throwable e) { 229 | 230 | if(suppress_output) return; 231 | 232 | if (logger == null) { 233 | System.out.println("E:" + message); 234 | e.printStackTrace(); 235 | return; 236 | } 237 | 238 | logger.log(Level.SEVERE, message, e); 239 | } 240 | 241 | public static void addHandler(Handler h) { 242 | if (logger != null) 243 | logger.addHandler(h); 244 | } 245 | 246 | } 247 | -------------------------------------------------------------------------------- 
/**
 * In-memory model of a time-aligned annotation: an ordered list of tiers, each
 * holding labelled time segments. Concrete subclasses provide the file format
 * through {@link #read(File)} and {@link #write(File)}.
 */
public abstract class Segmentation {

    /** Gap tolerance used by {@link #mergeOverlappingAndAdjecent(int)}. */
    private static final double DEFAULT_MERGE_GAP = 0.2;

    /** A single labelled interval; ordered by {@link #start_time}. */
    public static class Segment implements Comparable<Segment> {
        public double start_time;
        public double end_time;
        /** Label of the segment; {@code null} is used as a "no label" marker by the merge helpers. */
        public String name;
        public double confidence;
        public boolean used = false;

        /**
         * Orders segments by start time. {@link Double#compare} is used so that
         * the ordering stays a total order even for NaN and signed zeros, which
         * sorting requires.
         */
        @Override
        public int compareTo(Segment o) {
            return Double.compare(start_time, o.start_time);
        }
    }

    /** A named sequence of segments. */
    public static class Tier {
        public String name;
        public List<Segment> segments = new LinkedList<>();

        /** @return the smallest start time in this tier, or 0 when the tier is empty */
        public double min() {
            if (segments.isEmpty())
                return 0;

            double min = segments.get(0).start_time;
            for (Segment s : segments)
                if (min > s.start_time)
                    min = s.start_time;
            return min;
        }

        /** @return the largest end time in this tier, or 0 when the tier is empty */
        public double max() {
            if (segments.isEmpty())
                return 0;

            double max = segments.get(0).end_time;
            for (Segment s : segments)
                if (max < s.end_time)
                    max = s.end_time;
            return max;
        }

        /** Appends a segment with confidence 1.0. */
        public void add(double start, double end, String name) {
            add(start, end, name, 1.0);
        }

        /** Appends a new segment built from the given values; no ordering is enforced. */
        public void add(double start, double end, String name, double confidence) {
            Segment segment = new Segment();
            segment.start_time = start;
            segment.end_time = end;
            segment.name = name;
            segment.confidence = confidence;
            segments.add(segment);
        }

        /** Sorts the segments by start time. */
        public void sort() {
            Collections.sort(segments);
        }

        /**
         * Sorts the tier, then folds every run of consecutive named segments
         * into the first segment of the run (end time extended, confidence
         * averaged pairwise, names joined with a space). Segments whose name is
         * {@code null} separate runs and are kept; the named neighbours of such
         * a separator are snapped to its boundaries.
         */
        public void mergeOverlappingAndAdjecent() {
            sort();
            Segment old = null;
            Iterator<Segment> iter = segments.iterator();
            while (iter.hasNext()) {
                Segment seg = iter.next();
                if (old != null && old.name != null && seg.name != null) {
                    if (seg.end_time > old.end_time)
                        old.end_time = seg.end_time;
                    old.confidence = (seg.confidence + old.confidence) / 2;
                    old.name = old.name + " " + seg.name;
                    iter.remove();
                } else {
                    // named segment right after a separator starts where the separator ends
                    if (old != null && old.name == null && seg.name != null) {
                        seg.start_time = old.end_time;
                    }

                    // named segment right before a separator ends where the separator starts
                    if (seg.name == null && old != null && old.name != null) {
                        old.end_time = seg.start_time;
                    }

                    old = seg;
                }
            }
        }

        /** Removes every segment whose name is {@code null}. */
        public void removeNull() {
            Iterator<Segment> iter = segments.iterator();
            while (iter.hasNext()) {
                if (iter.next().name == null)
                    iter.remove();
            }
        }
    }

    public List<Tier> tiers = new LinkedList<>();

    /** @return the smallest {@link Tier#min()} over all tiers, or 0 when there are no tiers */
    public double min() {
        if (tiers.isEmpty())
            return 0;
        double min = tiers.get(0).min();
        for (Tier t : tiers) {
            double d = t.min();
            if (min > d)
                min = d;
        }
        return min;
    }

    /** @return the largest {@link Tier#max()} over all tiers, or 0 when there are no tiers */
    public double max() {
        if (tiers.isEmpty())
            return 0;
        double max = tiers.get(0).max();
        for (Tier t : tiers) {
            double d = t.max();
            if (max < d)
                max = d;
        }
        return max;
    }

    /** Sorts every tier by segment start time. */
    public void sort() {
        for (Tier tier : tiers)
            tier.sort();
    }

    /** Returns the tier at the given index, appending empty tiers until it exists. */
    private Tier tierAt(int tier) {
        while (tiers.size() <= tier)
            tiers.add(new Tier());
        return tiers.get(tier);
    }

    /** Sets the name of a tier, creating it (and any missing tiers before it) if needed. */
    public void renameTier(int tier, String name) {
        tierAt(tier).name = name;
    }

    /** Adds a segment with confidence 1.0 to a tier, creating the tier if needed. */
    public void addSegment(int tier, double start, double end, String name) {
        tierAt(tier).add(start, end, name);
    }

    /** Adds a segment to a tier, creating the tier if needed. */
    public void addSegment(int tier, double start, double end, String name, double confidence) {
        tierAt(tier).add(start, end, name, confidence);
    }

    /** Appends all tiers of another segmentation; the tier objects are shared, not copied. */
    public void addTiers(Segmentation segmentation) {
        tiers.addAll(segmentation.tiers);
    }

    /** Appends one tier of another segmentation (shared, not copied); out-of-range indices are ignored. */
    public void addTier(Segmentation segmentation, int idx) {
        if (idx >= 0 && idx < segmentation.tiers.size())
            tiers.add(segmentation.tiers.get(idx));
    }

    public abstract void read(File file) throws IOException;

    public abstract void write(File file) throws IOException;

    /** Prints every tier name followed by its segments (start, end, name) to standard output. */
    public void dump() {
        for (Tier tier : tiers) {
            System.out.println("" + tier.name + "");
            for (Segment segment : tier.segments) {
                System.out.format("%4.4f %4.4f %s", segment.start_time, segment.end_time, segment.name);
                System.out.println();
            }
        }
    }

    /**
     * Finds the label of the segment that overlaps the given interval the most.
     *
     * @param tier_idx index of the tier to search
     * @param fr_start start of the query interval
     * @param fr_end   end of the query interval
     * @return the name of the segment with the largest strictly positive
     *         overlap (the first one wins on ties), or {@code null} if no
     *         segment overlaps
     */
    public String getLabel(int tier_idx, double fr_start, double fr_end) {
        String ret = null;
        double max_overlap = 0;
        Tier tier = tiers.get(tier_idx);
        for (Segment segment : tier.segments) {
            double o = overlap(fr_start, fr_end, segment.start_time, segment.end_time);
            if (o > max_overlap) {
                max_overlap = o;
                ret = segment.name;
            }
        }

        return ret;
    }

    /** Length of the intersection of two intervals, 0 when they do not intersect. */
    private double overlap(double a_start, double a_end, double b_start, double b_end) {
        return Math.max(0, Math.min(a_end, b_end) - Math.max(a_start, b_start));
    }

    /** Makes this segmentation share the tier list of another one. */
    public void link(Segmentation other) {
        tiers = other.tiers;
    }

    /**
     * Merges neighbouring segments of a tier that overlap or are less than 0.2
     * apart. See {@link #mergeOverlappingAndAdjecent(int, double)}.
     */
    public void mergeOverlappingAndAdjecent(int tier) {
        mergeOverlappingAndAdjecent(tier, DEFAULT_MERGE_GAP);
    }

    /**
     * Walks a tier in its current order and folds each segment into its
     * predecessor when the gap between them is smaller than {@code maxGap}.
     * The merged segment keeps the later of the two end times, the mean of the
     * two confidences and both names joined with a space. The tier is not
     * sorted here, so segments are expected to be ordered by start time already.
     *
     * @param tier   index of the tier to process
     * @param maxGap largest distance between the end of one segment and the
     *               start of the next that is still bridged
     */
    public void mergeOverlappingAndAdjecent(int tier, double maxGap) {
        Segment old = null;
        Iterator<Segment> iter = tiers.get(tier).segments.iterator();
        while (iter.hasNext()) {
            Segment seg = iter.next();
            if (old != null && (old.end_time - seg.start_time) > -maxGap) {
                // never shrink the merged span when the absorbed segment ends earlier
                if (seg.end_time > old.end_time)
                    old.end_time = seg.end_time;
                old.confidence = (seg.confidence + old.confidence) / 2;
                old.name = old.name + " " + seg.name;
                iter.remove();
            } else {
                old = seg;
            }
        }
    }

    /**
     * Appends copies of all segments of another segmentation, shifted by
     * {@code offset}, to the tiers with the same index. Tier names are not
     * compared.
     *
     * @throws RuntimeException if the two segmentations differ in tier count
     */
    public void appendSegmenation(Segmentation segmentation, double offset) throws RuntimeException {

        if (segmentation.tiers.size() != tiers.size())
            throw new RuntimeException("Segmentations must match in tier count!");

        for (int i = 0; i < tiers.size(); i++) {
            Tier thistier = tiers.get(i);
            Tier othertier = segmentation.tiers.get(i);

            for (Segment oseg : othertier.segments) {
                Segment tseg = new Segment();
                tseg.confidence = oseg.confidence;
                tseg.name = oseg.name;
                tseg.start_time = oseg.start_time + offset;
                tseg.end_time = oseg.end_time + offset;
                thistier.segments.add(tseg);
            }
        }
    }

    /** Shifts the start and end time of every segment in every tier by {@code offset}. */
    public void offsetSegments(double offset) {
        for (Tier tier : tiers)
            for (Segment seg : tier.segments) {
                seg.start_time += offset;
                seg.end_time += offset;
            }
    }

}
| import pl.edu.pjwstk.kaldi.files.SegmentationList; 12 | import difflib.Delta; 13 | import difflib.Delta.TYPE; 14 | import difflib.DiffUtils; 15 | import difflib.Patch; 16 | /** Word-level comparison of a decoder hypothesis against a reference transcript, built on DiffUtils.diff. */ 17 | public class Diff { 18 | /** Forces val into the index range [min, max - 1]: anything below min becomes min, anything at or above max becomes max - 1. */ 19 | private static int clamp(int val, int min, int max) { 20 | if (val < min) 21 | return min; 22 | if (val >= max) 23 | return max - 1; 24 | return val; 25 | } 26 | 27 | /** 28 | * Computes the diff between hypothesis and reference. Returns a 29 | * segmentation with 3 tiers: first containing the incorrect segments, 30 | * second and third containing correct segments with words and phonemes 31 | * respectively. 32 | * 33 | * @param hyp 34 | * hypothesis returned by the decoder (2 tiers: words and 35 | * phonemes) 36 | * @param ref 37 | * reference known to be true (split into words on spaces and tabs) * @param file_len presumably the length of the audio in seconds - TODO confirm, the code that reads it is not legible in this copy 38 | * @return segmentation with 3 tiers 39 | */ 40 | public static Segmentation diff(Segmentation hyp, String ref, double file_len) { 41 | 42 | SegmentationList ret = new SegmentationList(); 43 | List hypsegs = hyp.tiers.get(0).segments; 44 | List phsegs = hyp.tiers.get(1).segments; 45 | /* word lists handed to the diff, plus an indexable copy of the reference words for extract() */ 46 | LinkedList hyp_words = new LinkedList(); 47 | LinkedList ref_words = new LinkedList(); 48 | Vector ref_vec = new Vector(); 49 | /* segment names are joined and re-tokenised on whitespace */ 50 | String hypstr = ""; 51 | for (Segment seg : hypsegs) { 52 | hypstr += seg.name + " "; 53 | } 54 | 55 | StringTokenizer strtok = new StringTokenizer(hypstr, " \t\n\r"); 56 | while (strtok.hasMoreTokens()) 57 | hyp_words.add(strtok.nextToken()); 58 | 59 | strtok = new StringTokenizer(ref, " \t"); 60 | while (strtok.hasMoreTokens()) { 61 | String str = strtok.nextToken(); 62 | ref_words.add(str); 63 | ref_vec.add(str); 64 | } 65 | /* the reference is the "original" side of the patch, the hypothesis the "revised" side */ 66 | Patch patch = DiffUtils.diff(ref_words, hyp_words); 67 | 68 | int rev_beg, rev_end; 69 | int orig_beg, orig_end; 70 | double start, end; 71 | String text; 72 | double eps = 0.001; // epsilon in seconds 73 | 74 | System.out.println("hypsegs.size(): "+hypsegs.size()); 75 | System.out.println("ref_vec.size(): "+ref_vec.size()); 76 | 77 |
/* NOTE(review): the text between "end_time" and "delta" in the next statement appears to have been lost in this copy of the file; the original condition and the header of the loop over the patch deltas cannot be reconstructed from here. */ if(hypsegs.get(hypsegs.size()-1).end_time delta : patch.getDeltas()) { 87 | 88 | System.out.println(delta.getType()); 89 | System.out.println("o " + delta.getOriginal()); 90 | System.out.println("r " + delta.getRevised()); 91 | /* NOTE(review): delta positions index the tokenised word lists but are used below to index hypsegs; this presumably assumes one word per segment name - confirm */ 92 | rev_beg = delta.getRevised().getPosition(); 93 | rev_end = rev_beg + delta.getRevised().size(); 94 | orig_beg = delta.getOriginal().getPosition(); 95 | orig_end = orig_beg + delta.getOriginal().size(); 96 | 97 | /* 98 | * if (rev_beg > 0) rev_beg--; if (rev_end < hypsegs.size() - 1) 99 | * rev_end++; if (orig_beg > 0) orig_beg--; if (orig_end < 100 | * ref_words.size() - 1) orig_end++; 101 | */ 102 | /* keep all four indices inside their lists */ 103 | rev_beg = clamp(rev_beg, 0, hypsegs.size()); 104 | rev_end = clamp(rev_end, 0, hypsegs.size()); 105 | orig_beg = clamp(orig_beg, 0, ref_vec.size()); 106 | orig_end = clamp(orig_end, 0, ref_vec.size()); 107 | /* tier 0 gets one segment per delta: timed by the hypothesis range, labelled with the reference words */ 108 | start = hypsegs.get(rev_beg).start_time; 109 | end = hypsegs.get(rev_end).end_time; 110 | text = extract(ref_vec, orig_beg, orig_end); 111 | ret.addSegment(0, start, end, text); 112 | /* a null name marks the hypothesis segment as incorrect for the passes below */ 113 | for (int i = rev_beg; i <= rev_end; i++) 114 | hypsegs.get(i).name = null; 115 | } 116 | /* collect the still-named word segments and the phoneme segments lying inside them (within eps) */ 117 | Tier wtier = new Tier(); 118 | Tier ptier = new Tier(); 119 | for (Segment seg : hypsegs) 120 | if (seg.name != null) { 121 | wtier.segments.add(seg); 122 | for (Segment pseg : phsegs) { 123 | if (pseg.start_time >= seg.start_time - eps && pseg.end_time <= seg.end_time + eps) 124 | ptier.segments.add(pseg); 125 | } 126 | } 127 | 128 | ret.tiers.add(wtier); 129 | ret.tiers.add(ptier); 130 | /* correct words are added to tier 0 as null-named separators so the merge does not join errors across them */ 131 | Tier t = ret.tiers.get(0); 132 | for (Segment seg : hypsegs) 133 | if (seg.name != null) { 134 | t.add(seg.start_time, seg.end_time, null); 135 | } 136 | 137 | // ret.mergeOverlappingAndAdjecent(0); 138 | t.mergeOverlappingAndAdjecent(); 139 | t.removeNull(); 140 | 141 | ret.renameTier(0, "incorrect"); 142 | ret.renameTier(1, "correct words"); 143 | ret.renameTier(2, "correct phonemes"); 144 | 145 | return ret; 146 | } 147 | /** Joins the non-empty entries of vec in the inclusive index range [from, to] (clamped to the vector bounds) with single spaces; every visited entry is overwritten with "" so a later call does not return it again. */ 148 | public static String
extract(Vector vec, int from, int to) { 149 | String ret = ""; 150 | 151 | if (from < 0) 152 | from = 0; 153 | if (to >= vec.size()) 154 | to = vec.size() - 1; 155 | 156 | for (int i = from; i <= to; i++) { 157 | String str = vec.get(i); 158 | vec.set(i, ""); 159 | if (str.length() > 0) 160 | ret += str + " "; 161 | } 162 | return ret.trim(); 163 | } 164 | 165 | /** 166 | * Computes the diff between hypothesis and reference. Returns a 167 | * segmentation with 2 tiers: first containing the incorrect segments, 168 | * second containing the correct segments. 169 | * 170 | * @param hyp 171 | * hypothesis returned by the decoder (single tier) 172 | * @param ref 173 | * reference known to be true 174 | * @param strict 175 | * if false, allow expanding word context 176 | * @return segmentation with 2 tiers 177 | */ 178 | public static Segmentation diff2(Segmentation hyp, String ref, boolean strict) { 179 | SegmentationList ret = new SegmentationList(); 180 | List hypsegs = hyp.tiers.get(0).segments; 181 | /* reset the used flag on every hypothesis segment before marking */ 182 | for (Segment seg : hypsegs) 183 | seg.used = false; 184 | 185 | LinkedList hyp_words = new LinkedList(); 186 | LinkedList ref_words = new LinkedList(); 187 | Vector ref_vec = new Vector(); 188 | 189 | String hypstr = ""; 190 | for (Segment seg : hypsegs) { 191 | hypstr += seg.name + " "; 192 | } 193 | 194 | StringTokenizer strtok = new StringTokenizer(hypstr, " \t\n\r"); 195 | while (strtok.hasMoreTokens()) 196 | hyp_words.add(strtok.nextToken()); 197 | 198 | strtok = new StringTokenizer(ref, " \t"); 199 | while (strtok.hasMoreTokens()) { 200 | String str = strtok.nextToken(); 201 | ref_words.add(str); 202 | ref_vec.add(str); 203 | } 204 | 205 | Patch patch = DiffUtils.diff(ref_words, hyp_words); 206 | 207 | int rev_beg, rev_end; 208 | int orig_beg, orig_end; 209 | double start, end; 210 | String text; 211 | 212 | for (Delta delta : patch.getDeltas()) { 213 | 214 | Log.verbose("" + delta.getType()); 215 | Log.verbose("o " + delta.getOriginal()); 216 |
Log.verbose("r " + delta.getRevised()); 217 | /* inclusive index ranges of the delta in the hypothesis (rev) and the reference (orig) */ 218 | rev_beg = delta.getRevised().getPosition(); 219 | rev_end = rev_beg + delta.getRevised().size() - 1; 220 | orig_beg = delta.getOriginal().getPosition(); 221 | orig_end = orig_beg + delta.getOriginal().size() - 1; 222 | /* insertions reported at the very start or end of the reference are not listed as errors; their hypothesis words are only flagged as used, which also keeps them out of the "correct" tier */ 223 | if (delta.getType() == TYPE.INSERT) { 224 | if (delta.getOriginal().getPosition() == 0 || delta.getOriginal().getPosition() == ref_words.size()) { 225 | Log.verbose("Skipping insertion at start/end!"); 226 | 227 | for (int i = rev_beg; i <= rev_end; i++) 228 | hypsegs.get(i).used = true; 229 | 230 | continue; 231 | } 232 | } 233 | /* non-strict: widen both ranges by one word on each side where one exists; strict: only clamp the hypothesis indices into range */ 234 | if (!strict) { 235 | if (orig_beg > 0) 236 | orig_beg--; 237 | if (orig_end < ref_words.size() - 1) 238 | orig_end++; 239 | if (rev_beg > 0) 240 | rev_beg--; 241 | if (rev_end < hypsegs.size() - 1) 242 | rev_end++; 243 | } else { 244 | if (rev_beg < 0) 245 | rev_beg = 0; 246 | if (rev_beg >= hypsegs.size()) 247 | rev_beg = hypsegs.size() - 1; 248 | if (rev_end < 0) 249 | rev_end = 0; 250 | if (rev_end >= hypsegs.size()) 251 | rev_end = hypsegs.size() - 1; 252 | } 253 | /* one "incorrect" segment per delta: timed by the hypothesis range, labelled with the reference words */ 254 | start = hypsegs.get(rev_beg).start_time; 255 | end = hypsegs.get(rev_end).end_time; 256 | text = extract(ref_vec, orig_beg, orig_end); 257 | ret.addSegment(0, start, end, text); 258 | 259 | for (int i = rev_beg; i <= rev_end; i++) 260 | hypsegs.get(i).used = true; 261 | } 262 | 263 | ret.mergeOverlappingAndAdjecent(0); 264 | /* every hypothesis segment that no delta touched goes to the "correct" tier */ 265 | Tier corrtier = new Tier(); 266 | for (Segment seg : hypsegs) 267 | if (seg.used == false) { 268 | corrtier.segments.add(seg); 269 | } 270 | ret.tiers.add(corrtier); 271 | 272 | ret.renameTier(0, "incorrect"); 273 | ret.renameTier(1, "correct"); 274 | 275 | return ret; 276 | } 277 | } 278 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/service/ServiceDaemon.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.service; 2 | 3 |
import java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileReader; 6 | import java.io.IOException; 7 | import java.io.PrintWriter; 8 | import java.security.NoSuchAlgorithmException; 9 | import java.text.SimpleDateFormat; 10 | import java.util.Date; 11 | import java.util.HashMap; 12 | import java.util.LinkedList; 13 | import java.util.Locale; 14 | import java.util.logging.Level; 15 | 16 | import javax.xml.parsers.DocumentBuilder; 17 | import javax.xml.parsers.DocumentBuilderFactory; 18 | import javax.xml.parsers.ParserConfigurationException; 19 | import javax.xml.transform.OutputKeys; 20 | import javax.xml.transform.Result; 21 | import javax.xml.transform.Source; 22 | import javax.xml.transform.Transformer; 23 | import javax.xml.transform.TransformerException; 24 | import javax.xml.transform.TransformerFactory; 25 | import javax.xml.transform.dom.DOMSource; 26 | import javax.xml.transform.stream.StreamResult; 27 | import javax.xml.xpath.XPathExpressionException; 28 | 29 | import org.w3c.dom.Document; 30 | import org.w3c.dom.Element; 31 | import org.xml.sax.SAXException; 32 | 33 | import pl.edu.pjwstk.kaldi.programs.Java; 34 | import pl.edu.pjwstk.kaldi.service.database.dbTasks; 35 | import pl.edu.pjwstk.kaldi.service.database.dbTasks.dbStatus; 36 | import pl.edu.pjwstk.kaldi.service.tasks.Task; 37 | import pl.edu.pjwstk.kaldi.utils.Log; 38 | import pl.edu.pjwstk.kaldi.utils.ParseOptions; 39 | import pl.edu.pjwstk.kaldi.utils.Settings; 40 | 41 | public class ServiceDaemon { 42 | 43 | static File settings_file = null; 44 | 45 | static HashMap cache_map = new HashMap(); 46 | 47 | public static void main(String[] args) { 48 | try { 49 | 50 | Locale.setDefault(Locale.ENGLISH); 51 | 52 | Log.initFile("KaldiServiceDaemon", true); 53 | Log.setLevel(Level.INFO); 54 | 55 | ParseOptions po = new ParseOptions("Kaldi Service Daemon", "Service daemon for Java."); 56 | 57 | po.addArgument("settings", 's', File.class, "Load program settings from a file", null); 
58 | po.addArgument("dump-settings", 'd', File.class, "Save default program settings to a file", null); 59 | po.addArgument("background", 'b', Boolean.class, "Run program in background", "false"); 60 | 61 | if (!po.parse(args)) 62 | return; 63 | 64 | if (po.getArgument("dump-settings") != null) { 65 | Log.info("Dumping settings and exitting."); 66 | Settings.dumpSettings((File) po.getArgument("dump-settings")); 67 | return; 68 | } 69 | 70 | if (po.getArgument("settings") != null) { 71 | Log.info("Loading settings..."); 72 | settings_file = (File) po.getArgument("settings"); 73 | Settings.loadSettings(settings_file); 74 | } 75 | 76 | try { 77 | BufferedReader reader = new BufferedReader(new FileReader(Settings.daemon_pid)); 78 | int iPid = Integer.parseInt(reader.readLine()); 79 | reader.close(); 80 | File proc = new File("/proc/" + iPid + "/cmdline"); 81 | if (proc.exists()) { 82 | System.out.println("Daemon process is already running @pid: " + iPid); 83 | System.out.println("Exitting..."); 84 | System.exit(0); 85 | } 86 | } catch (Exception e) { 87 | } 88 | 89 | if ((Boolean) po.getArgument("background") == true) { 90 | System.out.println("Running Kaldi Java Service Daemon in background..."); 91 | runSelf(); 92 | try { 93 | Thread.sleep(1000); 94 | } catch (InterruptedException e) { 95 | } 96 | System.exit(0); 97 | } 98 | 99 | PrintWriter writer = new PrintWriter(Settings.daemon_pid); 100 | writer.println(new File("/proc/self").getCanonicalFile().getName()); 101 | writer.close(); 102 | 103 | } catch (Exception e) { 104 | System.out.println("ERROR"); 105 | e.printStackTrace(System.out); 106 | return; 107 | } 108 | 109 | while (true) { 110 | 111 | dbTasks.Task running_tasks[] = dbTasks.getAllRunning(); 112 | 113 | long now = new Date().getTime(); 114 | 115 | int slots_used = 0; 116 | for (dbTasks.Task t : running_tasks) { 117 | if (t.pid <= 0 && (now - t.time.getTime()) > Settings.daemon_startup_timer_ms) { 118 | Log.info("Task " + t._id + " never started and is 
stale!"); 119 | dbTasks.changeStatus(t, dbStatus.dead); 120 | continue; 121 | } 122 | 123 | if (t.pid > 0 && !checkPIDProc(t.pid)) { 124 | Log.info("Task " + t._id + " died!"); 125 | dbTasks.changeStatus(t, dbStatus.dead); 126 | continue; 127 | } 128 | 129 | slots_used++; 130 | } 131 | 132 | saveStatus(slots_used); 133 | 134 | if (slots_used >= Settings.daemon_slots) { 135 | try { 136 | Thread.sleep(500); 137 | } catch (InterruptedException e) { 138 | } 139 | continue; 140 | } 141 | 142 | dbTasks.Task queued_task = dbTasks.getOldestQueued(); 143 | 144 | if (queued_task == null) { 145 | try { 146 | Thread.sleep(1000); 147 | } catch (InterruptedException e) { 148 | } 149 | continue; 150 | } 151 | 152 | String hash = ""; 153 | try { 154 | hash = Task.getHash(new File(queued_task.task_file)); 155 | Log.verbose("Task hash: " + hash); 156 | } catch (XPathExpressionException | NoSuchAlgorithmException | SAXException | ParserConfigurationException 157 | | IOException | NullPointerException e1) { 158 | Log.error("Getting hash", e1); 159 | dbTasks.changeStatus(queued_task, dbStatus.dead); 160 | continue; 161 | } 162 | 163 | dbTasks.Task copy = dbTasks.getByHash(hash); 164 | 165 | dbTasks.setHash(queued_task, hash); 166 | 167 | if (copy != null) { 168 | 169 | Log.info("Found cached: " + copy._id); 170 | 171 | dbTasks.changeStatus(queued_task, dbStatus.copyof); 172 | dbTasks.setCopy(queued_task, copy._id); 173 | 174 | continue; 175 | } 176 | 177 | Log.info("Starting task " + queued_task._id); 178 | dbTasks.changeStatus(queued_task, dbStatus.running); 179 | 180 | try { 181 | run(queued_task); 182 | } catch (IOException e1) { 183 | e1.printStackTrace(); 184 | } 185 | 186 | try { 187 | Thread.sleep(500); 188 | } catch (InterruptedException e) { 189 | } 190 | 191 | } 192 | 193 | } 194 | 195 | public static void run(dbTasks.Task task) throws IOException { 196 | 197 | LinkedList cp_files = new LinkedList(); 198 | for (File f : Settings.java_lib_dir.listFiles()) { 199 | if 
(f.getName().endsWith(".jar")) 200 | cp_files.add(f); 201 | } 202 | 203 | String args[]; 204 | if (settings_file != null) 205 | args = new String[] { "-s", settings_file.getAbsolutePath(), "" + task._id }; 206 | else 207 | args = new String[] { "" + task._id }; 208 | 209 | Java.java("pl.edu.pjwstk.kaldi.service.ServiceTask", args, cp_files, true); 210 | 211 | } 212 | 213 | public static void runSelf() throws IOException { 214 | 215 | LinkedList cp_files = new LinkedList(); 216 | for (File f : Settings.java_lib_dir.listFiles()) { 217 | if (f.getName().endsWith(".jar")) 218 | cp_files.add(f); 219 | } 220 | 221 | String args[]; 222 | if (settings_file != null) 223 | args = new String[] { "-s", settings_file.getAbsolutePath() }; 224 | else 225 | args = new String[] {}; 226 | 227 | Java.java("pl.edu.pjwstk.kaldi.service.ServiceDaemon", args, cp_files, true); 228 | 229 | } 230 | 231 | private static DocumentBuilder docBuilder = null; 232 | private static TransformerFactory transfac = TransformerFactory.newInstance(); 233 | private static SimpleDateFormat sdf = new SimpleDateFormat("dd.MM.yyyy HH:mm:ss"); 234 | 235 | public static void saveStatus(int slots_used) { 236 | 237 | try { 238 | if (docBuilder == null) 239 | docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); 240 | 241 | Document doc = docBuilder.newDocument(); 242 | 243 | Element root = doc.createElement("status"); 244 | 245 | Element time = doc.createElement("time"); 246 | Element timestamp = doc.createElement("timestamp"); 247 | Element slots = doc.createElement("slots-used"); 248 | 249 | Date now = new Date(); 250 | 251 | time.setTextContent(sdf.format(now)); 252 | timestamp.setTextContent("" + (now.getTime() / 1000L)); 253 | slots.setTextContent("" + slots_used); 254 | 255 | root.appendChild(time); 256 | root.appendChild(timestamp); 257 | root.appendChild(slots); 258 | doc.appendChild(root); 259 | 260 | Transformer trans = transfac.newTransformer(); 261 | 
trans.setOutputProperty(OutputKeys.STANDALONE, "yes"); 262 | trans.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4"); 263 | trans.setOutputProperty(OutputKeys.METHOD, "xml"); 264 | trans.setOutputProperty(OutputKeys.INDENT, "yes"); 265 | 266 | Source source = new DOMSource(doc); 267 | Result result = new StreamResult(Settings.daemon_status); 268 | trans.transform(source, result); 269 | 270 | } catch (ParserConfigurationException | TransformerException e) { 271 | } 272 | } 273 | 274 | private static boolean checkPIDProc(int pid) { 275 | 276 | File proc_file = new File("/proc/" + pid); 277 | 278 | return proc_file.exists(); 279 | } 280 | } 281 | -------------------------------------------------------------------------------- /.idea/uiDesigner.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /src/main/java/pl/edu/pjwstk/kaldi/service/tasks/DecodeFMLLRTask.java: -------------------------------------------------------------------------------- 1 | package pl.edu.pjwstk.kaldi.service.tasks; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.security.MessageDigest; 6 | import java.util.Locale; 7 | 8 | import javax.xml.xpath.XPath; 9 | import javax.xml.xpath.XPathConstants; 10 | import 
javax.xml.xpath.XPathExpressionException; 11 | 12 | import org.w3c.dom.Element; 13 | 14 | import pl.edu.pjwstk.kaldi.programs.KaldiScripts; 15 | import pl.edu.pjwstk.kaldi.programs.KaldiUtils; 16 | import pl.edu.pjwstk.kaldi.programs.KaldiUtils.FMLLRUpdateType; 17 | import pl.edu.pjwstk.kaldi.utils.FileUtils; 18 | import pl.edu.pjwstk.kaldi.utils.Log; 19 | import pl.edu.pjwstk.kaldi.utils.Settings; 20 | 21 | public class DecodeFMLLRTask extends Task { 22 | 23 | private File input_file; 24 | private File mfcc_config; 25 | private File decode_config; 26 | 27 | private File hclg_file; 28 | private File words_table; 29 | private File phones_table; 30 | private File word_boundaries; 31 | private File lda_matrix; 32 | 33 | private File ali_mdl; 34 | private File adapt_mdl; 35 | private File final_mdl; 36 | 37 | private String silence_list; 38 | 39 | @Override 40 | public void run() { 41 | state = State.RUNNING; 42 | 43 | boolean fail = false; 44 | File files[] = { input_file, mfcc_config, decode_config, hclg_file, 45 | words_table, phones_table, word_boundaries, lda_matrix, 46 | ali_mdl, adapt_mdl, final_mdl }; 47 | for (File f : files) 48 | if (f != null && !f.exists()) { 49 | Log.error("Missing file: " + f.getAbsolutePath()); 50 | fail = true; 51 | } 52 | 53 | if (fail) { 54 | Log.error("Some files are missing!"); 55 | state = State.FAILED; 56 | return; 57 | } 58 | 59 | File scp_file = new File(Settings.curr_task_dir, "wav.scp"); 60 | File mfcc = new File(Settings.curr_task_dir, "mfcc"); 61 | File cmvn_stats = new File(Settings.curr_task_dir, "cmvn_stats"); 62 | File cmvn = new File(Settings.curr_task_dir, "cmvn"); 63 | File splice = new File(Settings.curr_task_dir, "splice"); 64 | File lda = new File(Settings.curr_task_dir, "lda"); 65 | 66 | File lattice = new File(Settings.curr_task_dir, "lattice"); 67 | File post = new File(Settings.curr_task_dir, "post"); 68 | File postsil = new File(Settings.curr_task_dir, "postsil"); 69 | File gpost = new 
File(Settings.curr_task_dir, "gpost"); 70 | File pre_trans = new File(Settings.curr_task_dir, "pretrans"); 71 | File fmllr1 = new File(Settings.curr_task_dir, "fmllr1"); 72 | File fmllr1_lattice = new File(Settings.curr_task_dir, "fmllr1_lattice"); 73 | File fmllr1_latpruned = new File(Settings.curr_task_dir, 74 | "fmllr1_latpruned"); 75 | File trans = new File(Settings.curr_task_dir, "trans"); 76 | File fmllr2 = new File(Settings.curr_task_dir, "fmllr2"); 77 | File fmllr2_lattice = new File(Settings.curr_task_dir, "fmllr2_lattice"); 78 | File fmllr2_latpruned = new File(Settings.curr_task_dir, 79 | "fmllr2_latpruned"); 80 | 81 | File words = new File(Settings.curr_task_dir, "words"); 82 | File alignment = new File(Settings.curr_task_dir, "alignment"); 83 | File words_int = new File(Settings.curr_task_dir, "words.int"); 84 | File words_txt = new File(Settings.curr_task_dir, "words.txt"); 85 | File aligned_lattice = new File(Settings.curr_task_dir, 86 | "aligned_lattice"); 87 | File ctm_int = new File(Settings.curr_task_dir, "ctm.int"); 88 | File ctm_txt = new File(Settings.curr_task_dir, "ctm.txt"); 89 | 90 | try { 91 | 92 | FileUtils.makeSCPFile(scp_file, new File[] { input_file }, true); 93 | 94 | KaldiUtils.compute_mfcc_feats(mfcc_config, scp_file, mfcc); 95 | 96 | KaldiUtils.compute_cmvn_stats(mfcc, cmvn_stats); 97 | 98 | KaldiUtils.apply_cmvn(cmvn_stats, mfcc, cmvn); 99 | 100 | KaldiUtils.splice_feats(cmvn, splice); 101 | 102 | KaldiUtils.transform_feats(lda_matrix, false, splice, lda); 103 | 104 | KaldiUtils.gmm_latgen_faster(ali_mdl, hclg_file, lda, lattice, 105 | words, alignment); 106 | 107 | // TODO accwt as param? 108 | KaldiUtils.lattice_to_post(0.083333, lattice, post); 109 | 110 | KaldiUtils.weight_silence_post(0.01, silence_list, ali_mdl, post, 111 | postsil); // TODO silwt as param? 
112 | 113 | KaldiUtils.gmm_post_to_gpost(ali_mdl, lda, postsil, gpost); 114 | 115 | KaldiUtils.gmm_est_fmllr_gpost(FMLLRUpdateType.full, null, ali_mdl, 116 | lda, gpost, pre_trans); 117 | 118 | KaldiUtils.transform_feats(pre_trans, true, lda, fmllr1); 119 | 120 | KaldiUtils.gmm_latgen_faster(adapt_mdl, hclg_file, fmllr1, 121 | fmllr1_lattice, words, alignment); 122 | 123 | // TODO params? 124 | KaldiUtils.lattice_determinize_pruned(0.083333, 4.0, 125 | fmllr1_lattice, fmllr1_latpruned); 126 | 127 | // TODO accwt as param? 128 | KaldiUtils.lattice_to_post(0.083333, fmllr1_latpruned, post); 129 | 130 | KaldiUtils.weight_silence_post(0.01, silence_list, adapt_mdl, post, 131 | postsil); // TODO silwt as param? 132 | 133 | KaldiUtils.gmm_est_fmllr(FMLLRUpdateType.full, null, adapt_mdl, 134 | fmllr1, postsil, trans); 135 | 136 | KaldiUtils.transform_feats(trans, true, fmllr1, fmllr2); 137 | 138 | KaldiUtils.gmm_rescore_lattice(final_mdl, fmllr1_lattice, fmllr2, 139 | fmllr2_lattice); 140 | 141 | // TODO params? 
142 | KaldiUtils.lattice_determinize_pruned(0.083333, 4.0, 143 | fmllr2_lattice, fmllr2_latpruned); 144 | 145 | KaldiUtils.lattice_best_path(fmllr2_latpruned, words, alignment); 146 | 147 | KaldiUtils.copy_int_vector("ark", words, "ark,t", words_int); 148 | 149 | KaldiUtils.int2sym("2-", words_table, words_int, words_txt); 150 | 151 | KaldiUtils.lattice_align_words(word_boundaries, final_mdl, 152 | fmllr2_latpruned, aligned_lattice); 153 | 154 | KaldiUtils.lattice_to_ctm_conf(aligned_lattice, ctm_int); 155 | 156 | KaldiUtils.int2sym("5", words_table, ctm_int, ctm_txt); 157 | 158 | state = State.SUCCEEDED; 159 | 160 | } catch (Exception e) { 161 | Log.error("Decoding task.", e); 162 | state = State.FAILED; 163 | } 164 | } 165 | 166 | @Override 167 | public void loadSettings(XPath xpath, Element node) 168 | throws XPathExpressionException { 169 | 170 | input_file = new File((String) xpath.evaluate("input-file", node, 171 | XPathConstants.STRING)); 172 | mfcc_config = new File((String) xpath.evaluate("mfcc-config", node, 173 | XPathConstants.STRING)); 174 | decode_config = new File((String) xpath.evaluate("decode-config", node, 175 | XPathConstants.STRING)); 176 | 177 | hclg_file = new File((String) xpath.evaluate("hclg", node, 178 | XPathConstants.STRING)); 179 | words_table = new File((String) xpath.evaluate("words-table", node, 180 | XPathConstants.STRING)); 181 | phones_table = new File((String) xpath.evaluate("phones-table", node, 182 | XPathConstants.STRING)); 183 | word_boundaries = new File((String) xpath.evaluate("word-boundaries", 184 | node, XPathConstants.STRING)); 185 | lda_matrix = new File((String) xpath.evaluate("lda-matrix", node, 186 | XPathConstants.STRING)); 187 | 188 | ali_mdl = new File((String) xpath.evaluate("ali-mdl", node, 189 | XPathConstants.STRING)); 190 | adapt_mdl = new File((String) xpath.evaluate("adapt-mdl", node, 191 | XPathConstants.STRING)); 192 | final_mdl = new File((String) xpath.evaluate("final-mdl", node, 193 | 
XPathConstants.STRING)); 194 | 195 | silence_list = (String) xpath.evaluate("silence-list", node, 196 | XPathConstants.STRING); 197 | 198 | } 199 | 200 | public static void main(String[] args) { 201 | 202 | try { 203 | Locale.setDefault(Locale.ENGLISH); 204 | 205 | Settings.curr_task_dir = File.createTempFile("DecodeFMLLRUnitTest", 206 | "", Settings.tasks_dir); 207 | Settings.curr_task_dir.delete(); 208 | Settings.curr_task_dir.mkdirs(); 209 | 210 | Settings.log_dir = Settings.curr_task_dir; 211 | Settings.temp_dir = new File(Settings.curr_task_dir, "tmp"); 212 | Settings.temp_dir2 = new File(Settings.curr_task_dir, "tmp2"); 213 | 214 | Log.init("DecodeFMLLRUnitTests", true); 215 | 216 | KaldiUtils.init(); 217 | KaldiUtils.test(); 218 | KaldiScripts.init(); 219 | KaldiScripts.test(); 220 | 221 | DecodeFMLLRTask task = new DecodeFMLLRTask(); 222 | 223 | task.input_file = new File( 224 | "/home/guest/data/RadioPiNKaldi/Elzbieta_Bienkowska/wav/Elzbieta_Bienkowska_002.wav"); 225 | task.mfcc_config = new File( 226 | "/home/guest/kaldi/egs/synat2/s5/conf/mfcc.conf"); 227 | task.decode_config = new File( 228 | "/home/guest/kaldi/egs/synat2/s5/conf/decode.config"); 229 | 230 | task.hclg_file = new File( 231 | "/home/guest/kaldi/egs/synat2/s5/exp/tri3b/graph/HCLG.fst"); 232 | task.words_table = new File( 233 | "/home/guest/kaldi/egs/synat2/s5/exp/tri3b/graph/words.txt"); 234 | task.phones_table = new File( 235 | "/home/guest/kaldi/egs/synat2/s5/exp/tri3b/graph/phones.txt"); 236 | task.word_boundaries = new File( 237 | "/home/guest/kaldi/egs/synat2/s5/exp/tri3b/graph/phones/word_boundary.int"); 238 | task.lda_matrix = new File( 239 | "/home/guest/kaldi/egs/synat2/s5/exp/tri3b/final.mat"); 240 | 241 | task.ali_mdl = new File( 242 | "/home/guest/kaldi/egs/synat2/s5/exp/tri3b/final.alimdl"); 243 | 244 | task.adapt_mdl = new File( 245 | "/home/guest/kaldi/egs/synat2/s5/exp/tri3b/final.mdl"); 246 | 247 | task.final_mdl = new File( 248 | 
"/home/guest/kaldi/egs/synat2/s5/exp/tri3b_mmi/final.mdl"); 249 | 250 | task.silence_list = "1:2:3:4:5"; 251 | 252 | task.run(); 253 | 254 | } catch (Exception e) { 255 | Log.error("Main error.", e); 256 | } 257 | } 258 | 259 | @Override 260 | public void updateHash(MessageDigest m) throws IOException { 261 | processFileHash(m, input_file); 262 | } 263 | } 264 | --------------------------------------------------------------------------------