├── jl1.0.1.jar ├── .gitattributes ├── gson-2.8.5.jar ├── mp3spi1.9.5.jar ├── tritonus_share.jar ├── json-simple-1.1.1.jar ├── bin ├── AWSSpeakerItem.class ├── AWSTranscript.class ├── AWSTranscripts.class ├── AWSSpeakerLabels.class ├── TranscribeEditor.class ├── TranscribeUtils.class ├── AWSSpeakerSegment.class ├── AWSTranscriptItem.class ├── TranscribeEditor$1.class ├── AWSTranscriptResults.class └── AWSTranscriptAlternatives.class ├── src ├── AWSTranscripts.java ├── GoogleSpeechWords.java ├── GoogleSpeechMetaData.java ├── GoogleSpeechResponse.java ├── AWSTranscriptAlternatives.java ├── AWSSpeakerItem.java ├── GoogleSpeechResult.java ├── GoogleSpeechAlternatives.java ├── AWSSpeakerLabels.java ├── AWSSpeakerSegment.java ├── AWSTranscriptItem.java ├── AWSTranscriptResults.java ├── AWSTranscript.java ├── GoogleTranscript.java ├── TranscribeUtils.java └── TranscribeEditor.java ├── .project ├── .settings └── org.eclipse.jdt.core.prefs ├── README.md └── .classpath /jl1.0.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/jl1.0.1.jar -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /gson-2.8.5.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/gson-2.8.5.jar -------------------------------------------------------------------------------- /mp3spi1.9.5.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/mp3spi1.9.5.jar 
-------------------------------------------------------------------------------- /tritonus_share.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/tritonus_share.jar -------------------------------------------------------------------------------- /json-simple-1.1.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/json-simple-1.1.1.jar -------------------------------------------------------------------------------- /bin/AWSSpeakerItem.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/AWSSpeakerItem.class -------------------------------------------------------------------------------- /bin/AWSTranscript.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/AWSTranscript.class -------------------------------------------------------------------------------- /bin/AWSTranscripts.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/AWSTranscripts.class -------------------------------------------------------------------------------- /bin/AWSSpeakerLabels.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/AWSSpeakerLabels.class -------------------------------------------------------------------------------- /bin/TranscribeEditor.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/TranscribeEditor.class 
-------------------------------------------------------------------------------- /bin/TranscribeUtils.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/TranscribeUtils.class -------------------------------------------------------------------------------- /bin/AWSSpeakerSegment.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/AWSSpeakerSegment.class -------------------------------------------------------------------------------- /bin/AWSTranscriptItem.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/AWSTranscriptItem.class -------------------------------------------------------------------------------- /bin/TranscribeEditor$1.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/TranscribeEditor$1.class -------------------------------------------------------------------------------- /bin/AWSTranscriptResults.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/AWSTranscriptResults.class -------------------------------------------------------------------------------- /bin/AWSTranscriptAlternatives.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/AWSTranscriptAlternatives.class -------------------------------------------------------------------------------- /src/AWSTranscripts.java: -------------------------------------------------------------------------------- 1 | //Copyright 2018, Creed Alexander Erickson IV, All rights reserved. 
2 | 3 | import org.json.simple.JSONObject; 4 | 5 | // AWS transcribe->results->transcript object 6 | public class AWSTranscripts{ 7 | String transcript; 8 | 9 | public AWSTranscripts(JSONObject transcriptJSON) { 10 | transcript = (String)transcriptJSON.get("transcript"); 11 | } 12 | 13 | public AWSTranscripts() { 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | TranscribeEditor 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | org.eclipse.jdt.core.javanature 16 | 17 | 18 | -------------------------------------------------------------------------------- /src/GoogleSpeechWords.java: -------------------------------------------------------------------------------- 1 | import org.json.simple.JSONObject; 2 | 3 | public class GoogleSpeechWords { 4 | String startTime; 5 | String endTime; 6 | String word; 7 | String confidence; 8 | 9 | GoogleSpeechWords(JSONObject wordJSON){ 10 | startTime = (String) wordJSON.get("startTime"); 11 | endTime = (String) wordJSON.get("endTime"); 12 | word = (String) wordJSON.get("word"); 13 | confidence = (String) wordJSON.get("confidence").toString(); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/GoogleSpeechMetaData.java: -------------------------------------------------------------------------------- 1 | import org.json.simple.JSONObject; 2 | 3 | public class GoogleSpeechMetaData { 4 | //String @type = null; 5 | String progressPercent; 6 | String startTime; 7 | String lastUpdateTime; 8 | 9 | public GoogleSpeechMetaData(JSONObject metaDataJSON) { 10 | startTime = (String) metaDataJSON.get("startTime"); 11 | progressPercent = (String) metaDataJSON.get("progressPercent").toString(); 12 | lastUpdateTime = (String) metaDataJSON.get("lastUpdateTime"); 13 | } 14 | 15 | } 16 | 
-------------------------------------------------------------------------------- /src/GoogleSpeechResponse.java: -------------------------------------------------------------------------------- 1 | import org.json.simple.JSONArray; 2 | import org.json.simple.JSONObject; 3 | 4 | public class GoogleSpeechResponse { 5 | //String @type 6 | GoogleSpeechResult results[]; 7 | 8 | public GoogleSpeechResponse(JSONObject responseJSON) { 9 | 10 | JSONArray resultsJSON = (JSONArray) responseJSON.get("results"); 11 | 12 | int results_size = resultsJSON.size(); 13 | 14 | results = new GoogleSpeechResult[results_size]; 15 | for(int i = 0 ; i < results_size; i++) { 16 | results[i] = new GoogleSpeechResult((JSONObject) resultsJSON.get(i)); 17 | } 18 | 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 3 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 4 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve 5 | org.eclipse.jdt.core.compiler.compliance=1.8 6 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate 7 | org.eclipse.jdt.core.compiler.debug.localVariable=generate 8 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate 9 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error 10 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 11 | org.eclipse.jdt.core.compiler.source=1.8 12 | -------------------------------------------------------------------------------- /src/AWSTranscriptAlternatives.java: -------------------------------------------------------------------------------- 1 | //Copyright 2018, Creed Alexander Erickson IV, All rights reserved. 
2 | 3 | import org.json.simple.JSONObject; 4 | 5 | public class AWSTranscriptAlternatives{ 6 | String confidence = null; 7 | String content = null; 8 | 9 | AWSTranscriptAlternatives(String confidence, String content){ 10 | this.confidence = confidence; 11 | this.content = content; 12 | } 13 | 14 | public AWSTranscriptAlternatives(JSONObject alt) { 15 | confidence = (String) alt.getOrDefault("confidence", null); 16 | content = (String) alt.getOrDefault("content", null); 17 | } 18 | 19 | public AWSTranscriptAlternatives() { 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/AWSSpeakerItem.java: -------------------------------------------------------------------------------- 1 | import org.json.simple.JSONArray; 2 | import org.json.simple.JSONObject; 3 | 4 | public class AWSSpeakerItem { 5 | String start_time; 6 | String speaker_label; 7 | String end_time; 8 | 9 | AWSSpeakerItem(JSONObject itemJSON){ 10 | start_time = (String) itemJSON.getOrDefault("start_time", null); 11 | end_time = (String) itemJSON.getOrDefault("end_time", null); 12 | speaker_label = (String) itemJSON.getOrDefault("speaker_label", null); 13 | } 14 | 15 | public AWSSpeakerItem(String start_time2, String end_time2, String speaker_label2) { 16 | start_time = start_time2; 17 | end_time = end_time2; 18 | speaker_label = speaker_label2; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/GoogleSpeechResult.java: -------------------------------------------------------------------------------- 1 | import org.json.simple.JSONArray; 2 | import org.json.simple.JSONObject; 3 | 4 | public class GoogleSpeechResult { 5 | String languageCode; 6 | GoogleSpeechAlternatives alternatives[]; 7 | 8 | GoogleSpeechResult(JSONObject resultJSON){ 9 | languageCode = (String) resultJSON.get("languageCode"); 10 | 11 | JSONArray alternativesJSON = (JSONArray) resultJSON.get("alternatives"); 12 | 13 | int alts_size = 
alternativesJSON.size(); 14 | 15 | alternatives = new GoogleSpeechAlternatives[alts_size]; 16 | for(int i = 0 ; i < alts_size; i++) { 17 | alternatives[i] = new GoogleSpeechAlternatives((JSONObject) alternativesJSON.get(i)); 18 | } 19 | 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/GoogleSpeechAlternatives.java: -------------------------------------------------------------------------------- 1 | import org.json.simple.JSONArray; 2 | import org.json.simple.JSONObject; 3 | 4 | public class GoogleSpeechAlternatives { 5 | String transcript; 6 | String confidence; 7 | GoogleSpeechWords words[]; 8 | 9 | GoogleSpeechAlternatives(JSONObject alternativeJSON){ 10 | transcript = (String) alternativeJSON.get("transcript"); 11 | confidence = (String) alternativeJSON.get("confidence").toString(); 12 | 13 | JSONArray wordsJSON = (JSONArray) alternativeJSON.get("words"); 14 | 15 | int words_size = wordsJSON.size(); 16 | 17 | words = new GoogleSpeechWords[words_size]; 18 | for(int i = 0 ; i < words_size; i++) { 19 | words[i] = new GoogleSpeechWords((JSONObject) wordsJSON.get(i)); 20 | } 21 | 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/AWSSpeakerLabels.java: -------------------------------------------------------------------------------- 1 | import org.json.simple.JSONArray; 2 | import org.json.simple.JSONObject; 3 | 4 | public class AWSSpeakerLabels { 5 | String speakers; 6 | AWSSpeakerSegment[] segments; 7 | 8 | public AWSSpeakerLabels(JSONObject labelsJSON) { 9 | speakers = (String) labelsJSON.getOrDefault("speakers", null); 10 | 11 | JSONArray segmentsJSON = (JSONArray) labelsJSON.get("segments"); 12 | 13 | int segments_size = segmentsJSON.size(); 14 | 15 | segments = new AWSSpeakerSegment[segments_size]; 16 | for(int i = 0 ; i < segments_size; i++) { 17 | segments[i] = new AWSSpeakerSegment((JSONObject) segmentsJSON.get(i)); 18 | } 19 | } 20 | 21 | public 
AWSSpeakerLabels(Integer numSpeakers) { 22 | speakers = numSpeakers.toString(); 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TranscribeEditor 2 | #//Copyright 2018, Creed Alexander Erickson IV, All rights reserved. 3 | 4 | This is an editor for AWS Transcribe results. It allows you to load a JSON file which has been produced by AWS Transcribe, 5 | modify the transcription, including adding and deleting from the items array in the JSON. You can then save the edits as 6 | a JSON file which follows the AWS Transcribe JSON format. 7 | 8 | You can also load and play the original mp3 sound file which generated the transcription while you edit. The text will highlight 9 | what is currently playing in the sound file to allow easy verification. 10 | 11 | The order of content for each item is, from the top: content, speaker_label, confidence, start_time, end_time. You can modify all of these. 12 | To add or delete items, right-click on the items array at the bottom. 13 | 14 | -------------------------------------------------------------------------------- /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /src/AWSSpeakerSegment.java: -------------------------------------------------------------------------------- 1 | 2 | import org.json.simple.JSONArray; 3 | import org.json.simple.JSONObject; 4 | 5 | public class AWSSpeakerSegment { 6 | String start_time; 7 | String speaker_label; 8 | String end_time; 9 | AWSSpeakerItem[] items; // if this serialized ok then I should not use raw arrays in these AWS classes.... 
10 | 11 | public AWSSpeakerSegment(JSONObject segmentJSON) { 12 | start_time = (String) segmentJSON.getOrDefault("start_time", null); 13 | end_time = (String) segmentJSON.getOrDefault("end_time", null); 14 | speaker_label = (String) segmentJSON.getOrDefault("speaker_label", null); 15 | 16 | JSONArray itemsJSON = (JSONArray) segmentJSON.get("items"); 17 | 18 | int items_size = itemsJSON.size(); 19 | 20 | items = new AWSSpeakerItem[items_size]; 21 | for(int i = 0 ; i < items_size; i++) { 22 | items[i] = new AWSSpeakerItem((JSONObject) itemsJSON.get(i)); 23 | } 24 | } 25 | 26 | public AWSSpeakerSegment(String speaker_label2, String start_time2, String end_time2) { 27 | this.start_time = start_time2; 28 | this.end_time = end_time2; 29 | this.speaker_label = speaker_label2; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/AWSTranscriptItem.java: -------------------------------------------------------------------------------- 1 | //Copyright 2018, Creed Alexander Erickson IV, All rights reserved. 2 | 3 | import org.json.simple.JSONArray; 4 | import org.json.simple.JSONObject; 5 | 6 | class AWSTranscriptItem{ 7 | transient String speaker_label = null; // exclude this from serialzation when saving json... 
8 | String start_time = null; 9 | String end_time = null; 10 | AWSTranscriptAlternatives[] alternatives; 11 | String type = null; 12 | 13 | AWSTranscriptItem(JSONObject itemJSON){ 14 | start_time = (String) itemJSON.getOrDefault("start_time", null); 15 | end_time = (String) itemJSON.getOrDefault("end_time", null); 16 | type = (String) itemJSON.getOrDefault("type", null); 17 | 18 | JSONArray alts = (JSONArray) itemJSON.get("alternatives"); 19 | JSONObject alt = (JSONObject) alts.get(0); 20 | alternatives = new AWSTranscriptAlternatives[1]; 21 | alternatives[0] = new AWSTranscriptAlternatives(alt); 22 | } 23 | 24 | AWSTranscriptItem(String content, String speaker_label, String confidence, String start_time, String end_time, String type){ 25 | this.start_time = start_time; 26 | this.end_time = end_time; 27 | this.speaker_label = speaker_label; 28 | this.type = type; 29 | alternatives = new AWSTranscriptAlternatives[1]; 30 | alternatives[0] = new AWSTranscriptAlternatives(confidence, content); 31 | } 32 | 33 | 34 | AWSTranscriptItem(){ // this is used as an intermediary to create a new vbox element. since I dont have my own vbox class, this comes in handy 35 | start_time = ""; 36 | end_time = ""; 37 | String confidence = "1"; 38 | String content = ""; 39 | alternatives = new AWSTranscriptAlternatives[1]; 40 | alternatives[0] = new AWSTranscriptAlternatives(confidence, content); 41 | } 42 | } -------------------------------------------------------------------------------- /src/AWSTranscriptResults.java: -------------------------------------------------------------------------------- 1 | //Copyright 2018, Creed Alexander Erickson IV, All rights reserved. 
2 | 3 | import org.json.simple.JSONArray; 4 | import org.json.simple.JSONObject; 5 | 6 | // AWS transcribe->results object 7 | public class AWSTranscriptResults{ 8 | AWSTranscripts[] transcripts; 9 | AWSTranscriptItem[] items; 10 | AWSSpeakerLabels speaker_labels; 11 | 12 | public AWSTranscriptResults(JSONObject resultsJSON) { 13 | 14 | JSONArray transcriptsJSON = (JSONArray) resultsJSON.get("transcripts"); 15 | JSONArray itemsJSON = (JSONArray) resultsJSON.get("items"); 16 | 17 | int transcripts_size = transcriptsJSON.size(); 18 | int items_size = itemsJSON.size(); 19 | 20 | transcripts = new AWSTranscripts[transcripts_size]; 21 | for(int i = 0 ; i < transcripts_size; i++) { 22 | transcripts[i] = new AWSTranscripts((JSONObject) transcriptsJSON.get(i)); 23 | } 24 | 25 | items = new AWSTranscriptItem[items_size]; 26 | for(int i = 0 ; i < items_size; i++) { 27 | items[i] = new AWSTranscriptItem((JSONObject) itemsJSON.get(i)); 28 | } 29 | 30 | // go through speaker_labels if it exists and 31 | // go through the items inside the segments assigning items speaker label to transcriptItems in order, skipping ones which aren't pronunciations because these dont have labels.... 
32 | JSONObject speaker_labelsJSON = (JSONObject) resultsJSON.getOrDefault("speaker_labels", null); 33 | if(speaker_labelsJSON != null) { 34 | 35 | JSONArray segmentsJSON = (JSONArray) speaker_labelsJSON.get("segments"); 36 | int segment_size = segmentsJSON.size(); 37 | 38 | int result_idx = 0; 39 | for(int i = 0; i < segment_size; i++) { 40 | 41 | JSONObject segmentJSON = (JSONObject) segmentsJSON.get(i); 42 | JSONArray segitemsJSON = (JSONArray) segmentJSON.get("items"); 43 | 44 | int segitems_size = segitemsJSON.size(); 45 | for(int j = 0; j < segitems_size; j++) { 46 | // get segitems speaker_label and assign to next result Item which is a pronunciation 47 | while(!items[result_idx].type.equals("pronunciation")) 48 | result_idx++; 49 | JSONObject segitemJSON = (JSONObject) segitemsJSON.get(j); 50 | items[result_idx].speaker_label = (String) segitemJSON.get("speaker_label"); 51 | result_idx++; 52 | } 53 | } 54 | } 55 | } 56 | 57 | public AWSTranscriptResults() { 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/AWSTranscript.java: -------------------------------------------------------------------------------- 1 | //Copyright 2018, Creed Alexander Erickson IV, All rights reserved. 
2 | 3 | import java.io.FileReader; 4 | import java.io.IOException; 5 | 6 | import org.json.simple.JSONObject; 7 | import org.json.simple.parser.JSONParser; 8 | import org.json.simple.parser.ParseException; 9 | 10 | // AWS transcribe, transcribed object 11 | public class AWSTranscript{ 12 | String jobName; 13 | String accountId; 14 | AWSTranscriptResults results; 15 | String status; 16 | 17 | public AWSTranscript(JSONObject transcriptJSON) { 18 | jobName = (String) transcriptJSON.get("jobName"); 19 | accountId = (String) transcriptJSON.get("accountId"); 20 | status = (String) transcriptJSON.get("status"); 21 | 22 | JSONObject resultsJSON = (JSONObject) transcriptJSON.get("results"); 23 | results = new AWSTranscriptResults(resultsJSON); 24 | } 25 | 26 | // factory constructor from filename 27 | static public AWSTranscript createFromFile(String filename) { 28 | JSONParser parser = new JSONParser(); 29 | 30 | JSONObject fileAsJSON = null; 31 | try { 32 | fileAsJSON = (JSONObject) parser.parse(new FileReader(filename)); 33 | } catch (IOException | ParseException e) { 34 | e.printStackTrace(); 35 | } 36 | return new AWSTranscript(fileAsJSON); 37 | } 38 | 39 | public AWSTranscript(GoogleTranscript gTranscript) { 40 | jobName = gTranscript.name; 41 | accountId = ""; 42 | status = "COMPLETED"; 43 | 44 | results = new AWSTranscriptResults(); 45 | results.speaker_labels = null; 46 | 47 | int item_count = 0; 48 | for(GoogleSpeechResult result : gTranscript.response.results) { 49 | for(GoogleSpeechWords word : result.alternatives[0].words) { 50 | item_count++; 51 | } 52 | } 53 | 54 | String transcriptText = ""; 55 | results.items = new AWSTranscriptItem[item_count]; 56 | int i = 0; 57 | for(GoogleSpeechResult result : gTranscript.response.results) { 58 | for(GoogleSpeechWords word : result.alternatives[0].words) { 59 | results.items[i] = new AWSTranscriptItem(); 60 | results.items[i].start_time = word.startTime.replaceAll("s", ""); 61 | results.items[i].end_time = 
word.endTime.replaceAll("s", ""); 62 | results.items[i].alternatives = new AWSTranscriptAlternatives[1]; 63 | results.items[i].alternatives[0] = new AWSTranscriptAlternatives(); 64 | results.items[i].alternatives[0].confidence = word.confidence; 65 | results.items[i].alternatives[0].content = word.word; 66 | transcriptText += word.word; 67 | results.items[i].type = "pronunciation"; // note that google combines puncuation with a word, maybe we need to seperate them for AWS reuslts?? first just try combined. 68 | i++; 69 | } 70 | } 71 | results.transcripts = new AWSTranscripts[1]; 72 | results.transcripts[0] = new AWSTranscripts(); 73 | results.transcripts[0].transcript = transcriptText; 74 | } 75 | 76 | } 77 | -------------------------------------------------------------------------------- /src/GoogleTranscript.java: -------------------------------------------------------------------------------- 1 | //Copyright 2018, Creed Alexander Erickson IV, All rights reserved. 2 | 3 | import java.io.FileReader; 4 | import java.io.IOException; 5 | 6 | import org.json.simple.JSONObject; 7 | import org.json.simple.parser.JSONParser; 8 | import org.json.simple.parser.ParseException; 9 | 10 | // Google speech API, transcribed object 11 | public class GoogleTranscript{ 12 | String name; 13 | String done; 14 | GoogleSpeechMetaData metadata; 15 | GoogleSpeechResponse response; 16 | 17 | public GoogleTranscript(JSONObject transcriptJSON) { 18 | name = (String) transcriptJSON.get("name"); 19 | done = transcriptJSON.get("done").toString(); 20 | 21 | JSONObject responseJSON = (JSONObject) transcriptJSON.get("response"); 22 | response = new GoogleSpeechResponse(responseJSON); 23 | 24 | JSONObject metaDataJSON = (JSONObject) transcriptJSON.get("metadata"); 25 | metadata = new GoogleSpeechMetaData(metaDataJSON); 26 | } 27 | 28 | /* dont need to create GOOGLe transcript from aws, need other way, just add missing constuctors if wanted 29 | public GoogleTranscript(AWSTranscript awsTranscript) 
{ 30 | name = awsTranscript.jobName; 31 | done = "true"; // maybe handle this buy using aws.status values??? 32 | metadata = GoogleSpeechMetaData(); 33 | 34 | response = new GoogleSpeechResponse(); 35 | response.results = new GoogleSpeechResult[1]; // only create one result, with all words 36 | response.results[0] = new GoogleSpeechResult(); 37 | response.results[0].languageCode = "en-us"; 38 | 39 | response.results[0].alternatives = new GoogleSpeechAlternatives[1]; 40 | response.results[0].alternatives[0] = GoogleSpeechAlternatives(); 41 | response.results[0].alternatives[0].transcript = awsTranscript.results.transcripts[0].transcript; 42 | 43 | AWSTranscriptItem[] awsItems = awsTranscript.results.items; 44 | response.results[0].alternatives[0].words = new GoogleSpeechWords[awsItems.length]; 45 | 46 | int i = 0; 47 | for(AWSTranscriptItem awsItem : awsItems) { 48 | response.results[0].alternatives[0].words[i] = new GoogleSpeechWords(); 49 | response.results[0].alternatives[0].words[i].startTime = awsItem.start_time + "s"; 50 | response.results[0].alternatives[0].words[i].endTime = awsItem.end_time + "s"; 51 | response.results[0].alternatives[0].words[i].confidence = awsItem.alternatives[0].confidence; 52 | response.results[0].alternatives[0].words[i].word = awsItem.alternatives[0].content; 53 | } 54 | } 55 | */ 56 | 57 | // factory constructor from filename 58 | static public GoogleTranscript createFromFile(String filename) { 59 | JSONParser parser = new JSONParser(); 60 | 61 | JSONObject fileAsJSON = null; 62 | try { 63 | fileAsJSON = (JSONObject) parser.parse(new FileReader(filename)); 64 | } catch (IOException | ParseException e) { 65 | e.printStackTrace(); 66 | } 67 | return new GoogleTranscript(fileAsJSON); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/TranscribeUtils.java: -------------------------------------------------------------------------------- 1 | //Copyright 2018, Creed Alexander Erickson 
IV, All rights reserved. 2 | 3 | import java.io.File; 4 | import java.io.FileInputStream; 5 | import java.io.FileWriter; 6 | import java.io.IOException; 7 | import java.util.ArrayList; 8 | import java.util.HashSet; 9 | import java.util.Set; 10 | import java.util.regex.Pattern; 11 | 12 | import javax.sound.sampled.AudioFormat; 13 | import javax.sound.sampled.AudioInputStream; 14 | import javax.sound.sampled.AudioSystem; 15 | import javax.sound.sampled.Clip; 16 | import javax.sound.sampled.AudioFileFormat.Type; 17 | 18 | import com.google.gson.Gson; 19 | 20 | import javafx.collections.FXCollections; 21 | import javafx.collections.ObservableList; 22 | import javafx.scene.Node; 23 | import javafx.scene.control.TextField; 24 | import javafx.scene.layout.VBox; 25 | import javafx.stage.FileChooser; 26 | 27 | public class TranscribeUtils { 28 | 29 | static File tempWavFile = null; 30 | 31 | static AudioInputStream createClip(String audioFilename, String start_timeStr, String end_timeStr) { 32 | try { 33 | if(audioFilename.endsWith(".mp3") && tempWavFile == null) { 34 | tempWavFile = makeTempWavFile(audioFilename); 35 | } 36 | // convert times given by user to millsec 37 | Double start_timeDbl = Double.parseDouble(start_timeStr); 38 | Double end_timeDbl = Double.parseDouble(end_timeStr); 39 | Long start_timeL = Math.round(start_timeDbl*1_000_000); 40 | Long end_timeL = Math.round(end_timeDbl*1_000_000); 41 | 42 | File file = (tempWavFile == null) ? 
new File(audioFilename) : tempWavFile; 43 | AudioInputStream sound = AudioSystem.getAudioInputStream(file); 44 | AudioFormat format = sound.getFormat(); 45 | Clip clip = AudioSystem.getClip(); 46 | clip.open(sound); 47 | 48 | // get the frames of the desired start and end times 49 | clip.setMicrosecondPosition(start_timeL); 50 | int start_frame = clip.getFramePosition(); 51 | clip.setMicrosecondPosition(end_timeL); 52 | int end_frame = clip.getFramePosition(); 53 | clip.close(); 54 | 55 | // get size of desired portion in bytes 56 | int bytesPerFrame = format.getFrameSize(); 57 | //int sampleByteSize = bytesPerFrame*(end_frame-start_frame); // i'm not sure why this is not right.... I swore it use to work 58 | int sampleByteSize = (end_frame-start_frame); 59 | 60 | // open file stream to desired portion 61 | // we can make this work so that the users can play mp3, we convert internal to wav and save wav clips... currently lets just assume wav files only 62 | FileInputStream fileStream = new FileInputStream(file); 63 | fileStream.skip(bytesPerFrame*start_frame); 64 | 65 | // create audio stream of desired portion 66 | return new AudioInputStream(fileStream, format, sampleByteSize); 67 | 68 | }catch(Exception e) { 69 | e.printStackTrace(); 70 | } 71 | return null; 72 | } 73 | 74 | 75 | static File makeTempWavFile(String audioFilename) { 76 | try { 77 | File file = new File(audioFilename); 78 | AudioInputStream in= AudioSystem.getAudioInputStream(file); 79 | AudioFormat baseFormat = in.getFormat(); 80 | AudioFormat decodedFormat = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, 81 | baseFormat.getSampleRate(), 82 | 16, 83 | baseFormat.getChannels(), 84 | baseFormat.getChannels() * 2, 85 | baseFormat.getSampleRate(), 86 | false); 87 | AudioInputStream din = AudioSystem.getAudioInputStream(decodedFormat, in); 88 | File outfile = File.createTempFile("temp", ".wav"); 89 | outfile.deleteOnExit(); 90 | AudioSystem.write(din, Type.WAVE, outfile); 91 | return outfile; 92 | 93 
/**
 * Rebuilds the transcript results from the item boxes shown in the GUI.
 *
 * <p>For every box the five text fields (children 0-4) are read as content,
 * speaker label, confidence, start time and end time. From these the method
 * builds a fresh {@code AWSTranscriptItem[]}, a new transcript string and the
 * per-segment item counts, then stores them in {@code awsTranscription.results}.
 * Speaker labels are regenerated through {@code createSpeakerLabels} only when
 * the {@code speakers} set is non-empty.
 *
 * NOTE(review): the collection types appear without type arguments in this
 * view, although the element calls below rely on them. Confirm against the
 * actual file.
 *
 * @param vBoxedItems      the GUI boxes, one per transcript item, in display order
 * @param awsTranscription the transcript whose results are replaced in place
 */
static public void updateTranscriptObj(ArrayList vBoxedItems, AWSTranscript awsTranscription) {
	// Update the AWSTranscript from vBoxedItems: replace the content of AWSTranscriptResults by
	// building AWSTranscriptItem[] and speaker_labels from the boxes in the GUI, creating a new transcript in the process.

	// Note that we create the minimum number of reasonable segments; there seems to be little rhyme or reason to where AWS separates segments.
	// The only consistency is that a segment has only one speaker, so we create segments at speaker change boundaries.

	AWSTranscriptItem[] newItems = new AWSTranscriptItem[vBoxedItems.size()];

	String newTranscript = "";
	Set speakers = new HashSet();
	ArrayList segItemCounts = new ArrayList();
	int segItemCount = 1; // starts at 1: the first item is counted without passing through the increment branch
	int numSegments = 0;
	String currSpeaker = null;
	// matches content that consists of exactly one punctuation character
	Pattern punctuation_pattern = Pattern.compile("[\\p{Punct}\\p{IsPunctuation}]");

	for(int i = 0; i < vBoxedItems.size(); i++) {

		// field order inside each box: content, speaker_label, confidence, start_time, end_time
		ObservableList vboxChildren = FXCollections.observableArrayList(vBoxedItems.get(i).getChildren());
		String content = ((TextField) vboxChildren.get(0)).getText();
		String speaker_label = ((TextField) vboxChildren.get(1)).getText();
		String confidence = ((TextField) vboxChildren.get(2)).getText();
		String start_time = ((TextField) vboxChildren.get(3)).getText();
		String end_time = ((TextField) vboxChildren.get(4)).getText();

		// Track speaker changes to recreate the speaker-label segments.
		// NOTE(review): the first speaker is only remembered in currSpeaker and is not added to
		// `speakers` here; it enters the set only if it appears again after a speaker change.
		// Confirm whether the resulting speaker count is intended.
		if(currSpeaker == null) {
			if(speaker_label != null) // first time we see a speaker, record that speaker_label.
				currSpeaker = speaker_label;
		}else if(speaker_label == null) { // no speaker label: no speaker change, no new segment, no segment item
			;
		}else if(!speaker_label.equals("") && !speaker_label.equals(currSpeaker)){ // new speaker => new segment
			segItemCounts.add(numSegments++, segItemCount); // close the previous segment by storing its item count
			currSpeaker = speaker_label;
			speakers.add(currSpeaker); // collect speakers so their number can be reported
			segItemCount = 1;
		}else { // same segment: count the item (this branch is also taken when the label is the empty string)
			segItemCount++;
		}

		Boolean isPunctuation = punctuation_pattern.matcher(content).matches();
		String type = (isPunctuation) ? "punctuation" : "pronunciation";

		newItems[i] = new AWSTranscriptItem(content, speaker_label, confidence, start_time, end_time, type);

		// punctuation is appended directly; every other item is preceded by a space
		if(isPunctuation)
			newTranscript += content;
		else
			newTranscript += " " + content;
	}
	// close the final segment
	segItemCounts.add(numSegments++, segItemCount);

	// speaker labels are left untouched when no speaker was collected above
	if(speakers.size() > 0)
		awsTranscription.results.speaker_labels = createSpeakerLabels(speakers.size(), numSegments, segItemCounts, newItems);
	awsTranscription.results.items = newItems;
	awsTranscription.results.transcripts[0].transcript = newTranscript;
}
String end_time = transcript_items[item_idx].end_time; // get items end time, it might or might not be segment endtime.... 171 | String start_time = transcript_items[item_idx].start_time; // only set start time and speaker_label when creating new segment, but update end_time until new segment 172 | String speaker_label = transcript_items[item_idx].speaker_label; 173 | String type = transcript_items[item_idx].type; 174 | 175 | if(!type.equals("pronunciation")) 176 | continue; 177 | 178 | if(currSpeaker == null || !currSpeaker.equals(speaker_label)) { // if we have a new speaker in this transcript_item, then we have a new segment 179 | currSpeaker = speaker_label; 180 | 181 | currSegment = new AWSSpeakerSegment(speaker_label, start_time, end_time); 182 | currSegment.items = new AWSSpeakerItem[segItemCount.get(seg_idx)]; 183 | speaker_labels.segments[seg_idx++] = currSegment; 184 | seg_item_idx=0; 185 | 186 | if(seg_idx >= segItemCount.size()) //this fixes problem where punctuation comes at end of transcript and no speaker labels, hence no segment items are present for last transcript items 187 | break; // all segments are done, no speaker labels on remaining items. 
188 | 189 | }else { // not a new segment, so same speaker, but update segment end_time 190 | currSegment.end_time = end_time; 191 | } 192 | // new segment or not, each pronunciation transcript_items are corresponds to a segment item in the current segment 193 | AWSSpeakerItem speaker_item = new AWSSpeakerItem(start_time, end_time, speaker_label); 194 | currSegment.items[seg_item_idx++] = speaker_item; 195 | } 196 | return speaker_labels; 197 | } 198 | 199 | 200 | static void saveJsonFile(AWSTranscript transcriptObj) { 201 | // write new json file 202 | Gson gson = new Gson(); 203 | String filename = saveJSONfile(); 204 | try (FileWriter file = new FileWriter(filename)) { 205 | file.write(gson.toJson(transcriptObj)); 206 | file.flush(); 207 | } catch (IOException e) { 208 | e.printStackTrace(); 209 | } 210 | } 211 | 212 | static public String getWavFile() { 213 | FileChooser fileChooser = new FileChooser(); 214 | fileChooser.getExtensionFilters().add(new FileChooser.ExtensionFilter("WAV", "*.wav")); 215 | return getFile(fileChooser); 216 | } 217 | 218 | static public String getAudioFile() { 219 | FileChooser fileChooser = new FileChooser(); 220 | //fileChooser.getExtensionFilters().add(new FileChooser.ExtensionFilter("MP3", "*.mp3")); 221 | return getFile(fileChooser); 222 | } 223 | 224 | static public String getJSONFile() { 225 | FileChooser fileChooser = new FileChooser(); 226 | fileChooser.getExtensionFilters().add(new FileChooser.ExtensionFilter("JSON", "*.json")); 227 | return getFile(fileChooser); 228 | } 229 | 230 | private static String getFile(FileChooser fileChooser) { 231 | File file = fileChooser.showOpenDialog(null); 232 | if (file != null) { 233 | return file.getAbsolutePath(); 234 | } 235 | return null; 236 | } 237 | 238 | static public String saveJSONfile() { 239 | FileChooser fileChooser = new FileChooser(); 240 | fileChooser.getExtensionFilters().add(new FileChooser.ExtensionFilter("JSON", "*.json")); 241 | return saveFile(fileChooser); 242 | } 243 
| 244 | private static String saveFile(FileChooser fileChooser) { 245 | File file = fileChooser.showSaveDialog(null); 246 | if (file != null) { 247 | return file.getAbsolutePath(); 248 | } 249 | return null; 250 | } 251 | } 252 | -------------------------------------------------------------------------------- /src/TranscribeEditor.java: -------------------------------------------------------------------------------- 1 | //Copyright 2018, Creed Alexander Erickson IV, All rights reserved. 2 | 3 | import javafx.animation.AnimationTimer; 4 | import javafx.application.*; 5 | import javafx.beans.property.LongProperty; 6 | import javafx.beans.property.SimpleLongProperty; 7 | import javafx.scene.*; 8 | import javafx.stage.*; 9 | import javafx.util.Duration; 10 | import javafx.scene.layout.*; 11 | import javafx.scene.media.Media; 12 | import javafx.scene.media.MediaPlayer; 13 | import javafx.scene.control.*; 14 | import javafx.scene.control.Alert.AlertType; 15 | import javafx.scene.control.ScrollPane.ScrollBarPolicy; 16 | import javafx.scene.input.ClipboardContent; 17 | import javafx.scene.input.DragEvent; 18 | import javafx.scene.input.Dragboard; 19 | import javafx.scene.input.KeyCode; 20 | import javafx.scene.input.KeyCombination; 21 | import javafx.scene.input.KeyEvent; 22 | import javafx.scene.input.MouseEvent; 23 | import javafx.scene.input.ScrollEvent; 24 | import javafx.scene.input.TransferMode; 25 | import javafx.event.*; 26 | import javafx.geometry.Bounds; 27 | import javafx.geometry.Pos; 28 | 29 | import java.io.File; 30 | import java.util.ArrayList; 31 | import java.util.Iterator; 32 | import java.util.regex.Pattern; 33 | 34 | import javax.sound.sampled.AudioInputStream; 35 | import javax.sound.sampled.AudioSystem; 36 | import javax.sound.sampled.Clip; 37 | import javax.sound.sampled.AudioFileFormat.Type; 38 | 39 | public class TranscribeEditor extends Application { 40 | 41 | static double SCROLL_DELTA = .002; //.001045001; 42 | static final double 
MP3_SPEED_DELTA = 0.05; 43 | static final double CONFIDENCE_LIMIT = .5; 44 | static final int SCENE_WIDTH = 1200; 45 | static final int TRANSCRIPT_HEIGHT = 600; 46 | static final int SCENE_HEIGHT = TRANSCRIPT_HEIGHT + 220; 47 | static final int BEHIND_LIMIT = 10; 48 | static final int VBOX_WIDTH = 75; 49 | static final int SCROLL_TOL = 2*VBOX_WIDTH; 50 | static final long MIN_UPDATE_INTERVAL = 100000 ; // nanoseconds. Set to higher number to slow update. 51 | static final ScrollEvent FAKE_SCROLL = new ScrollEvent(null, 0, 0, 0, 0, false, false, false, false, false, false, 0, 1, 0, 0, 0, 0, null, 0, null, 0, 0, null); 52 | static final ScrollEvent FAKE_BACK_SCROLL = new ScrollEvent(null, 0, 0, 0, 0, false, false, false, false, false, false, 0, -1, 0, 0, 0, 0, null, 0, null, 0, 0, null); 53 | 54 | 55 | String audioFilename = null; //"L1.mp3"; 56 | String jsonFilename = null; //"Lesson1.json"; 57 | 58 | AWSTranscript awsTranscript = null; 59 | 60 | static MediaPlayer mediaPlayer = null; 61 | Clip clip = null; 62 | boolean mediaPlaying = false; 63 | Duration skiptime = null; 64 | 65 | // is there a better way than to have all these floating out here? 66 | Scene myScene = null; 67 | TextArea transcriptText = new TextArea(); 68 | BorderPane rootNode; 69 | ScrollPane scrollPane = new ScrollPane(); 70 | HBox scrollingHBox = new HBox(); 71 | HBox outerHBox = new HBox(); 72 | VBox bigVbox = new VBox(); 73 | 74 | ArrayList vBoxedItems; 75 | 76 | int currTransItem = 0; // should i be using bean properties for this? 
77 | 78 | public static void main(String args[]) { 79 | launch(args); 80 | } 81 | 82 | // Override the start() method 83 | public void start(Stage myStage) { 84 | myStage.setTitle("Transcription Editor"); 85 | rootNode = new BorderPane(); 86 | myScene = new Scene(rootNode, SCENE_WIDTH, SCENE_HEIGHT); 87 | 88 | myStage.setScene(myScene); 89 | 90 | MenuBar mb = createMenus(); 91 | FlowPane bottomPane = createBottomPane(); 92 | transcriptText.setWrapText(true); 93 | transcriptText.setEditable(false); 94 | transcriptText.setPrefHeight(TRANSCRIPT_HEIGHT); 95 | transcriptText.setText("If you load a non .wav audio file (like .mp3), there will be a slight delay when first playing single words. " 96 | + "A temporary .wav file will be created to allow for easier word extraction. If you want faster single word play-back, use .wav files."); 97 | 98 | scrollPane.setOnScroll((ScrollEvent event) -> { scrollPane.setHvalue(scrollPane.getHvalue() + (event.getDeltaY()/Math.abs(event.getDeltaY()))*SCROLL_DELTA ); }); 99 | scrollPane.setVbarPolicy(ScrollBarPolicy.NEVER); 100 | scrollPane.setHbarPolicy(ScrollBarPolicy.ALWAYS); 101 | scrollPane.setPannable(true); 102 | scrollPane.setFitToHeight(true); 103 | scrollPane.setContent(scrollingHBox); 104 | 105 | bigVbox.getChildren().add(transcriptText); 106 | rootNode.setTop(mb); 107 | rootNode.setCenter(bigVbox); 108 | rootNode.setBottom(bottomPane); 109 | 110 | AnimationTimer timer = getTimer(); 111 | timer.start(); 112 | myStage.show(); 113 | } 114 | 115 | private AnimationTimer getTimer() { 116 | final LongProperty lastUpdate = new SimpleLongProperty(); 117 | final LongProperty itemsBehind = new SimpleLongProperty(); 118 | 119 | AnimationTimer timer = new AnimationTimer() { 120 | @Override // this will select text in the transcription to follow audio 121 | public void handle(long now) { 122 | if(jsonFilename == null) 123 | return; 124 | if (Math.abs(now - lastUpdate.get()) > MIN_UPDATE_INTERVAL) { 125 | if(mediaPlayer != null && 
mediaPlaying) { 126 | int skip = 1; 127 | if(currTransItem + skip < vBoxedItems.size()) { 128 | String lookAhead_startTime = getStartTime(vBoxedItems.get(currTransItem + skip)); 129 | while((lookAhead_startTime == null || lookAhead_startTime.equals("")) && currTransItem + (++skip) < vBoxedItems.size()) // handle case where an item doesnt have a start_time, move to next item 130 | lookAhead_startTime = getStartTime(vBoxedItems.get(currTransItem + skip)); 131 | if(lookAhead_startTime == null ) // if we cant find a item with a time, then return, we must have shown everything 132 | return; 133 | 134 | Double itemTime = Double.parseDouble(lookAhead_startTime); 135 | Double playTime = mediaPlayer.getCurrentTime().toSeconds(); 136 | if(playTime > itemTime) { // we have passed the start time of this item, so highlight it and scroll if needed 137 | itemsBehind.set(itemsBehind.get() + skip); // record how many items since last scroll 138 | currTransItem += skip; 139 | if(itemsBehind.get() >= BEHIND_LIMIT) { // if we have not scrolled for enough items, scroll to get up-to-date 140 | Bounds boundsInScene = vBoxedItems.get(currTransItem).localToScene(vBoxedItems.get(currTransItem).getBoundsInLocal()); 141 | double oldx = -1; 142 | while(boundsInScene.getMinX() > SCROLL_TOL && (oldx != boundsInScene.getMinX())) { 143 | oldx = boundsInScene.getMinX(); // if we hit the end, it wont scroll any further... 
check this 144 | scrollPane.getOnScroll().handle(FAKE_SCROLL); 145 | boundsInScene = vBoxedItems.get(currTransItem).localToScene(vBoxedItems.get(currTransItem).getBoundsInLocal()); 146 | itemsBehind.set(0); 147 | } 148 | } 149 | transcriptText.deselect(); 150 | transcriptText.selectNextWord(); 151 | transcriptText.selectEndOfNextWord(); 152 | transcriptText.requestFocus(); 153 | vBoxedItems.get(currTransItem).setStyle("-fx-border-width: 2px; -fx-border-style: solid;"); // highlight item 154 | } 155 | } 156 | } 157 | lastUpdate.set(now); 158 | } 159 | } 160 | }; 161 | return timer; 162 | } 163 | 164 | protected String getStartTime(VBox vBox) { 165 | TextField tf = (TextField) vBox.getChildren().get(3); // start time is 4 element; content, speaker confidence, start_time, end_time 166 | return tf.getText(); 167 | } 168 | 169 | private void loadCenterFromJsonFile(Boolean isGooglefile) { 170 | if(isGooglefile) { 171 | GoogleTranscript googleTranscript = GoogleTranscript.createFromFile(jsonFilename); 172 | awsTranscript = new AWSTranscript(googleTranscript); 173 | }else { 174 | awsTranscript = AWSTranscript.createFromFile(jsonFilename); 175 | } 176 | vBoxedItems = new ArrayList(); 177 | Integer i = 0; 178 | for(AWSTranscriptItem transItem : awsTranscript.results.items) { 179 | vBoxedItems.add(createVBoxItem(transItem, i.toString())); 180 | i++; 181 | } 182 | refreshTranscriptText(); 183 | 184 | transcriptText.deselect(); 185 | transcriptText.selectNextWord(); 186 | transcriptText.selectEndOfNextWord(); 187 | transcriptText.requestFocus(); 188 | 189 | scrollingHBox.getChildren().clear(); 190 | bigVbox.getChildren().clear(); 191 | outerHBox.getChildren().clear(); 192 | scrollingHBox.getChildren().addAll(vBoxedItems); 193 | 194 | VBox labels = createLabelsVBox(); 195 | labels.setMinWidth(1.25*VBOX_WIDTH); 196 | outerHBox.getChildren().addAll(labels, scrollPane); 197 | bigVbox.getChildren().addAll(transcriptText, outerHBox); 198 | } 199 | 200 | 201 | public VBox 
createLabelsVBox() { 202 | VBox vbox = new VBox(); 203 | 204 | Label savecontent = new Label("save word"); 205 | TextField content = new TextField("content"); 206 | TextField speaker_label = new TextField("speaker_label"); 207 | TextField confidence = new TextField("confidence"); 208 | TextField start_time = new TextField("start_time"); 209 | TextField end_time = new TextField("end_time"); 210 | content.setEditable(false); 211 | speaker_label.setEditable(false); 212 | confidence.setEditable(false); 213 | start_time.setEditable(false); 214 | end_time.setEditable(false); 215 | content.setStyle("-fx-background-color: gray;"); 216 | speaker_label.setStyle("-fx-background-color: gray;"); 217 | confidence.setStyle("-fx-background-color: gray;"); 218 | start_time.setStyle("-fx-background-color: gray;"); 219 | end_time.setStyle("-fx-background-color: gray;"); 220 | 221 | vbox.getChildren().addAll(content, speaker_label, confidence, start_time, end_time, savecontent); 222 | return vbox; 223 | } 224 | 225 | public VBox createVBoxItem(AWSTranscriptItem transItem, String id ) { 226 | VBox vbox = new VBox(); 227 | 228 | vbox.setId(id); 229 | CheckBox saveBox = new CheckBox(); 230 | TextField content = new TextField(transItem.alternatives[0].content); 231 | TextField speaker_label = new TextField(transItem.speaker_label); 232 | TextField confidence = new TextField(transItem.alternatives[0].confidence); 233 | TextField start_time = new TextField(transItem.start_time); 234 | TextField end_time = new TextField(transItem.end_time); 235 | content.setPrefWidth(VBOX_WIDTH); 236 | speaker_label.setPrefWidth(VBOX_WIDTH); 237 | confidence.setPrefWidth(VBOX_WIDTH); 238 | start_time.setPrefWidth(VBOX_WIDTH); 239 | end_time.setPrefWidth(VBOX_WIDTH); 240 | 241 | // Create an Edit popup menu, and menu items 242 | ContextMenu contextMenu = new ContextMenu(); 243 | MenuItem listen = new MenuItem("Listen to word"); 244 | MenuItem playHere = new MenuItem("Play from here"); 245 | MenuItem 
insertBefore = new MenuItem("Insert Before"); 246 | MenuItem insertAfter = new MenuItem("Insert After"); 247 | MenuItem delete = new MenuItem("Delete"); 248 | 249 | // set actions on menuitems 250 | insertBefore.setOnAction((ActionEvent ae)->{ insertColumn(vbox.getId()); }); 251 | insertAfter.setOnAction((ActionEvent ae)->{ insertColumn( ((Integer)(Integer.parseInt(vbox.getId()) + 1)).toString() ); }); 252 | delete.setOnAction((ActionEvent ae)->{ removeColumn(vbox.getId()); }); 253 | listen.setOnAction((ActionEvent ae)->{ playWord(vbox.getId()); }); 254 | playHere.setOnAction((ActionEvent ae)->{ playFromHere(vbox.getId()); }); 255 | 256 | // Add the menu items to the popup menu 257 | contextMenu.getItems().addAll(listen, playHere, new SeparatorMenuItem(), insertBefore, insertAfter, new SeparatorMenuItem(), delete); 258 | 259 | // add menu to vbox content 260 | content.setContextMenu(contextMenu); 261 | speaker_label.setContextMenu(contextMenu); 262 | confidence.setContextMenu(contextMenu); 263 | start_time.setContextMenu(contextMenu); 264 | end_time.setContextMenu(contextMenu); 265 | 266 | // update transcription on edit... keeps things up to date, in real time. 
better for sync, worse for performance 267 | content.setOnKeyReleased((KeyEvent ke)->{ processContentTyping(ke, vbox.getId(), content); }); 268 | speaker_label.setOnKeyReleased((KeyEvent ke)-> { TranscribeUtils.updateTranscriptObj(vBoxedItems, awsTranscript); }); 269 | confidence.setOnKeyReleased((KeyEvent ke)-> { TranscribeUtils.updateTranscriptObj(vBoxedItems, awsTranscript);}); 270 | start_time.setOnKeyReleased((KeyEvent ke)-> { TranscribeUtils.updateTranscriptObj(vBoxedItems, awsTranscript); }); 271 | end_time.setOnKeyReleased((KeyEvent ke)-> { TranscribeUtils.updateTranscriptObj(vBoxedItems, awsTranscript); }); 272 | 273 | // add some editing conviences that I desire for faster editing 274 | EventHandler dragOverHandler = new EventHandler () { 275 | public void handle(DragEvent event){ 276 | if (event.getGestureSource() != event.getGestureTarget() && 277 | event.getDragboard().hasString()) { 278 | event.acceptTransferModes(TransferMode.COPY_OR_MOVE); 279 | } 280 | event.consume(); 281 | } 282 | }; 283 | 284 | EventHandler dragDroppedHandler = new EventHandler () { 285 | public void handle(DragEvent event){ 286 | Dragboard db = event.getDragboard(); 287 | boolean success = false; 288 | if (db.hasString()) { 289 | ((TextField)event.getGestureTarget()).setText(db.getString()); 290 | success = true; 291 | } 292 | event.setDropCompleted(success); 293 | event.consume(); 294 | } 295 | }; 296 | 297 | EventHandler dragDetectedHandler = new EventHandler () { 298 | public void handle(MouseEvent me){ 299 | Dragboard db = ((Node) me.getSource()).startDragAndDrop(TransferMode.ANY); 300 | ClipboardContent data = new ClipboardContent(); 301 | data.putString(((TextField) me.getSource()).getText()); 302 | db.setContent(data); 303 | me.consume(); 304 | } 305 | }; 306 | 307 | content.setOnDragDetected(dragDetectedHandler); 308 | content.setOnDragOver(dragOverHandler); 309 | content.setOnDragDropped(dragDroppedHandler); 310 | start_time.setOnDragDetected(dragDetectedHandler); 311 
| start_time.setOnDragOver(dragOverHandler); 312 | start_time.setOnDragDropped(dragDroppedHandler); 313 | end_time.setOnDragDetected(dragDetectedHandler); 314 | end_time.setOnDragOver(dragOverHandler); 315 | end_time.setOnDragDropped(dragDroppedHandler); 316 | speaker_label.setOnDragDetected(dragDetectedHandler); 317 | speaker_label.setOnDragOver(dragOverHandler); 318 | speaker_label.setOnDragDropped(dragDroppedHandler); 319 | 320 | speaker_label.setFocusTraversable(false); 321 | confidence.setFocusTraversable(false); 322 | start_time.setFocusTraversable(false); 323 | end_time.setFocusTraversable(false); 324 | saveBox.setFocusTraversable(false); 325 | 326 | 327 | // flag items with low confidence 328 | if(transItem.alternatives[0].confidence != null 329 | && !transItem.alternatives[0].confidence.equals("") 330 | && (Double.parseDouble(transItem.alternatives[0].confidence) < CONFIDENCE_LIMIT)) 331 | { 332 | confidence.setStyle("-fx-background-color: red;"); 333 | } 334 | vbox.setAlignment(Pos.CENTER); 335 | vbox.getChildren().addAll(content, speaker_label, confidence, start_time, end_time, saveBox); 336 | 337 | return vbox; 338 | } 339 | 340 | 341 | 342 | private void playFromHere(String id) { 343 | // after many different attempts, i got good sync in transcript tracking with this code.. 
344 | Pattern punctuation_pattern = Pattern.compile("[\\p{Punct}\\p{IsPunctuation}]"); 345 | 346 | transcriptText.deselect(); 347 | transcriptText.selectHome(); 348 | transcriptText.positionCaret(0); 349 | currTransItem = Integer.parseInt(id); // update position for animation 350 | VBox vbox = null; 351 | for(VBox box : vBoxedItems) { 352 | if(box.getId().equals(id)) { 353 | vbox = box; 354 | break; 355 | } 356 | 357 | String content = ((TextField)box.getChildren().get(0)).getText(); 358 | char contentChars[] = content.toCharArray(); 359 | for(char contentchar : contentChars) { // move forward to position in transcript character by character 360 | transcriptText.positionCaret(transcriptText.getCaretPosition()+1); 361 | } 362 | 363 | Boolean isPunctuation = punctuation_pattern.matcher(content).matches(); 364 | if(!isPunctuation) // forward past the next space for non punctuation 365 | transcriptText.positionCaret(transcriptText.getCaretPosition()+1); 366 | } 367 | transcriptText.selectEndOfNextWord(); 368 | 369 | String start_time = ((TextField)vbox.getChildren().get(3)).getText(); 370 | if(start_time.equals("")) 371 | return; 372 | 373 | highlightPriorVboxes(id); 374 | playOrPause(Double.parseDouble(start_time)); 375 | } 376 | 377 | private void highlightPriorVboxes(String id) { 378 | Boolean passedId = false; 379 | 380 | for(VBox vbox : vBoxedItems) { 381 | if(!passedId && vbox.getId() != null && vbox.getId().equals(id)) { 382 | passedId = true; 383 | vbox.setStyle("-fx-border-width: 2px; -fx-border-style: solid;"); // highlight selected 384 | } 385 | 386 | if(!passedId) 387 | vbox.setStyle("-fx-border-width: 2px; -fx-border-style: solid;"); // highlight item 388 | else 389 | vbox.setStyle(null); 390 | } 391 | 392 | // scoll as needed 393 | int i = 0; 394 | Bounds boundsInScene = vBoxedItems.get(currTransItem).localToScene(vBoxedItems.get(currTransItem).getBoundsInLocal()); 395 | if(boundsInScene.getMinX() > 0) { 396 | while(boundsInScene.getMinX() > SCROLL_TOL) { 
397 | scrollPane.getOnScroll().handle(FAKE_SCROLL); 398 | boundsInScene = vBoxedItems.get(currTransItem).localToScene(vBoxedItems.get(currTransItem).getBoundsInLocal()); 399 | if(i++ > 1000) // ugly fix for when near end and cant scroll further. 400 | return; 401 | } 402 | }else { 403 | while(boundsInScene.getMaxX() < SCROLL_TOL) { 404 | scrollPane.getOnScroll().handle(FAKE_BACK_SCROLL); 405 | boundsInScene = vBoxedItems.get(currTransItem).localToScene(vBoxedItems.get(currTransItem).getBoundsInLocal()); 406 | if(i++ > 1000) 407 | return; 408 | } 409 | } 410 | 411 | } 412 | 413 | private void playWord(String id) { 414 | if(audioFilename == null) 415 | return; 416 | VBox vbox = vBoxedItems.get(Integer.parseInt(id)); 417 | String start_time = ((TextField)vbox.getChildren().get(3)).getText(); 418 | String end_time = ((TextField)vbox.getChildren().get(4)).getText(); 419 | 420 | try { 421 | AudioInputStream clipStream = TranscribeUtils.createClip(audioFilename, start_time, end_time); 422 | if(clip != null && clip.isOpen()) 423 | clip.close(); 424 | else if(clip == null) 425 | clip = AudioSystem.getClip(); 426 | clip.open(clipStream); 427 | clip.setFramePosition(0); 428 | clip.start(); 429 | }catch(Exception e) { 430 | e.printStackTrace(); 431 | } 432 | } 433 | 434 | private void removeColumn(String id) { 435 | // subtract one to ids of all later vboxes 436 | int idx = Integer.parseInt(id); 437 | for(VBox vbox : vBoxedItems) { 438 | int vboxId = Integer.parseInt(vbox.getId()); 439 | if(vboxId >= idx) 440 | vbox.setId("" + (vboxId - 1)); 441 | } 442 | // remove old vbox 443 | vBoxedItems.remove(idx); 444 | 445 | refreshTranscriptText(); 446 | scrollingHBox.getChildren().clear(); 447 | scrollingHBox.getChildren().addAll(vBoxedItems); 448 | vBoxedItems.get(Integer.parseInt(id)).getChildren().get(0).requestFocus(); // put focus on next vbox content so typing can resume 449 | } 450 | 451 | private void insertColumn(String id) { 452 | // add one to ids of all later vboxes 453 | 
int insert_idx = Integer.parseInt(id); 454 | for(VBox vbox : vBoxedItems) { 455 | int vboxId = Integer.parseInt(vbox.getId()); 456 | if(vboxId >= insert_idx) 457 | vbox.setId("" + (vboxId + 1)); 458 | } 459 | // create and insert new vbox 460 | VBox newVbox = createVBoxItem(new AWSTranscriptItem(), id); 461 | vBoxedItems.add(insert_idx, newVbox); 462 | 463 | refreshTranscriptText(); 464 | scrollingHBox.getChildren().clear(); 465 | scrollingHBox.getChildren().addAll(vBoxedItems); 466 | vBoxedItems.get(Integer.parseInt(id)).getChildren().get(0).requestFocus(); // put focus on new vbox content so typing can resume 467 | } 468 | 469 | private void processContentTyping(KeyEvent ke, String id, TextField content) { 470 | if(ke.getText().equals(" ")){ // i've decided to make spaces insert new vboxes. tab will get you to next vbox, space will create new subsequent vbox, this is all for editing convenience 471 | content.setText(content.getText().trim()); // remove the space that was typed 472 | Integer newVboxId = (Integer)(Integer.parseInt(id) + 1); 473 | insertColumn(newVboxId.toString() ); // create a subsequent vbox 474 | // vBoxedItems.get(newVboxId).getChildren().get(0).requestFocus(); // put focus on new vbox content so typing can resume 475 | } 476 | if( ke.getCode().equals( KeyCode.DELETE )) { 477 | removeColumn(id); 478 | } 479 | if(ke.getCode().equals(KeyCode.COMMA) && ke.isControlDown()) { // this is a shortcut for coping endtime of next vbox to current vbox endtime//, and deleting next 480 | Integer nextVboxId = (Integer)(Integer.parseInt(id) + 1); 481 | String copiedEndTime = ((TextField)vBoxedItems.get(nextVboxId).getChildren().get(4)).getText(); 482 | ((TextField)vBoxedItems.get(Integer.parseInt(id)).getChildren().get(4)).setText(copiedEndTime); 483 | // removeColumn(nextVboxId.toString()); 484 | } 485 | if(ke.getCode().equals(KeyCode.P) && ke.isControlDown()) { // shortcut for playing current word 486 | playWord(id); 487 | } 488 | 
if(ke.getCode().equals(KeyCode.K) && ke.isControlDown()) { // shortcut for saving current word, "K" for keep 489 | ((CheckBox)vBoxedItems.get(Integer.parseInt(id)).getChildren().get(5)).setSelected(true); 490 | } 491 | Integer id_int = Integer.parseInt(id); 492 | 493 | if(ke.getCode().equals(KeyCode.LEFT) && ke.isControlDown()) { // shortcut for moving to previous vbox 494 | if(id_int > 0) 495 | ((TextField)vBoxedItems.get(id_int - 1).getChildren().get(0)).requestFocus(); 496 | } 497 | if(ke.getCode().equals(KeyCode.RIGHT) && ke.isControlDown()) { // shortcut for moving to next vbox, tab does same thing 498 | if(id_int < vBoxedItems.size() -1) 499 | ((TextField)vBoxedItems.get(id_int + 1).getChildren().get(0)).requestFocus(); 500 | } 501 | 502 | refreshTranscriptText(); 503 | } 504 | 505 | private void refreshTranscriptText() { 506 | TranscribeUtils.updateTranscriptObj(vBoxedItems, awsTranscript); 507 | transcriptText.setText(awsTranscript.results.transcripts[0].transcript); 508 | } 509 | 510 | 511 | 512 | private MenuBar createMenus() { 513 | MenuBar mb = new MenuBar(); 514 | 515 | Menu fileMenu = new Menu("_File"); 516 | MenuItem openAWSJson = new MenuItem("Open AWS Transcribe_JSON Transcription"); 517 | MenuItem openGoogleJson = new MenuItem("Open GoogleSpeech JSON Transcript"); 518 | MenuItem openAudio = new MenuItem("Open _Audio"); 519 | MenuItem saveJson = new MenuItem("_Save as AWS JSON Transcription"); 520 | MenuItem exit = new MenuItem("_Exit"); 521 | 522 | Menu helpMenu = new Menu("_Help"); 523 | MenuItem about = new MenuItem("About"); 524 | helpMenu.getItems().addAll(about); 525 | 526 | 527 | Alert alert = new Alert(AlertType.INFORMATION); 528 | alert.setTitle("About"); 529 | alert.setHeaderText(null); 530 | alert.setContentText("Copyright 2018, Creed Alexander Erickson IV, All rights reserved."); 531 | 532 | about.setOnAction((ActionEvent ae)-> { alert.showAndWait(); }); 533 | 534 | openAWSJson.setOnAction((ActionEvent ae) -> { if( (jsonFilename = 
TranscribeUtils.getJSONFile()) != null ) loadCenterFromJsonFile(false); }); 535 | openGoogleJson.setOnAction((ActionEvent ae) -> { if( (jsonFilename = TranscribeUtils.getJSONFile()) != null ) loadCenterFromJsonFile(true); }); 536 | 537 | openAudio.setOnAction((ActionEvent ae) -> { 538 | audioFilename = TranscribeUtils.getAudioFile(); 539 | if(mediaPlayer != null) { 540 | mediaPlayer.dispose(); 541 | mediaPlayer = null; 542 | } 543 | }); 544 | saveJson.setOnAction((ActionEvent ae)->{ 545 | if(awsTranscript != null) { 546 | //updateTranscriptObj(); // we shouldnt need to update since we keep things up-to-date in real time, but this might be needed if we have sync problems... 547 | TranscribeUtils.saveJsonFile(awsTranscript); 548 | } 549 | }); 550 | exit.setOnAction((ActionEvent ae) -> {Platform.exit();}); 551 | 552 | openAWSJson.setAccelerator(KeyCombination.keyCombination("shortcut+J")); 553 | openAudio.setAccelerator(KeyCombination.keyCombination("shortcut+M")); 554 | saveJson.setAccelerator(KeyCombination.keyCombination("shortcut+S")); 555 | exit.setAccelerator(KeyCombination.keyCombination("shortcut+X")); 556 | 557 | fileMenu.getItems().addAll(openAWSJson, openGoogleJson, openAudio, new SeparatorMenuItem(), saveJson, new SeparatorMenuItem(), exit); 558 | mb.getMenus().addAll(fileMenu,helpMenu); 559 | return mb; 560 | } 561 | 562 | private FlowPane createBottomPane() { 563 | FlowPane bottomPane = new FlowPane(); 564 | bottomPane.setAlignment(Pos.CENTER); 565 | 566 | Button playButton = new Button("Play/Pause"); 567 | Button slowDown = new Button("Slower"); 568 | Button speedUp = new Button("Faster"); 569 | 570 | Button save = new Button("Save selected words"); 571 | 572 | 573 | save.setOnAction((ActionEvent ae)-> { saveWords();}); 574 | 575 | playButton.setOnAction((ActionEvent ae)-> {playOrPause();}); 576 | slowDown.setOnAction((ActionEvent ae)-> { if(mediaPlayer != null) mediaPlayer.setRate(mediaPlayer.getRate() - MP3_SPEED_DELTA); }); 577 | 
speedUp.setOnAction((ActionEvent ae)-> { if(mediaPlayer != null) mediaPlayer.setRate(mediaPlayer.getRate() + MP3_SPEED_DELTA); }); 578 | 579 | bottomPane.getChildren().addAll(slowDown, playButton, speedUp, save); 580 | return bottomPane; 581 | } 582 | 583 | 584 | private void saveWords() { 585 | //int savedWordCount = 0; 586 | 587 | if(audioFilename == null) { // if no mp3 file opened, provide open dialog box 588 | audioFilename = TranscribeUtils.getAudioFile(); 589 | if(audioFilename == null) 590 | return; 591 | } 592 | 593 | Iterator iter = vBoxedItems.iterator(); 594 | String chineseword = null; // assume we select both english and chinese words to save, and english comes first. we form the lessonData from these... 595 | while(iter.hasNext()) { 596 | VBox vbox = iter.next(); 597 | CheckBox checkbox = (CheckBox) vbox.getChildren().get(5); 598 | if(checkbox.selectedProperty().getValue() == true) { 599 | String wordFilename = ((TextField)vbox.getChildren().get(0)).getText(); 600 | String start_time = ((TextField)vbox.getChildren().get(3)).getText(); 601 | String end_time = ((TextField)vbox.getChildren().get(4)).getText(); 602 | while(iter.hasNext()) { // if consecutive vboxes are checked, find the end time by finding end_time of last box in the series 603 | vbox = iter.next(); 604 | checkbox = (CheckBox) vbox.getChildren().get(5); 605 | if(checkbox.selectedProperty().getValue() == true) { 606 | if(((TextField)vbox.getChildren().get(4)).getText() != null && !((TextField)vbox.getChildren().get(4)).getText().equals("")) // if there is a new endtime, update it 607 | end_time = ((TextField)vbox.getChildren().get(4)).getText(); 608 | wordFilename += " " + ((TextField)vbox.getChildren().get(0)).getText(); 609 | }else { 610 | break; 611 | } 612 | } 613 | if(chineseword == null) { 614 | chineseword = wordFilename; 615 | wordFilename = "chinese/" + wordFilename; 616 | }else{ 617 | String section = "grammarBuilder2"; 618 | System.out.println(section+".add(new String[] { \"" + 
chineseword + "\", \"" + wordFilename + "\"});"); 619 | chineseword= null; 620 | wordFilename = "english/" + wordFilename; 621 | } 622 | wordFilename = wordFilename.toLowerCase().replaceAll("[ .?!,()]", ""); 623 | wordFilename += ".wav"; 624 | saveClip(wordFilename, start_time, end_time); 625 | } 626 | } 627 | } 628 | 629 | private void saveClip(String outfilename, String start_timeStr, String end_timeStr) { 630 | try { 631 | AudioInputStream startStream = TranscribeUtils.createClip(audioFilename, start_timeStr, end_timeStr); 632 | File outfile = new File(outfilename); 633 | AudioSystem.write(startStream, Type.WAVE, outfile); 634 | startStream.close(); 635 | }catch(Exception e) { 636 | e.printStackTrace(); 637 | } 638 | } 639 | 640 | public void playOrPause() { 641 | playOrPause(null); 642 | } 643 | 644 | public void playOrPause(Double start_time) { // this turned messy due to mediaPlayer pause, play bug 645 | if(audioFilename == null) { // if no mp3 file opened, provide open dialog box 646 | audioFilename = TranscribeUtils.getAudioFile(); 647 | if(audioFilename == null) 648 | return; 649 | } 650 | 651 | if(mediaPlayer != null) { // if there was a mediaplayer, kill old media players because .pause .play doenst work right 652 | if(start_time == null && mediaPlaying) { // before disposing, if not asked to play at a time, and we where playing, record our current time. 
653 | skiptime = mediaPlayer.getCurrentTime(); // record where we were playing 654 | } 655 | mediaPlayer.dispose(); 656 | } 657 | 658 | if(start_time != null) { // if asked to play at a time, play at that time 659 | skiptime = new Duration(start_time*1_000); 660 | File file = new File(audioFilename); 661 | Media media = new Media(file.toURI().toString()); 662 | mediaPlayer = new MediaPlayer(media); 663 | mediaPlayer.stop(); 664 | mediaPlayer.setStartTime(skiptime); 665 | mediaPlayer.seek(skiptime); 666 | mediaPlayer.play(); 667 | mediaPlaying=true; 668 | }else if(mediaPlaying) { // using my own playing status because built in status doesnt update correctly 669 | //mediaPlayer.pause(); 670 | mediaPlaying=false; 671 | }else { 672 | File file = new File(audioFilename); 673 | Media media = new Media(file.toURI().toString()); 674 | mediaPlayer = new MediaPlayer(media); 675 | if(skiptime != null) 676 | mediaPlayer.setStartTime(skiptime); // try to fix odd mediaPlayer issue, there are bugs in mediaPlayer..... it doesnt work as it should 677 | mediaPlayer.play(); 678 | mediaPlaying=true; 679 | } 680 | } 681 | 682 | public void stop() { 683 | } 684 | } --------------------------------------------------------------------------------