├── jl1.0.1.jar
├── .gitattributes
├── gson-2.8.5.jar
├── mp3spi1.9.5.jar
├── tritonus_share.jar
├── json-simple-1.1.1.jar
├── bin
├── AWSSpeakerItem.class
├── AWSTranscript.class
├── AWSTranscripts.class
├── AWSSpeakerLabels.class
├── TranscribeEditor.class
├── TranscribeUtils.class
├── AWSSpeakerSegment.class
├── AWSTranscriptItem.class
├── TranscribeEditor$1.class
├── AWSTranscriptResults.class
└── AWSTranscriptAlternatives.class
├── src
├── AWSTranscripts.java
├── GoogleSpeechWords.java
├── GoogleSpeechMetaData.java
├── GoogleSpeechResponse.java
├── AWSTranscriptAlternatives.java
├── AWSSpeakerItem.java
├── GoogleSpeechResult.java
├── GoogleSpeechAlternatives.java
├── AWSSpeakerLabels.java
├── AWSSpeakerSegment.java
├── AWSTranscriptItem.java
├── AWSTranscriptResults.java
├── AWSTranscript.java
├── GoogleTranscript.java
├── TranscribeUtils.java
└── TranscribeEditor.java
├── .project
├── .settings
└── org.eclipse.jdt.core.prefs
├── README.md
└── .classpath
/jl1.0.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/jl1.0.1.jar
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/gson-2.8.5.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/gson-2.8.5.jar
--------------------------------------------------------------------------------
/mp3spi1.9.5.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/mp3spi1.9.5.jar
--------------------------------------------------------------------------------
/tritonus_share.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/tritonus_share.jar
--------------------------------------------------------------------------------
/json-simple-1.1.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/json-simple-1.1.1.jar
--------------------------------------------------------------------------------
/bin/AWSSpeakerItem.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/AWSSpeakerItem.class
--------------------------------------------------------------------------------
/bin/AWSTranscript.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/AWSTranscript.class
--------------------------------------------------------------------------------
/bin/AWSTranscripts.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/AWSTranscripts.class
--------------------------------------------------------------------------------
/bin/AWSSpeakerLabels.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/AWSSpeakerLabels.class
--------------------------------------------------------------------------------
/bin/TranscribeEditor.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/TranscribeEditor.class
--------------------------------------------------------------------------------
/bin/TranscribeUtils.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/TranscribeUtils.class
--------------------------------------------------------------------------------
/bin/AWSSpeakerSegment.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/AWSSpeakerSegment.class
--------------------------------------------------------------------------------
/bin/AWSTranscriptItem.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/AWSTranscriptItem.class
--------------------------------------------------------------------------------
/bin/TranscribeEditor$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/TranscribeEditor$1.class
--------------------------------------------------------------------------------
/bin/AWSTranscriptResults.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/AWSTranscriptResults.class
--------------------------------------------------------------------------------
/bin/AWSTranscriptAlternatives.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CreedIV/TranscribeEditor/HEAD/bin/AWSTranscriptAlternatives.class
--------------------------------------------------------------------------------
/src/AWSTranscripts.java:
--------------------------------------------------------------------------------
1 | //Copyright 2018, Creed Alexander Erickson IV, All rights reserved.
2 |
3 | import org.json.simple.JSONObject;
4 |
5 | // AWS transcribe->results->transcript object
6 | public class AWSTranscripts{
7 | String transcript;
8 |
9 | public AWSTranscripts(JSONObject transcriptJSON) {
10 | transcript = (String)transcriptJSON.get("transcript");
11 | }
12 |
13 | public AWSTranscripts() {
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | TranscribeEditor
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 |
15 | org.eclipse.jdt.core.javanature
16 |
17 |
18 |
--------------------------------------------------------------------------------
/src/GoogleSpeechWords.java:
--------------------------------------------------------------------------------
1 | import org.json.simple.JSONObject;
2 |
3 | public class GoogleSpeechWords {
4 | String startTime;
5 | String endTime;
6 | String word;
7 | String confidence;
8 |
9 | GoogleSpeechWords(JSONObject wordJSON){
10 | startTime = (String) wordJSON.get("startTime");
11 | endTime = (String) wordJSON.get("endTime");
12 | word = (String) wordJSON.get("word");
13 | confidence = (String) wordJSON.get("confidence").toString();
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/src/GoogleSpeechMetaData.java:
--------------------------------------------------------------------------------
1 | import org.json.simple.JSONObject;
2 |
3 | public class GoogleSpeechMetaData {
4 | //String @type = null;
5 | String progressPercent;
6 | String startTime;
7 | String lastUpdateTime;
8 |
9 | public GoogleSpeechMetaData(JSONObject metaDataJSON) {
10 | startTime = (String) metaDataJSON.get("startTime");
11 | progressPercent = (String) metaDataJSON.get("progressPercent").toString();
12 | lastUpdateTime = (String) metaDataJSON.get("lastUpdateTime");
13 | }
14 |
15 | }
16 |
--------------------------------------------------------------------------------
/src/GoogleSpeechResponse.java:
--------------------------------------------------------------------------------
1 | import org.json.simple.JSONArray;
2 | import org.json.simple.JSONObject;
3 |
4 | public class GoogleSpeechResponse {
5 | //String @type
6 | GoogleSpeechResult results[];
7 |
8 | public GoogleSpeechResponse(JSONObject responseJSON) {
9 |
10 | JSONArray resultsJSON = (JSONArray) responseJSON.get("results");
11 |
12 | int results_size = resultsJSON.size();
13 |
14 | results = new GoogleSpeechResult[results_size];
15 | for(int i = 0 ; i < results_size; i++) {
16 | results[i] = new GoogleSpeechResult((JSONObject) resultsJSON.get(i));
17 | }
18 |
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
4 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
5 | org.eclipse.jdt.core.compiler.compliance=1.8
6 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
7 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
8 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
9 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
10 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
11 | org.eclipse.jdt.core.compiler.source=1.8
12 |
--------------------------------------------------------------------------------
/src/AWSTranscriptAlternatives.java:
--------------------------------------------------------------------------------
1 | //Copyright 2018, Creed Alexander Erickson IV, All rights reserved.
2 |
3 | import org.json.simple.JSONObject;
4 |
5 | public class AWSTranscriptAlternatives{
6 | String confidence = null;
7 | String content = null;
8 |
9 | AWSTranscriptAlternatives(String confidence, String content){
10 | this.confidence = confidence;
11 | this.content = content;
12 | }
13 |
14 | public AWSTranscriptAlternatives(JSONObject alt) {
15 | confidence = (String) alt.getOrDefault("confidence", null);
16 | content = (String) alt.getOrDefault("content", null);
17 | }
18 |
19 | public AWSTranscriptAlternatives() {
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/src/AWSSpeakerItem.java:
--------------------------------------------------------------------------------
1 | import org.json.simple.JSONArray;
2 | import org.json.simple.JSONObject;
3 |
4 | public class AWSSpeakerItem {
5 | String start_time;
6 | String speaker_label;
7 | String end_time;
8 |
9 | AWSSpeakerItem(JSONObject itemJSON){
10 | start_time = (String) itemJSON.getOrDefault("start_time", null);
11 | end_time = (String) itemJSON.getOrDefault("end_time", null);
12 | speaker_label = (String) itemJSON.getOrDefault("speaker_label", null);
13 | }
14 |
15 | public AWSSpeakerItem(String start_time2, String end_time2, String speaker_label2) {
16 | start_time = start_time2;
17 | end_time = end_time2;
18 | speaker_label = speaker_label2;
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/src/GoogleSpeechResult.java:
--------------------------------------------------------------------------------
1 | import org.json.simple.JSONArray;
2 | import org.json.simple.JSONObject;
3 |
4 | public class GoogleSpeechResult {
5 | String languageCode;
6 | GoogleSpeechAlternatives alternatives[];
7 |
8 | GoogleSpeechResult(JSONObject resultJSON){
9 | languageCode = (String) resultJSON.get("languageCode");
10 |
11 | JSONArray alternativesJSON = (JSONArray) resultJSON.get("alternatives");
12 |
13 | int alts_size = alternativesJSON.size();
14 |
15 | alternatives = new GoogleSpeechAlternatives[alts_size];
16 | for(int i = 0 ; i < alts_size; i++) {
17 | alternatives[i] = new GoogleSpeechAlternatives((JSONObject) alternativesJSON.get(i));
18 | }
19 |
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/src/GoogleSpeechAlternatives.java:
--------------------------------------------------------------------------------
1 | import org.json.simple.JSONArray;
2 | import org.json.simple.JSONObject;
3 |
4 | public class GoogleSpeechAlternatives {
5 | String transcript;
6 | String confidence;
7 | GoogleSpeechWords words[];
8 |
9 | GoogleSpeechAlternatives(JSONObject alternativeJSON){
10 | transcript = (String) alternativeJSON.get("transcript");
11 | confidence = (String) alternativeJSON.get("confidence").toString();
12 |
13 | JSONArray wordsJSON = (JSONArray) alternativeJSON.get("words");
14 |
15 | int words_size = wordsJSON.size();
16 |
17 | words = new GoogleSpeechWords[words_size];
18 | for(int i = 0 ; i < words_size; i++) {
19 | words[i] = new GoogleSpeechWords((JSONObject) wordsJSON.get(i));
20 | }
21 |
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/src/AWSSpeakerLabels.java:
--------------------------------------------------------------------------------
1 | import org.json.simple.JSONArray;
2 | import org.json.simple.JSONObject;
3 |
4 | public class AWSSpeakerLabels {
5 | String speakers;
6 | AWSSpeakerSegment[] segments;
7 |
8 | public AWSSpeakerLabels(JSONObject labelsJSON) {
9 | speakers = (String) labelsJSON.getOrDefault("speakers", null);
10 |
11 | JSONArray segmentsJSON = (JSONArray) labelsJSON.get("segments");
12 |
13 | int segments_size = segmentsJSON.size();
14 |
15 | segments = new AWSSpeakerSegment[segments_size];
16 | for(int i = 0 ; i < segments_size; i++) {
17 | segments[i] = new AWSSpeakerSegment((JSONObject) segmentsJSON.get(i));
18 | }
19 | }
20 |
21 | public AWSSpeakerLabels(Integer numSpeakers) {
22 | speakers = numSpeakers.toString();
23 | }
24 |
25 | }
26 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TranscribeEditor
2 | #//Copyright 2018, Creed Alexander Erickson IV, All rights reserved.
3 |
4 | This is an editor for AWS Transcribe results. It allows you to load a JSON file which has been produced by AWS Transcribe
5 | and modify the transcription, including adding and deleting entries in the items array in the JSON. You can then save the edits as
6 | a JSON file which follows the AWS Transcribe JSON format.
7 |
8 | You can also load and play the original mp3 sound file which generated the transcription while you edit. The text will highlight
9 | what is currently playing in the sound file to allow easy verification.
10 |
11 | The order of content for each item is, from the top: content, speaker_label, confidence, start_time, end_time. You can modify all of these.
12 | To add or delete items, right-click on the items array at the bottom.
13 |
14 |
--------------------------------------------------------------------------------
/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/src/AWSSpeakerSegment.java:
--------------------------------------------------------------------------------
1 |
2 | import org.json.simple.JSONArray;
3 | import org.json.simple.JSONObject;
4 |
5 | public class AWSSpeakerSegment {
6 | String start_time;
7 | String speaker_label;
8 | String end_time;
9 | AWSSpeakerItem[] items; // if this serialized ok then I should not use raw arrays in these AWS classes....
10 |
11 | public AWSSpeakerSegment(JSONObject segmentJSON) {
12 | start_time = (String) segmentJSON.getOrDefault("start_time", null);
13 | end_time = (String) segmentJSON.getOrDefault("end_time", null);
14 | speaker_label = (String) segmentJSON.getOrDefault("speaker_label", null);
15 |
16 | JSONArray itemsJSON = (JSONArray) segmentJSON.get("items");
17 |
18 | int items_size = itemsJSON.size();
19 |
20 | items = new AWSSpeakerItem[items_size];
21 | for(int i = 0 ; i < items_size; i++) {
22 | items[i] = new AWSSpeakerItem((JSONObject) itemsJSON.get(i));
23 | }
24 | }
25 |
26 | public AWSSpeakerSegment(String speaker_label2, String start_time2, String end_time2) {
27 | this.start_time = start_time2;
28 | this.end_time = end_time2;
29 | this.speaker_label = speaker_label2;
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/src/AWSTranscriptItem.java:
--------------------------------------------------------------------------------
1 | //Copyright 2018, Creed Alexander Erickson IV, All rights reserved.
2 |
3 | import org.json.simple.JSONArray;
4 | import org.json.simple.JSONObject;
5 |
6 | class AWSTranscriptItem{
7 | transient String speaker_label = null; // exclude this from serialzation when saving json...
8 | String start_time = null;
9 | String end_time = null;
10 | AWSTranscriptAlternatives[] alternatives;
11 | String type = null;
12 |
13 | AWSTranscriptItem(JSONObject itemJSON){
14 | start_time = (String) itemJSON.getOrDefault("start_time", null);
15 | end_time = (String) itemJSON.getOrDefault("end_time", null);
16 | type = (String) itemJSON.getOrDefault("type", null);
17 |
18 | JSONArray alts = (JSONArray) itemJSON.get("alternatives");
19 | JSONObject alt = (JSONObject) alts.get(0);
20 | alternatives = new AWSTranscriptAlternatives[1];
21 | alternatives[0] = new AWSTranscriptAlternatives(alt);
22 | }
23 |
24 | AWSTranscriptItem(String content, String speaker_label, String confidence, String start_time, String end_time, String type){
25 | this.start_time = start_time;
26 | this.end_time = end_time;
27 | this.speaker_label = speaker_label;
28 | this.type = type;
29 | alternatives = new AWSTranscriptAlternatives[1];
30 | alternatives[0] = new AWSTranscriptAlternatives(confidence, content);
31 | }
32 |
33 |
34 | AWSTranscriptItem(){ // this is used as an intermediary to create a new vbox element. since I dont have my own vbox class, this comes in handy
35 | start_time = "";
36 | end_time = "";
37 | String confidence = "1";
38 | String content = "";
39 | alternatives = new AWSTranscriptAlternatives[1];
40 | alternatives[0] = new AWSTranscriptAlternatives(confidence, content);
41 | }
42 | }
--------------------------------------------------------------------------------
/src/AWSTranscriptResults.java:
--------------------------------------------------------------------------------
1 | //Copyright 2018, Creed Alexander Erickson IV, All rights reserved.
2 |
3 | import org.json.simple.JSONArray;
4 | import org.json.simple.JSONObject;
5 |
6 | // AWS transcribe->results object
7 | public class AWSTranscriptResults{
8 | AWSTranscripts[] transcripts;
9 | AWSTranscriptItem[] items;
10 | AWSSpeakerLabels speaker_labels;
11 |
12 | public AWSTranscriptResults(JSONObject resultsJSON) {
13 |
14 | JSONArray transcriptsJSON = (JSONArray) resultsJSON.get("transcripts");
15 | JSONArray itemsJSON = (JSONArray) resultsJSON.get("items");
16 |
17 | int transcripts_size = transcriptsJSON.size();
18 | int items_size = itemsJSON.size();
19 |
20 | transcripts = new AWSTranscripts[transcripts_size];
21 | for(int i = 0 ; i < transcripts_size; i++) {
22 | transcripts[i] = new AWSTranscripts((JSONObject) transcriptsJSON.get(i));
23 | }
24 |
25 | items = new AWSTranscriptItem[items_size];
26 | for(int i = 0 ; i < items_size; i++) {
27 | items[i] = new AWSTranscriptItem((JSONObject) itemsJSON.get(i));
28 | }
29 |
30 | // go through speaker_labels if it exists and
31 | // go through the items inside the segments assigning items speaker label to transcriptItems in order, skipping ones which aren't pronunciations because these dont have labels....
32 | JSONObject speaker_labelsJSON = (JSONObject) resultsJSON.getOrDefault("speaker_labels", null);
33 | if(speaker_labelsJSON != null) {
34 |
35 | JSONArray segmentsJSON = (JSONArray) speaker_labelsJSON.get("segments");
36 | int segment_size = segmentsJSON.size();
37 |
38 | int result_idx = 0;
39 | for(int i = 0; i < segment_size; i++) {
40 |
41 | JSONObject segmentJSON = (JSONObject) segmentsJSON.get(i);
42 | JSONArray segitemsJSON = (JSONArray) segmentJSON.get("items");
43 |
44 | int segitems_size = segitemsJSON.size();
45 | for(int j = 0; j < segitems_size; j++) {
46 | // get segitems speaker_label and assign to next result Item which is a pronunciation
47 | while(!items[result_idx].type.equals("pronunciation"))
48 | result_idx++;
49 | JSONObject segitemJSON = (JSONObject) segitemsJSON.get(j);
50 | items[result_idx].speaker_label = (String) segitemJSON.get("speaker_label");
51 | result_idx++;
52 | }
53 | }
54 | }
55 | }
56 |
57 | public AWSTranscriptResults() {
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/src/AWSTranscript.java:
--------------------------------------------------------------------------------
1 | //Copyright 2018, Creed Alexander Erickson IV, All rights reserved.
2 |
3 | import java.io.FileReader;
4 | import java.io.IOException;
5 |
6 | import org.json.simple.JSONObject;
7 | import org.json.simple.parser.JSONParser;
8 | import org.json.simple.parser.ParseException;
9 |
10 | // AWS transcribe, transcribed object
11 | public class AWSTranscript{
12 | String jobName;
13 | String accountId;
14 | AWSTranscriptResults results;
15 | String status;
16 |
17 | public AWSTranscript(JSONObject transcriptJSON) {
18 | jobName = (String) transcriptJSON.get("jobName");
19 | accountId = (String) transcriptJSON.get("accountId");
20 | status = (String) transcriptJSON.get("status");
21 |
22 | JSONObject resultsJSON = (JSONObject) transcriptJSON.get("results");
23 | results = new AWSTranscriptResults(resultsJSON);
24 | }
25 |
26 | // factory constructor from filename
27 | static public AWSTranscript createFromFile(String filename) {
28 | JSONParser parser = new JSONParser();
29 |
30 | JSONObject fileAsJSON = null;
31 | try {
32 | fileAsJSON = (JSONObject) parser.parse(new FileReader(filename));
33 | } catch (IOException | ParseException e) {
34 | e.printStackTrace();
35 | }
36 | return new AWSTranscript(fileAsJSON);
37 | }
38 |
39 | public AWSTranscript(GoogleTranscript gTranscript) {
40 | jobName = gTranscript.name;
41 | accountId = "";
42 | status = "COMPLETED";
43 |
44 | results = new AWSTranscriptResults();
45 | results.speaker_labels = null;
46 |
47 | int item_count = 0;
48 | for(GoogleSpeechResult result : gTranscript.response.results) {
49 | for(GoogleSpeechWords word : result.alternatives[0].words) {
50 | item_count++;
51 | }
52 | }
53 |
54 | String transcriptText = "";
55 | results.items = new AWSTranscriptItem[item_count];
56 | int i = 0;
57 | for(GoogleSpeechResult result : gTranscript.response.results) {
58 | for(GoogleSpeechWords word : result.alternatives[0].words) {
59 | results.items[i] = new AWSTranscriptItem();
60 | results.items[i].start_time = word.startTime.replaceAll("s", "");
61 | results.items[i].end_time = word.endTime.replaceAll("s", "");
62 | results.items[i].alternatives = new AWSTranscriptAlternatives[1];
63 | results.items[i].alternatives[0] = new AWSTranscriptAlternatives();
64 | results.items[i].alternatives[0].confidence = word.confidence;
65 | results.items[i].alternatives[0].content = word.word;
66 | transcriptText += word.word;
67 | results.items[i].type = "pronunciation"; // note that google combines puncuation with a word, maybe we need to seperate them for AWS reuslts?? first just try combined.
68 | i++;
69 | }
70 | }
71 | results.transcripts = new AWSTranscripts[1];
72 | results.transcripts[0] = new AWSTranscripts();
73 | results.transcripts[0].transcript = transcriptText;
74 | }
75 |
76 | }
77 |
--------------------------------------------------------------------------------
/src/GoogleTranscript.java:
--------------------------------------------------------------------------------
1 | //Copyright 2018, Creed Alexander Erickson IV, All rights reserved.
2 |
3 | import java.io.FileReader;
4 | import java.io.IOException;
5 |
6 | import org.json.simple.JSONObject;
7 | import org.json.simple.parser.JSONParser;
8 | import org.json.simple.parser.ParseException;
9 |
10 | // Google speech API, transcribed object
11 | public class GoogleTranscript{
12 | String name;
13 | String done;
14 | GoogleSpeechMetaData metadata;
15 | GoogleSpeechResponse response;
16 |
17 | public GoogleTranscript(JSONObject transcriptJSON) {
18 | name = (String) transcriptJSON.get("name");
19 | done = transcriptJSON.get("done").toString();
20 |
21 | JSONObject responseJSON = (JSONObject) transcriptJSON.get("response");
22 | response = new GoogleSpeechResponse(responseJSON);
23 |
24 | JSONObject metaDataJSON = (JSONObject) transcriptJSON.get("metadata");
25 | metadata = new GoogleSpeechMetaData(metaDataJSON);
26 | }
27 |
28 | /* dont need to create GOOGLe transcript from aws, need other way, just add missing constuctors if wanted
29 | public GoogleTranscript(AWSTranscript awsTranscript) {
30 | name = awsTranscript.jobName;
31 | done = "true"; // maybe handle this buy using aws.status values???
32 | metadata = GoogleSpeechMetaData();
33 |
34 | response = new GoogleSpeechResponse();
35 | response.results = new GoogleSpeechResult[1]; // only create one result, with all words
36 | response.results[0] = new GoogleSpeechResult();
37 | response.results[0].languageCode = "en-us";
38 |
39 | response.results[0].alternatives = new GoogleSpeechAlternatives[1];
40 | response.results[0].alternatives[0] = GoogleSpeechAlternatives();
41 | response.results[0].alternatives[0].transcript = awsTranscript.results.transcripts[0].transcript;
42 |
43 | AWSTranscriptItem[] awsItems = awsTranscript.results.items;
44 | response.results[0].alternatives[0].words = new GoogleSpeechWords[awsItems.length];
45 |
46 | int i = 0;
47 | for(AWSTranscriptItem awsItem : awsItems) {
48 | response.results[0].alternatives[0].words[i] = new GoogleSpeechWords();
49 | response.results[0].alternatives[0].words[i].startTime = awsItem.start_time + "s";
50 | response.results[0].alternatives[0].words[i].endTime = awsItem.end_time + "s";
51 | response.results[0].alternatives[0].words[i].confidence = awsItem.alternatives[0].confidence;
52 | response.results[0].alternatives[0].words[i].word = awsItem.alternatives[0].content;
53 | }
54 | }
55 | */
56 |
57 | // factory constructor from filename
58 | static public GoogleTranscript createFromFile(String filename) {
59 | JSONParser parser = new JSONParser();
60 |
61 | JSONObject fileAsJSON = null;
62 | try {
63 | fileAsJSON = (JSONObject) parser.parse(new FileReader(filename));
64 | } catch (IOException | ParseException e) {
65 | e.printStackTrace();
66 | }
67 | return new GoogleTranscript(fileAsJSON);
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/src/TranscribeUtils.java:
--------------------------------------------------------------------------------
1 | //Copyright 2018, Creed Alexander Erickson IV, All rights reserved.
2 |
3 | import java.io.File;
4 | import java.io.FileInputStream;
5 | import java.io.FileWriter;
6 | import java.io.IOException;
7 | import java.util.ArrayList;
8 | import java.util.HashSet;
9 | import java.util.Set;
10 | import java.util.regex.Pattern;
11 |
12 | import javax.sound.sampled.AudioFormat;
13 | import javax.sound.sampled.AudioInputStream;
14 | import javax.sound.sampled.AudioSystem;
15 | import javax.sound.sampled.Clip;
16 | import javax.sound.sampled.AudioFileFormat.Type;
17 |
18 | import com.google.gson.Gson;
19 |
20 | import javafx.collections.FXCollections;
21 | import javafx.collections.ObservableList;
22 | import javafx.scene.Node;
23 | import javafx.scene.control.TextField;
24 | import javafx.scene.layout.VBox;
25 | import javafx.stage.FileChooser;
26 |
27 | public class TranscribeUtils {
28 |
29 | static File tempWavFile = null;
30 |
31 | static AudioInputStream createClip(String audioFilename, String start_timeStr, String end_timeStr) {
32 | try {
33 | if(audioFilename.endsWith(".mp3") && tempWavFile == null) {
34 | tempWavFile = makeTempWavFile(audioFilename);
35 | }
36 | // convert times given by user to millsec
37 | Double start_timeDbl = Double.parseDouble(start_timeStr);
38 | Double end_timeDbl = Double.parseDouble(end_timeStr);
39 | Long start_timeL = Math.round(start_timeDbl*1_000_000);
40 | Long end_timeL = Math.round(end_timeDbl*1_000_000);
41 |
42 | File file = (tempWavFile == null) ? new File(audioFilename) : tempWavFile;
43 | AudioInputStream sound = AudioSystem.getAudioInputStream(file);
44 | AudioFormat format = sound.getFormat();
45 | Clip clip = AudioSystem.getClip();
46 | clip.open(sound);
47 |
48 | // get the frames of the desired start and end times
49 | clip.setMicrosecondPosition(start_timeL);
50 | int start_frame = clip.getFramePosition();
51 | clip.setMicrosecondPosition(end_timeL);
52 | int end_frame = clip.getFramePosition();
53 | clip.close();
54 |
55 | // get size of desired portion in bytes
56 | int bytesPerFrame = format.getFrameSize();
57 | //int sampleByteSize = bytesPerFrame*(end_frame-start_frame); // i'm not sure why this is not right.... I swore it use to work
58 | int sampleByteSize = (end_frame-start_frame);
59 |
60 | // open file stream to desired portion
61 | // we can make this work so that the users can play mp3, we convert internal to wav and save wav clips... currently lets just assume wav files only
62 | FileInputStream fileStream = new FileInputStream(file);
63 | fileStream.skip(bytesPerFrame*start_frame);
64 |
65 | // create audio stream of desired portion
66 | return new AudioInputStream(fileStream, format, sampleByteSize);
67 |
68 | }catch(Exception e) {
69 | e.printStackTrace();
70 | }
71 | return null;
72 | }
73 |
74 |
75 | static File makeTempWavFile(String audioFilename) {
76 | try {
77 | File file = new File(audioFilename);
78 | AudioInputStream in= AudioSystem.getAudioInputStream(file);
79 | AudioFormat baseFormat = in.getFormat();
80 | AudioFormat decodedFormat = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED,
81 | baseFormat.getSampleRate(),
82 | 16,
83 | baseFormat.getChannels(),
84 | baseFormat.getChannels() * 2,
85 | baseFormat.getSampleRate(),
86 | false);
87 | AudioInputStream din = AudioSystem.getAudioInputStream(decodedFormat, in);
88 | File outfile = File.createTempFile("temp", ".wav");
89 | outfile.deleteOnExit();
90 | AudioSystem.write(din, Type.WAVE, outfile);
91 | return outfile;
92 |
93 | }catch(Exception e) {
94 | e.printStackTrace();
95 | }
96 | return null;
97 | }
98 |
99 | static public void updateTranscriptObj(ArrayList vBoxedItems, AWSTranscript awsTranscription) {
100 | // Update the AWSTranscript from vBoxedItems: completely replace the content of AWSTranscriptResults by
101 | // building AWSTranscriptItem[] and speaker_labels from the vBoxedItems in the GUI, creating a new transcript string in the process.
102 | // Each column's children are read by position: 0 content, 1 speaker_label, 2 confidence, 3 start_time, 4 end_time.
103 | // Note that we create the minimum number of reasonable segments; there seems to be little rhyme or reason to where AWS separates segments.
104 | // The only consistency is that segments have only one speaker, so we create segments at speaker change boundaries.
105 |
106 | AWSTranscriptItem[] newItems = new AWSTranscriptItem[vBoxedItems.size()];
107 |
108 | String newTranscript = "";
109 | Set speakers = new HashSet(); // labels recorded at speaker changes (see the branch below)
110 | ArrayList segItemCounts = new ArrayList(); // item count of each finished segment, in segment order
111 | int segItemCount = 1;
112 | int numSegments = 0;
113 | String currSpeaker = null;
114 | Pattern punctuation_pattern = Pattern.compile("[\\p{Punct}\\p{IsPunctuation}]"); // used with matches(): content that is exactly one punctuation character
115 |
116 | for(int i = 0; i < vBoxedItems.size(); i++) {
117 |
118 | ObservableList vboxChildren = FXCollections.observableArrayList(vBoxedItems.get(i).getChildren());
119 | String content = ((TextField) vboxChildren.get(0)).getText();
120 | String speaker_label = ((TextField) vboxChildren.get(1)).getText();
121 | String confidence = ((TextField) vboxChildren.get(2)).getText();
122 | String start_time = ((TextField) vboxChildren.get(3)).getText();
123 | String end_time = ((TextField) vboxChildren.get(4)).getText();
124 |
125 | // track speaker changes in order to recreate the speaker-label segments
126 | if(currSpeaker == null) {
127 | if(speaker_label != null) // first time we see a speaker, record that speaker_label.
128 | currSpeaker = speaker_label; // NOTE(review): this first label is not added to 'speakers' here - confirm the speaker count below is intended
129 | }else if(speaker_label == null) { // no speaker label, no speaker change, no new segment... or segment item..
130 | ;
131 | }else if(!speaker_label.equals("") && !speaker_label.equals(currSpeaker)){ // new speaker => new segment
132 | segItemCounts.add(numSegments++, segItemCount); // record end of last segment by storing its item count
133 | currSpeaker = speaker_label;
134 | speakers.add(currSpeaker); // add any potential new speakers to the speaker set, to record # speakers
135 | segItemCount = 1;
136 | }else { // not a new segment: an item with an empty or unchanged speaker_label increments the segment's item count
137 | segItemCount++;
138 | }
139 |
140 | Boolean isPunctuation = punctuation_pattern.matcher(content).matches();
141 | String type = (isPunctuation) ? "punctuation" : "pronunciation";
142 |
143 | newItems[i] = new AWSTranscriptItem(content, speaker_label, confidence, start_time, end_time, type);
144 | // punctuation is appended directly to the previous word; every other item is preceded by a space
145 | if(isPunctuation)
146 | newTranscript += content;
147 | else
148 | newTranscript += " " + content;
149 | }
150 | segItemCounts.add(numSegments++, segItemCount); // close the segment that was still open when the loop ended
151 | // speaker_labels is only rebuilt when at least one speaker change was recorded above
152 | if(speakers.size() > 0)
153 | awsTranscription.results.speaker_labels = createSpeakerLabels(speakers.size(), numSegments, segItemCounts, newItems);
154 | awsTranscription.results.items = newItems;
155 | awsTranscription.results.transcripts[0].transcript = newTranscript;
156 | }
157 |
158 | private static AWSSpeakerLabels createSpeakerLabels(int numSpeakers, int numSegments, ArrayList segItemCount, AWSTranscriptItem[] transcript_items) {
159 | AWSSpeakerLabels speaker_labels = new AWSSpeakerLabels(numSpeakers);
160 | speaker_labels.segments = new AWSSpeakerSegment[numSegments];
161 | // segItemCount.get(k) sizes the items array of the k-th segment created below
162 | String currSpeaker = null;
163 | int seg_idx = 0; // index of the NEXT segment slot to fill
164 | int seg_item_idx = 0; // index of the next item slot inside the current segment
165 | AWSSpeakerSegment currSegment = null;
166 |
167 | // go through all transcript items; for each pronunciation item create a corresponding speaker_label item and, if needed, a new speaker label segment
168 | for(int item_idx = 0; item_idx < transcript_items.length; item_idx++) { // index of transcript item
169 |
170 | String end_time = transcript_items[item_idx].end_time; // get items end time, it might or might not be segment endtime....
171 | String start_time = transcript_items[item_idx].start_time; // only set start time and speaker_label when creating new segment, but update end_time until new segment
172 | String speaker_label = transcript_items[item_idx].speaker_label;
173 | String type = transcript_items[item_idx].type;
174 | // only "pronunciation" items produce speaker items; everything else (punctuation) is skipped
175 | if(!type.equals("pronunciation"))
176 | continue;
177 |
178 | if(currSpeaker == null || !currSpeaker.equals(speaker_label)) { // if we have a new speaker in this transcript_item, then we have a new segment
179 | currSpeaker = speaker_label;
180 |
181 | currSegment = new AWSSpeakerSegment(speaker_label, start_time, end_time);
182 | currSegment.items = new AWSSpeakerItem[segItemCount.get(seg_idx)];
183 | speaker_labels.segments[seg_idx++] = currSegment;
184 | seg_item_idx=0;
185 | // NOTE(review): seg_idx was already incremented above, so this break also fires right after the final segment is created, before its first item is stored - confirm intended
186 | if(seg_idx >= segItemCount.size()) //this fixes problem where punctuation comes at end of transcript and no speaker labels, hence no segment items are present for last transcript items
187 | break; // all segments are done, no speaker labels on remaining items.
188 |
189 | }else { // not a new segment, so same speaker, but update segment end_time
190 | currSegment.end_time = end_time;
191 | }
192 | // new segment or not, each pronunciation transcript item corresponds to a segment item in the current segment
193 | AWSSpeakerItem speaker_item = new AWSSpeakerItem(start_time, end_time, speaker_label);
194 | currSegment.items[seg_item_idx++] = speaker_item;
195 | }
196 | return speaker_labels;
197 | }
198 |
199 |
200 | static void saveJsonFile(AWSTranscript transcriptObj) {
201 | // Ask the user for a target path and write transcriptObj to it as JSON (serialized with Gson).
202 | String filename = saveJSONfile();
203 | if(filename == null) // save dialog was cancelled: new FileWriter(null) would throw an uncaught NullPointerException
204 | return;
205 | try (FileWriter file = new FileWriter(filename)) {
206 | file.write(new Gson().toJson(transcriptObj)); // try-with-resources closes (and thereby flushes) the writer
207 | } catch (IOException e) {
208 | e.printStackTrace();
209 | }
210 | }
211 |
212 | public static String getWavFile() {
213 | FileChooser wavChooser = new FileChooser(); // open dialog limited to *.wav; result is the absolute path or null
214 | wavChooser.getExtensionFilters().add(new FileChooser.ExtensionFilter("WAV", "*.wav"));
215 | return getFile(wavChooser);
216 | }
217 |
218 | public static String getAudioFile() {
219 | // No extension filter is installed, so the open dialog offers every file type.
220 | // Returns the chosen file's absolute path, or null when the dialog is cancelled.
221 | return getFile(new FileChooser());
222 | }
223 |
224 | public static String getJSONFile() {
225 | FileChooser jsonChooser = new FileChooser(); // open dialog limited to *.json; result is the absolute path or null
226 | jsonChooser.getExtensionFilters().add(new FileChooser.ExtensionFilter("JSON", "*.json"));
227 | return getFile(jsonChooser);
228 | }
229 |
230 | private static String getFile(FileChooser fileChooser) {
231 | // Show the open dialog with no owner window; a cancelled dialog yields null.
232 | File chosen = fileChooser.showOpenDialog(null);
233 | if (chosen == null)
234 | return null;
235 | return chosen.getAbsolutePath();
236 | }
237 |
238 | public static String saveJSONfile() {
239 | FileChooser jsonChooser = new FileChooser(); // save dialog limited to *.json; result is the absolute path or null
240 | jsonChooser.getExtensionFilters().add(new FileChooser.ExtensionFilter("JSON", "*.json"));
241 | return saveFile(jsonChooser);
242 | }
243 |
244 | private static String saveFile(FileChooser fileChooser) {
245 | // Show the save dialog with no owner window; a cancelled dialog yields null.
246 | File target = fileChooser.showSaveDialog(null);
247 | if (target == null)
248 | return null;
249 | return target.getAbsolutePath();
250 | }
251 | }
252 |
--------------------------------------------------------------------------------
/src/TranscribeEditor.java:
--------------------------------------------------------------------------------
1 | //Copyright 2018, Creed Alexander Erickson IV, All rights reserved.
2 |
3 | import javafx.animation.AnimationTimer;
4 | import javafx.application.*;
5 | import javafx.beans.property.LongProperty;
6 | import javafx.beans.property.SimpleLongProperty;
7 | import javafx.scene.*;
8 | import javafx.stage.*;
9 | import javafx.util.Duration;
10 | import javafx.scene.layout.*;
11 | import javafx.scene.media.Media;
12 | import javafx.scene.media.MediaPlayer;
13 | import javafx.scene.control.*;
14 | import javafx.scene.control.Alert.AlertType;
15 | import javafx.scene.control.ScrollPane.ScrollBarPolicy;
16 | import javafx.scene.input.ClipboardContent;
17 | import javafx.scene.input.DragEvent;
18 | import javafx.scene.input.Dragboard;
19 | import javafx.scene.input.KeyCode;
20 | import javafx.scene.input.KeyCombination;
21 | import javafx.scene.input.KeyEvent;
22 | import javafx.scene.input.MouseEvent;
23 | import javafx.scene.input.ScrollEvent;
24 | import javafx.scene.input.TransferMode;
25 | import javafx.event.*;
26 | import javafx.geometry.Bounds;
27 | import javafx.geometry.Pos;
28 |
29 | import java.io.File;
30 | import java.util.ArrayList;
31 | import java.util.Iterator;
32 | import java.util.regex.Pattern;
33 |
34 | import javax.sound.sampled.AudioInputStream;
35 | import javax.sound.sampled.AudioSystem;
36 | import javax.sound.sampled.Clip;
37 | import javax.sound.sampled.AudioFileFormat.Type;
38 |
39 | public class TranscribeEditor extends Application {
40 |
41 | static double SCROLL_DELTA = .002; //.001045001; amount added to the scroll pane's hvalue per scroll event
42 | static final double MP3_SPEED_DELTA = 0.05; // playback-rate change applied by the Slower/Faster buttons
43 | static final double CONFIDENCE_LIMIT = .5; // items with a confidence below this get a red confidence field
44 | static final int SCENE_WIDTH = 1200;
45 | static final int TRANSCRIPT_HEIGHT = 600;
46 | static final int SCENE_HEIGHT = TRANSCRIPT_HEIGHT + 220;
47 | static final int BEHIND_LIMIT = 10; // number of items passed during playback before the timer scrolls to catch up
48 | static final int VBOX_WIDTH = 75; // preferred width of each item column's text fields
49 | static final int SCROLL_TOL = 2*VBOX_WIDTH; // scrolling loops compare the current item's x position in the scene against this
50 | static final long MIN_UPDATE_INTERVAL = 100000 ; // nanoseconds. Set to higher number to slow update.
51 | static final ScrollEvent FAKE_SCROLL = new ScrollEvent(null, 0, 0, 0, 0, false, false, false, false, false, false, 0, 1, 0, 0, 0, 0, null, 0, null, 0, 0, null);
52 | static final ScrollEvent FAKE_BACK_SCROLL = new ScrollEvent(null, 0, 0, 0, 0, false, false, false, false, false, false, 0, -1, 0, 0, 0, 0, null, 0, null, 0, 0, null);
53 | // The two synthetic events above differ only in the sign of one argument (presumably deltaY - confirm against the ScrollEvent constructor);
54 | // they are passed to the scroll pane's own onScroll handler to scroll programmatically in either direction.
55 | String audioFilename = null; //"L1.mp3";
56 | String jsonFilename = null; //"Lesson1.json";
57 |
58 | AWSTranscript awsTranscript = null;
59 |
60 | static MediaPlayer mediaPlayer = null;
61 | Clip clip = null; // used by playWord to play a single word's audio
62 | boolean mediaPlaying = false;
63 | Duration skiptime = null;
64 |
65 | // is there a better way than to have all these floating out here?
66 | Scene myScene = null;
67 | TextArea transcriptText = new TextArea();
68 | BorderPane rootNode;
69 | ScrollPane scrollPane = new ScrollPane();
70 | HBox scrollingHBox = new HBox();
71 | HBox outerHBox = new HBox();
72 | VBox bigVbox = new VBox();
73 |
74 | ArrayList vBoxedItems; // one VBox column per transcript item; a column's id is kept equal to its index in this list
75 |
76 | int currTransItem = 0; // should i be using bean properties for this?
77 |
78 | public static void main(String[] args) {
79 | Application.launch(args); // start the JavaFX application
80 | }
81 |
82 | // Override the start() method
83 | public void start(Stage myStage) { // builds the window: menu bar on top, transcript text in the center, playback buttons at the bottom
84 | myStage.setTitle("Transcription Editor");
85 | rootNode = new BorderPane();
86 | myScene = new Scene(rootNode, SCENE_WIDTH, SCENE_HEIGHT);
87 |
88 | myStage.setScene(myScene);
89 |
90 | MenuBar mb = createMenus();
91 | FlowPane bottomPane = createBottomPane();
92 | transcriptText.setWrapText(true);
93 | transcriptText.setEditable(false);
94 | transcriptText.setPrefHeight(TRANSCRIPT_HEIGHT);
95 | transcriptText.setText("If you load a non .wav audio file (like .mp3), there will be a slight delay when first playing single words. "
96 | + "A temporary .wav file will be created to allow for easier word extraction. If you want faster single word play-back, use .wav files.");
97 | // vertical wheel movement scrolls horizontally by a fixed step; signum gives 0 for deltaY == 0, where deltaY/|deltaY| was NaN and corrupted hvalue
98 | scrollPane.setOnScroll((ScrollEvent event) -> { scrollPane.setHvalue(scrollPane.getHvalue() + Math.signum(event.getDeltaY())*SCROLL_DELTA ); });
99 | scrollPane.setVbarPolicy(ScrollBarPolicy.NEVER);
100 | scrollPane.setHbarPolicy(ScrollBarPolicy.ALWAYS);
101 | scrollPane.setPannable(true);
102 | scrollPane.setFitToHeight(true);
103 | scrollPane.setContent(scrollingHBox);
104 |
105 | bigVbox.getChildren().add(transcriptText);
106 | rootNode.setTop(mb);
107 | rootNode.setCenter(bigVbox);
108 | rootNode.setBottom(bottomPane);
109 | // the timer runs for the lifetime of the window; its handler returns immediately until a JSON file is loaded
110 | AnimationTimer timer = getTimer();
111 | timer.start();
112 | myStage.show();
113 | }
114 |
115 | private AnimationTimer getTimer() {
116 | final LongProperty lastUpdate = new SimpleLongProperty(); // timestamp of the last processed frame
117 | final LongProperty itemsBehind = new SimpleLongProperty(); // items highlighted since the last auto-scroll
118 | // Returns a timer that, while audio plays, advances currTransItem past every item whose start time has been reached
119 | AnimationTimer timer = new AnimationTimer() {
120 | @Override // this will select text in the transcription to follow audio
121 | public void handle(long now) {
122 | if(jsonFilename == null)
123 | return;
124 | if (Math.abs(now - lastUpdate.get()) > MIN_UPDATE_INTERVAL) {
125 | if(mediaPlayer != null && mediaPlaying) {
126 | int skip = 1;
127 | if(currTransItem + skip < vBoxedItems.size()) {
128 | String lookAhead_startTime = getStartTime(vBoxedItems.get(currTransItem + skip));
129 | while((lookAhead_startTime == null || lookAhead_startTime.equals("")) && currTransItem + (++skip) < vBoxedItems.size()) // handle case where an item doesnt have a start_time, move to next item
130 | lookAhead_startTime = getStartTime(vBoxedItems.get(currTransItem + skip));
131 | if(lookAhead_startTime == null || lookAhead_startTime.equals("")) // no later item has a time (an empty string would make parseDouble throw), so we must have shown everything
132 | return;
133 |
134 | Double itemTime = Double.parseDouble(lookAhead_startTime);
135 | Double playTime = mediaPlayer.getCurrentTime().toSeconds();
136 | if(playTime > itemTime) { // we have passed the start time of this item, so highlight it and scroll if needed
137 | itemsBehind.set(itemsBehind.get() + skip); // record how many items since last scroll
138 | currTransItem += skip;
139 | if(itemsBehind.get() >= BEHIND_LIMIT) { // if we have not scrolled for enough items, scroll to get up-to-date
140 | Bounds boundsInScene = vBoxedItems.get(currTransItem).localToScene(vBoxedItems.get(currTransItem).getBoundsInLocal());
141 | double oldx = -1;
142 | while(boundsInScene.getMinX() > SCROLL_TOL && (oldx != boundsInScene.getMinX())) {
143 | oldx = boundsInScene.getMinX(); // if we hit the end, it wont scroll any further... check this
144 | scrollPane.getOnScroll().handle(FAKE_SCROLL);
145 | boundsInScene = vBoxedItems.get(currTransItem).localToScene(vBoxedItems.get(currTransItem).getBoundsInLocal());
146 | itemsBehind.set(0);
147 | }
148 | }
149 | transcriptText.deselect();
150 | transcriptText.selectNextWord();
151 | transcriptText.selectEndOfNextWord();
152 | transcriptText.requestFocus();
153 | vBoxedItems.get(currTransItem).setStyle("-fx-border-width: 2px; -fx-border-style: solid;"); // highlight item
154 | }
155 | }
156 | }
157 | lastUpdate.set(now);
158 | }
159 | }
160 | };
161 | return timer;
162 | }
163 |
164 | protected String getStartTime(VBox vBox) {
165 | // column children are ordered content, speaker_label, confidence, start_time, end_time: start_time is index 3
166 | return ((TextField) vBox.getChildren().get(3)).getText();
167 | }
168 |
169 | private void loadCenterFromJsonFile(Boolean isGooglefile) {
170 | // Load jsonFilename (a Google transcript is converted to the AWS model) and rebuild the whole center
171 | // area from it: transcript text on top, label column plus scrolling item columns below.
172 | awsTranscript = isGooglefile
173 | ? new AWSTranscript(GoogleTranscript.createFromFile(jsonFilename))
174 | : AWSTranscript.createFromFile(jsonFilename);
175 |
176 | // one column per transcript item; the column id is the item's index
177 | vBoxedItems = new ArrayList();
178 | AWSTranscriptItem[] loadedItems = awsTranscript.results.items;
179 | for(int idx = 0; idx < loadedItems.length; idx++) {
180 | vBoxedItems.add(createVBoxItem(loadedItems[idx], Integer.toString(idx)));
181 | }
182 | refreshTranscriptText();
183 |
184 | transcriptText.deselect();
185 | transcriptText.selectNextWord();
186 | transcriptText.selectEndOfNextWord();
187 | transcriptText.requestFocus();
188 | // empty every container before re-populating it
189 | scrollingHBox.getChildren().clear();
190 | bigVbox.getChildren().clear();
191 | outerHBox.getChildren().clear();
192 | scrollingHBox.getChildren().addAll(vBoxedItems);
193 |
194 | VBox headerColumn = createLabelsVBox();
195 | headerColumn.setMinWidth(1.25*VBOX_WIDTH);
196 | outerHBox.getChildren().addAll(headerColumn, scrollPane);
197 | bigVbox.getChildren().addAll(transcriptText, outerHBox);
198 | }
199 |
200 |
201 | public VBox createLabelsVBox() {
202 | VBox vbox = new VBox();
203 |
204 | Label savecontent = new Label("save word");
205 | TextField content = new TextField("content");
206 | TextField speaker_label = new TextField("speaker_label");
207 | TextField confidence = new TextField("confidence");
208 | TextField start_time = new TextField("start_time");
209 | TextField end_time = new TextField("end_time");
210 | content.setEditable(false);
211 | speaker_label.setEditable(false);
212 | confidence.setEditable(false);
213 | start_time.setEditable(false);
214 | end_time.setEditable(false);
215 | content.setStyle("-fx-background-color: gray;");
216 | speaker_label.setStyle("-fx-background-color: gray;");
217 | confidence.setStyle("-fx-background-color: gray;");
218 | start_time.setStyle("-fx-background-color: gray;");
219 | end_time.setStyle("-fx-background-color: gray;");
220 |
221 | vbox.getChildren().addAll(content, speaker_label, confidence, start_time, end_time, savecontent);
222 | return vbox;
223 | }
224 |
225 | public VBox createVBoxItem(AWSTranscriptItem transItem, String id ) {
226 | VBox vbox = new VBox();
227 | // Builds one editable column for transItem. Children order (relied on by index elsewhere): content, speaker_label, confidence, start_time, end_time, save check box.
228 | vbox.setId(id);
229 | CheckBox saveBox = new CheckBox();
230 | TextField content = new TextField(transItem.alternatives[0].content);
231 | TextField speaker_label = new TextField(transItem.speaker_label);
232 | TextField confidence = new TextField(transItem.alternatives[0].confidence);
233 | TextField start_time = new TextField(transItem.start_time);
234 | TextField end_time = new TextField(transItem.end_time);
235 | content.setPrefWidth(VBOX_WIDTH);
236 | speaker_label.setPrefWidth(VBOX_WIDTH);
237 | confidence.setPrefWidth(VBOX_WIDTH);
238 | start_time.setPrefWidth(VBOX_WIDTH);
239 | end_time.setPrefWidth(VBOX_WIDTH);
240 |
241 | // Create an Edit popup menu, and menu items
242 | ContextMenu contextMenu = new ContextMenu();
243 | MenuItem listen = new MenuItem("Listen to word");
244 | MenuItem playHere = new MenuItem("Play from here");
245 | MenuItem insertBefore = new MenuItem("Insert Before");
246 | MenuItem insertAfter = new MenuItem("Insert After");
247 | MenuItem delete = new MenuItem("Delete");
248 |
249 | // set actions on menuitems; the id is read from the vbox when the action fires, so it follows later renumbering
250 | insertBefore.setOnAction((ActionEvent ae)->{ insertColumn(vbox.getId()); });
251 | insertAfter.setOnAction((ActionEvent ae)->{ insertColumn( ((Integer)(Integer.parseInt(vbox.getId()) + 1)).toString() ); });
252 | delete.setOnAction((ActionEvent ae)->{ removeColumn(vbox.getId()); });
253 | listen.setOnAction((ActionEvent ae)->{ playWord(vbox.getId()); });
254 | playHere.setOnAction((ActionEvent ae)->{ playFromHere(vbox.getId()); });
255 |
256 | // Add the menu items to the popup menu
257 | contextMenu.getItems().addAll(listen, playHere, new SeparatorMenuItem(), insertBefore, insertAfter, new SeparatorMenuItem(), delete);
258 |
259 | // add menu to vbox content
260 | content.setContextMenu(contextMenu);
261 | speaker_label.setContextMenu(contextMenu);
262 | confidence.setContextMenu(contextMenu);
263 | start_time.setContextMenu(contextMenu);
264 | end_time.setContextMenu(contextMenu);
265 |
266 | // update transcription on edit... keeps things up to date, in real time. better for sync, worse for performance
267 | content.setOnKeyReleased((KeyEvent ke)->{ processContentTyping(ke, vbox.getId(), content); });
268 | speaker_label.setOnKeyReleased((KeyEvent ke)-> { TranscribeUtils.updateTranscriptObj(vBoxedItems, awsTranscript); });
269 | confidence.setOnKeyReleased((KeyEvent ke)-> { TranscribeUtils.updateTranscriptObj(vBoxedItems, awsTranscript);});
270 | start_time.setOnKeyReleased((KeyEvent ke)-> { TranscribeUtils.updateTranscriptObj(vBoxedItems, awsTranscript); });
271 | end_time.setOnKeyReleased((KeyEvent ke)-> { TranscribeUtils.updateTranscriptObj(vBoxedItems, awsTranscript); });
272 |
273 | // add some editing conveniences that I desire for faster editing: drag a field's text onto another field
274 | EventHandler dragOverHandler = new EventHandler () {
275 | public void handle(DragEvent event){
276 | if (event.getGestureSource() != event.getGestureTarget() &&
277 | event.getDragboard().hasString()) {
278 | event.acceptTransferModes(TransferMode.COPY_OR_MOVE);
279 | }
280 | event.consume();
281 | }
282 | };
283 | // on drop, the target field's text is replaced by the dragged string
284 | EventHandler dragDroppedHandler = new EventHandler () {
285 | public void handle(DragEvent event){
286 | Dragboard db = event.getDragboard();
287 | boolean success = false;
288 | if (db.hasString()) {
289 | ((TextField)event.getGestureTarget()).setText(db.getString());
290 | success = true;
291 | }
292 | event.setDropCompleted(success);
293 | event.consume();
294 | }
295 | };
296 | // starting a drag puts the source field's whole text on the dragboard
297 | EventHandler dragDetectedHandler = new EventHandler () {
298 | public void handle(MouseEvent me){
299 | Dragboard db = ((Node) me.getSource()).startDragAndDrop(TransferMode.ANY);
300 | ClipboardContent data = new ClipboardContent();
301 | data.putString(((TextField) me.getSource()).getText());
302 | db.setContent(data);
303 | me.consume();
304 | }
305 | };
306 | // the drag handlers are installed on every field except confidence
307 | content.setOnDragDetected(dragDetectedHandler);
308 | content.setOnDragOver(dragOverHandler);
309 | content.setOnDragDropped(dragDroppedHandler);
310 | start_time.setOnDragDetected(dragDetectedHandler);
311 | start_time.setOnDragOver(dragOverHandler);
312 | start_time.setOnDragDropped(dragDroppedHandler);
313 | end_time.setOnDragDetected(dragDetectedHandler);
314 | end_time.setOnDragOver(dragOverHandler);
315 | end_time.setOnDragDropped(dragDroppedHandler);
316 | speaker_label.setOnDragDetected(dragDetectedHandler);
317 | speaker_label.setOnDragOver(dragOverHandler);
318 | speaker_label.setOnDragDropped(dragDroppedHandler);
319 | // only the content field stays focus traversable, so focus traversal visits content fields only
320 | speaker_label.setFocusTraversable(false);
321 | confidence.setFocusTraversable(false);
322 | start_time.setFocusTraversable(false);
323 | end_time.setFocusTraversable(false);
324 | saveBox.setFocusTraversable(false);
325 |
326 |
327 | // flag items with low confidence
328 | if(transItem.alternatives[0].confidence != null
329 | && !transItem.alternatives[0].confidence.equals("")
330 | && (Double.parseDouble(transItem.alternatives[0].confidence) < CONFIDENCE_LIMIT))
331 | {
332 | confidence.setStyle("-fx-background-color: red;");
333 | }
334 | vbox.setAlignment(Pos.CENTER);
335 | vbox.getChildren().addAll(content, speaker_label, confidence, start_time, end_time, saveBox);
336 |
337 | return vbox;
338 | }
339 |
340 |
341 |
342 | private void playFromHere(String id) {
343 | // Moves the transcript caret to the word of column 'id', highlights the columns before it and starts playback at its start_time.
344 | Pattern punctuation_pattern = Pattern.compile("[\\p{Punct}\\p{IsPunctuation}]");
345 |
346 | transcriptText.deselect();
347 | transcriptText.selectHome();
348 | transcriptText.positionCaret(0);
349 | currTransItem = Integer.parseInt(id); // update position for animation
350 | VBox vbox = null;
351 | for(VBox box : vBoxedItems) {
352 | if(box.getId().equals(id)) {
353 | vbox = box;
354 | break;
355 | }
356 |
357 | String content = ((TextField)box.getChildren().get(0)).getText();
358 | char contentChars[] = content.toCharArray();
359 | for(char contentchar : contentChars) { // move forward to position in transcript character by character
360 | transcriptText.positionCaret(transcriptText.getCaretPosition()+1);
361 | }
362 |
363 | Boolean isPunctuation = punctuation_pattern.matcher(content).matches();
364 | if(!isPunctuation) // forward past the next space for non punctuation
365 | transcriptText.positionCaret(transcriptText.getCaretPosition()+1);
366 | }
367 | transcriptText.selectEndOfNextWord();
368 | if(vbox == null) return; // no column carries this id: nothing to play
369 | String start_time = ((TextField)vbox.getChildren().get(3)).getText();
370 | if(start_time == null || start_time.equals("")) // same null-or-empty test the timer and saveWords use for time fields; avoids a NullPointerException here
371 | return;
372 |
373 | highlightPriorVboxes(id);
374 | playOrPause(Double.parseDouble(start_time));
375 | }
376 |
377 | private void highlightPriorVboxes(String id) {
378 | Boolean passedId = false;
379 | // Highlight every column up to and including the one with this id, clear the style of all later ones, then scroll it into view.
380 | for(VBox vbox : vBoxedItems) {
381 | boolean isSelected = !passedId && vbox.getId() != null && vbox.getId().equals(id);
382 | if(isSelected)
383 | passedId = true;
384 |
385 | // the selected column is tested separately: passedId is already true for it, which previously cleared its highlight right after setting it
386 | if(!passedId || isSelected)
387 | vbox.setStyle("-fx-border-width: 2px; -fx-border-style: solid;"); // highlight item
388 | else
389 | vbox.setStyle(null);
390 | }
391 |
392 | // scroll as needed, by replaying synthetic scroll events through the scroll pane's own handler
393 | int i = 0;
394 | Bounds boundsInScene = vBoxedItems.get(currTransItem).localToScene(vBoxedItems.get(currTransItem).getBoundsInLocal());
395 | if(boundsInScene.getMinX() > 0) {
396 | while(boundsInScene.getMinX() > SCROLL_TOL) {
397 | scrollPane.getOnScroll().handle(FAKE_SCROLL);
398 | boundsInScene = vBoxedItems.get(currTransItem).localToScene(vBoxedItems.get(currTransItem).getBoundsInLocal());
399 | if(i++ > 1000) // ugly fix for when near end and cant scroll further.
400 | return;
401 | }
402 | }else {
403 | while(boundsInScene.getMaxX() < SCROLL_TOL) {
404 | scrollPane.getOnScroll().handle(FAKE_BACK_SCROLL);
405 | boundsInScene = vBoxedItems.get(currTransItem).localToScene(vBoxedItems.get(currTransItem).getBoundsInLocal());
406 | if(i++ > 1000)
407 | return;
408 | }
409 | }
410 |
411 | }
412 |
413 | private void playWord(String id) {
414 | // Plays only the audio between this column's start_time and end_time; does nothing when no audio file is set.
415 | if(audioFilename == null) return;
416 | VBox column = vBoxedItems.get(Integer.parseInt(id));
417 | String wordStart = ((TextField)column.getChildren().get(3)).getText();
418 | String wordEnd = ((TextField)column.getChildren().get(4)).getText();
419 |
420 | try {
421 | AudioInputStream wordAudio = TranscribeUtils.createClip(audioFilename, wordStart, wordEnd);
422 | if(clip == null)
423 | clip = AudioSystem.getClip(); // first use: obtain the Clip that is reused for every word
424 | else if(clip.isOpen())
425 | clip.close(); // release the previously played word before reopening
426 | clip.open(wordAudio);
427 | clip.setFramePosition(0);
428 | clip.start();
429 | }catch(Exception e) {
430 | e.printStackTrace();
431 | }
432 | }
433 |
434 | private void removeColumn(String id) {
435 | // Removes the column whose id (kept equal to its index in vBoxedItems) is given; subtracts one from the ids of all later vboxes
436 | int idx = Integer.parseInt(id);
437 | for(VBox vbox : vBoxedItems) {
438 | int vboxId = Integer.parseInt(vbox.getId());
439 | if(vboxId >= idx)
440 | vbox.setId("" + (vboxId - 1));
441 | }
442 | // remove old vbox
443 | vBoxedItems.remove(idx);
444 |
445 | refreshTranscriptText();
446 | scrollingHBox.getChildren().clear();
447 | scrollingHBox.getChildren().addAll(vBoxedItems);
448 | if(!vBoxedItems.isEmpty()) vBoxedItems.get(Math.min(idx, vBoxedItems.size() - 1)).getChildren().get(0).requestFocus(); // focus the next column, or the new last one when the last column was removed (get(idx) used to throw there)
449 | }
450 |
451 | private void insertColumn(String id) {
452 | // Opens a gap at index 'id': every column whose id is at or after the insertion point moves up by one.
453 | int insertAt = Integer.parseInt(id);
454 | for(VBox column : vBoxedItems) {
455 | int columnId = Integer.parseInt(column.getId());
456 | if(columnId >= insertAt)
457 | column.setId(Integer.toString(columnId + 1));
458 | }
459 | // the new column is built from an empty transcript item and takes over the requested id
460 | VBox emptyColumn = createVBoxItem(new AWSTranscriptItem(), id);
461 | vBoxedItems.add(insertAt, emptyColumn);
462 |
463 | refreshTranscriptText();
464 | scrollingHBox.getChildren().clear();
465 | scrollingHBox.getChildren().addAll(vBoxedItems);
466 | emptyColumn.getChildren().get(0).requestFocus(); // put focus on the new column's content field so typing can resume
467 | }
468 |
469 | private void processContentTyping(KeyEvent ke, String id, TextField content) { // key-released handler for a content field: applies the editing shortcuts below, then refreshes the transcript text
470 | if(ke.getText().equals(" ")){ // i've decided to make spaces insert new vboxes. tab will get you to next vbox, space will create new subsequent vbox, this is all for editing convenience
471 | content.setText(content.getText().trim()); // remove the space that was typed
472 | Integer newVboxId = (Integer)(Integer.parseInt(id) + 1);
473 | insertColumn(newVboxId.toString() ); // create a subsequent vbox
474 | // vBoxedItems.get(newVboxId).getChildren().get(0).requestFocus(); // put focus on new vbox content so typing can resume
475 | }
476 | if( ke.getCode().equals( KeyCode.DELETE )) {
477 | removeColumn(id);
478 | }
479 | if(ke.getCode().equals(KeyCode.COMMA) && ke.isControlDown() && Integer.parseInt(id) + 1 < vBoxedItems.size()) { // shortcut for copying the end time of the next vbox to the current vbox; skipped on the last vbox, which has no next one (get() used to throw)
480 | Integer nextVboxId = (Integer)(Integer.parseInt(id) + 1);
481 | String copiedEndTime = ((TextField)vBoxedItems.get(nextVboxId).getChildren().get(4)).getText();
482 | ((TextField)vBoxedItems.get(Integer.parseInt(id)).getChildren().get(4)).setText(copiedEndTime);
483 | // removeColumn(nextVboxId.toString());
484 | }
485 | if(ke.getCode().equals(KeyCode.P) && ke.isControlDown()) { // shortcut for playing current word
486 | playWord(id);
487 | }
488 | if(ke.getCode().equals(KeyCode.K) && ke.isControlDown()) { // shortcut for saving current word, "K" for keep
489 | ((CheckBox)vBoxedItems.get(Integer.parseInt(id)).getChildren().get(5)).setSelected(true);
490 | }
491 | Integer id_int = Integer.parseInt(id);
492 |
493 | if(ke.getCode().equals(KeyCode.LEFT) && ke.isControlDown()) { // shortcut for moving to previous vbox
494 | if(id_int > 0)
495 | ((TextField)vBoxedItems.get(id_int - 1).getChildren().get(0)).requestFocus();
496 | }
497 | if(ke.getCode().equals(KeyCode.RIGHT) && ke.isControlDown()) { // shortcut for moving to next vbox, tab does same thing
498 | if(id_int < vBoxedItems.size() -1)
499 | ((TextField)vBoxedItems.get(id_int + 1).getChildren().get(0)).requestFocus();
500 | }
501 |
502 | refreshTranscriptText();
503 | }
504 |
505 | private void refreshTranscriptText() {
506 | TranscribeUtils.updateTranscriptObj(vBoxedItems, awsTranscript); // model first: rebuild it from the columns
507 | transcriptText.setText(this.awsTranscript.results.transcripts[0].transcript); // then mirror its text in the transcript area
508 | }
509 |
510 |
511 |
512 | private MenuBar createMenus() {
513 | MenuBar mb = new MenuBar();
514 | // Builds the File menu (open AWS/Google JSON, open audio, save, exit) and the Help menu (About)
515 | Menu fileMenu = new Menu("_File");
516 | MenuItem openAWSJson = new MenuItem("Open AWS Transcribe_JSON Transcription");
517 | MenuItem openGoogleJson = new MenuItem("Open GoogleSpeech JSON Transcript");
518 | MenuItem openAudio = new MenuItem("Open _Audio");
519 | MenuItem saveJson = new MenuItem("_Save as AWS JSON Transcription");
520 | MenuItem exit = new MenuItem("_Exit");
521 |
522 | Menu helpMenu = new Menu("_Help");
523 | MenuItem about = new MenuItem("About");
524 | helpMenu.getItems().addAll(about);
525 |
526 |
527 | Alert alert = new Alert(AlertType.INFORMATION);
528 | alert.setTitle("About");
529 | alert.setHeaderText(null);
530 | alert.setContentText("Copyright 2018, Creed Alexander Erickson IV, All rights reserved.");
531 |
532 | about.setOnAction((ActionEvent ae)-> { alert.showAndWait(); });
533 | // jsonFilename is only replaced when a file was actually chosen: a cancelled dialog used to null it, which makes the animation timer return early for the transcript still on screen
534 | openAWSJson.setOnAction((ActionEvent ae) -> { String chosen = TranscribeUtils.getJSONFile(); if(chosen != null) { jsonFilename = chosen; loadCenterFromJsonFile(false); } });
535 | openGoogleJson.setOnAction((ActionEvent ae) -> { String chosen = TranscribeUtils.getJSONFile(); if(chosen != null) { jsonFilename = chosen; loadCenterFromJsonFile(true); } });
536 | // choosing new audio discards the current media player; cancelling leaves audioFilename null
537 | openAudio.setOnAction((ActionEvent ae) -> {
538 | audioFilename = TranscribeUtils.getAudioFile();
539 | if(mediaPlayer != null) {
540 | mediaPlayer.dispose();
541 | mediaPlayer = null;
542 | }
543 | });
544 | saveJson.setOnAction((ActionEvent ae)->{
545 | if(awsTranscript != null) {
546 | //updateTranscriptObj(); // we shouldnt need to update since we keep things up-to-date in real time, but this might be needed if we have sync problems...
547 | TranscribeUtils.saveJsonFile(awsTranscript);
548 | }
549 | });
550 | exit.setOnAction((ActionEvent ae) -> {Platform.exit();});
551 |
552 | openAWSJson.setAccelerator(KeyCombination.keyCombination("shortcut+J"));
553 | openAudio.setAccelerator(KeyCombination.keyCombination("shortcut+M"));
554 | saveJson.setAccelerator(KeyCombination.keyCombination("shortcut+S"));
555 | exit.setAccelerator(KeyCombination.keyCombination("shortcut+X"));
556 |
557 | fileMenu.getItems().addAll(openAWSJson, openGoogleJson, openAudio, new SeparatorMenuItem(), saveJson, new SeparatorMenuItem(), exit);
558 | mb.getMenus().addAll(fileMenu,helpMenu);
559 | return mb;
560 | }
561 |
562 | private FlowPane createBottomPane() {
563 | // Bottom bar: playback controls (slower, play/pause, faster) followed by the button that exports the checked words.
564 | Button slower = new Button("Slower");
565 | Button playPause = new Button("Play/Pause");
566 | Button faster = new Button("Faster");
567 | Button saveSelected = new Button("Save selected words");
568 |
569 | // the rate buttons do nothing until a media player exists
570 | slower.setOnAction(ae -> { if(mediaPlayer != null) mediaPlayer.setRate(mediaPlayer.getRate() - MP3_SPEED_DELTA); });
571 | faster.setOnAction(ae -> { if(mediaPlayer != null) mediaPlayer.setRate(mediaPlayer.getRate() + MP3_SPEED_DELTA); });
572 |
573 | playPause.setOnAction(ae -> playOrPause());
574 |
575 | saveSelected.setOnAction(ae -> saveWords());
576 |
577 | FlowPane controls = new FlowPane();
578 | controls.setAlignment(Pos.CENTER);
579 | controls.getChildren().addAll(slower, playPause, faster, saveSelected);
580 | return controls;
581 | }
582 |
583 |
584 | private void saveWords() {
585 | //int savedWordCount = 0;
586 | // Exports each run of consecutively checked columns as one .wav clip, alternating between the "chinese/" and "english/" folders.
587 | if(audioFilename == null) { // if no mp3 file opened, provide open dialog box
588 | audioFilename = TranscribeUtils.getAudioFile();
589 | if(audioFilename == null)
590 | return;
591 | }
592 |
593 | Iterator iter = vBoxedItems.iterator();
594 | String chineseword = null; // runs are treated as alternating pairs: the first run of a pair is stored here and saved under chinese/, the second is saved under english/; we form the lessonData from these...
595 | while(iter.hasNext()) {
596 | VBox vbox = iter.next();
597 | CheckBox checkbox = (CheckBox) vbox.getChildren().get(5);
598 | if(checkbox.selectedProperty().getValue() == true) {
599 | String wordFilename = ((TextField)vbox.getChildren().get(0)).getText();
600 | String start_time = ((TextField)vbox.getChildren().get(3)).getText();
601 | String end_time = ((TextField)vbox.getChildren().get(4)).getText();
602 | while(iter.hasNext()) { // if consecutive vboxes are checked, find the end time by finding end_time of last box in the series
603 | vbox = iter.next(); // this also consumes the first unchecked box that ends the run
604 | checkbox = (CheckBox) vbox.getChildren().get(5);
605 | if(checkbox.selectedProperty().getValue() == true) {
606 | if(((TextField)vbox.getChildren().get(4)).getText() != null && !((TextField)vbox.getChildren().get(4)).getText().equals("")) // if there is a new endtime, update it
607 | end_time = ((TextField)vbox.getChildren().get(4)).getText();
608 | wordFilename += " " + ((TextField)vbox.getChildren().get(0)).getText();
609 | }else {
610 | break;
611 | }
612 | }
613 | if(chineseword == null) { // first run of a pair
614 | chineseword = wordFilename;
615 | wordFilename = "chinese/" + wordFilename;
616 | }else{ // second run of a pair: print the lesson-data line for the pair, then reset
617 | String section = "grammarBuilder2";
618 | System.out.println(section+".add(new String[] { \"" + chineseword + "\", \"" + wordFilename + "\"});");
619 | chineseword= null;
620 | wordFilename = "english/" + wordFilename;
621 | }
622 | wordFilename = wordFilename.toLowerCase().replaceAll("[ .?!,()]", ""); // strip spaces and the listed punctuation from the file name
623 | wordFilename += ".wav";
624 | saveClip(wordFilename, start_time, end_time);
625 | }
626 | }
627 | }
628 |
629 | private void saveClip(String outfilename, String start_timeStr, String end_timeStr) {
630 | try {
631 | AudioInputStream startStream = TranscribeUtils.createClip(audioFilename, start_timeStr, end_timeStr);
632 | File outfile = new File(outfilename);
633 | AudioSystem.write(startStream, Type.WAVE, outfile);
634 | startStream.close();
635 | }catch(Exception e) {
636 | e.printStackTrace();
637 | }
638 | }
639 |
640 | public void playOrPause() {
641 | playOrPause(null);
642 | }
643 |
644 | public void playOrPause(Double start_time) { // this turned messy due to mediaPlayer pause, play bug
645 | if(audioFilename == null) { // if no mp3 file opened, provide open dialog box
646 | audioFilename = TranscribeUtils.getAudioFile();
647 | if(audioFilename == null)
648 | return;
649 | }
650 |
651 | if(mediaPlayer != null) { // if there was a mediaplayer, kill old media players because .pause .play doenst work right
652 | if(start_time == null && mediaPlaying) { // before disposing, if not asked to play at a time, and we where playing, record our current time.
653 | skiptime = mediaPlayer.getCurrentTime(); // record where we were playing
654 | }
655 | mediaPlayer.dispose();
656 | }
657 |
658 | if(start_time != null) { // if asked to play at a time, play at that time
659 | skiptime = new Duration(start_time*1_000);
660 | File file = new File(audioFilename);
661 | Media media = new Media(file.toURI().toString());
662 | mediaPlayer = new MediaPlayer(media);
663 | mediaPlayer.stop();
664 | mediaPlayer.setStartTime(skiptime);
665 | mediaPlayer.seek(skiptime);
666 | mediaPlayer.play();
667 | mediaPlaying=true;
668 | }else if(mediaPlaying) { // using my own playing status because built in status doesnt update correctly
669 | //mediaPlayer.pause();
670 | mediaPlaying=false;
671 | }else {
672 | File file = new File(audioFilename);
673 | Media media = new Media(file.toURI().toString());
674 | mediaPlayer = new MediaPlayer(media);
675 | if(skiptime != null)
676 | mediaPlayer.setStartTime(skiptime); // try to fix odd mediaPlayer issue, there are bugs in mediaPlayer..... it doesnt work as it should
677 | mediaPlayer.play();
678 | mediaPlaying=true;
679 | }
680 | }
681 |
682 | public void stop() {
683 | }
684 | }
--------------------------------------------------------------------------------