├── .gitignore
├── README.md
├── desktop-subtitle.iml
├── pom.xml
└── src
└── main
├── java
└── subtitle
│ ├── Controller.java
│ ├── DragListener.java
│ ├── DragUtil.java
│ ├── Main.java
│ └── Task.java
└── resources
└── sample.fxml
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | target
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 桌面字幕——实时语音识别。
2 |
3 |
6 |
7 | 这是一个基于阿里云实时语音转写实现的桌面字幕。
8 |
9 | 代码实现非常简单,根据阿里云开发文档里面的 demo 代码复制粘贴就完成了。使用 `JavaFX` 实现透明窗口 + Label。
10 |
11 | 实时语音识别的效果不错,可以用于录制教学视频。
12 |
13 | 源代码:[https://github.com/yi-ge/desktop-subtitle](https://github.com/yi-ge/desktop-subtitle)
14 |
15 | 相关博文:[https://www.wyr.me/post/602](https://www.wyr.me/post/602)
16 |
--------------------------------------------------------------------------------
/desktop-subtitle.iml:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | yige.desktop.subtitles
8 | yige
9 | 1.0-SNAPSHOT
10 |
11 |
12 |
13 | org.apache.maven.plugins
14 | maven-compiler-plugin
15 |
16 | 6
17 | 6
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 | com.google.guava
27 | guava
28 | 27.0.1-jre
29 |
30 |
31 |
32 | com.alibaba.nls
33 | nls-sdk-long-asr
34 | 2.0.3
35 |
36 |
37 |
--------------------------------------------------------------------------------
/src/main/java/subtitle/Controller.java:
--------------------------------------------------------------------------------
1 | package subtitle;
2 |
3 | import javafx.fxml.FXML;
4 | import javafx.fxml.Initializable;
5 | import javafx.scene.control.Label;
6 |
7 | import java.net.URL;
8 | import java.util.ResourceBundle;
9 |
10 | public class Controller implements Initializable {
11 |
12 | @FXML
13 | private Label label;
14 |
15 | @Override
16 | public void initialize(URL url, ResourceBundle rb) {
17 | Task task = new Task(label);
18 | new Thread(task).start();
19 |
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/src/main/java/subtitle/DragListener.java:
--------------------------------------------------------------------------------
1 | package subtitle;
2 |
3 | /**
4 | * Created by yi-ge
5 | * 2018-12-21 22:13
6 | */
7 |
8 | import javafx.event.EventHandler;
9 | import javafx.scene.Node;
10 | import javafx.scene.input.MouseEvent;
11 | import javafx.stage.Stage;
12 |
13 | /**
14 | * 拖拽监听器
15 | * @author Light
16 | */
17 | public class DragListener implements EventHandler {
18 |
19 | private double xOffset = 0;
20 | private double yOffset = 0;
21 | private final Stage stage;
22 |
23 | public DragListener(Stage stage) {
24 | this.stage = stage;
25 | }
26 |
27 | @Override
28 | public void handle(MouseEvent event) {
29 | event.consume();
30 | if (event.getEventType() == MouseEvent.MOUSE_PRESSED) {
31 | xOffset = event.getSceneX();
32 | yOffset = event.getSceneY();
33 | } else if (event.getEventType() == MouseEvent.MOUSE_DRAGGED) {
34 | stage.setX(event.getScreenX() - xOffset);
35 | if(event.getScreenY() - yOffset < 0) {
36 | stage.setY(0);
37 | }else {
38 | stage.setY(event.getScreenY() - yOffset);
39 | }
40 | }
41 | }
42 |
43 | public void enableDrag(Node node) {
44 | node.setOnMousePressed(this);
45 | node.setOnMouseDragged(this);
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/subtitle/DragUtil.java:
--------------------------------------------------------------------------------
1 | package subtitle;
2 |
3 | import javafx.scene.Node;
4 | import javafx.stage.Stage;
5 |
6 | /**
7 | * Created by yi-ge
8 | * 2018-12-21 22:13
9 | */
10 | public class DragUtil {
11 | public static void addDragListener(Stage stage, Node root) {
12 | new DragListener(stage).enableDrag(root);
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/src/main/java/subtitle/Main.java:
--------------------------------------------------------------------------------
1 | package subtitle;
2 |
3 | import com.google.common.io.Resources;
4 | import javafx.application.Application;
5 | import javafx.fxml.FXMLLoader;
6 | import javafx.geometry.Rectangle2D;
7 | import javafx.scene.Parent;
8 | import javafx.scene.Scene;
9 | import javafx.stage.Screen;
10 | import javafx.stage.Stage;
11 | import javafx.stage.StageStyle;
12 |
13 | public class Main extends Application {
14 |
15 | @Override
16 | public void start(Stage primaryStage) throws Exception{
17 | Parent root = FXMLLoader.load(Resources.getResource("sample.fxml"));
18 |
19 | final int width = 1024;
20 | final int height = 50;
21 |
22 | // Label label = new Label();
23 | // label.setContentDisplay(ContentDisplay.CENTER);
24 | // label.setTextFill(Color.web("#0076a3"));
25 | // label.setFont(new Font(32));
26 | // label.setMinWidth(1024);
27 | // label.setPrefWidth(1024);
28 | // label.setBackground(Background.EMPTY);
29 | // label.setStyle("-fx-background:transparent;");
30 |
31 | final Scene scene = new Scene(root, width, height);
32 | scene.setFill(null);
33 |
34 | final Stage stage = new Stage();
35 | stage.initStyle(StageStyle.TRANSPARENT);
36 | stage.setScene(scene);
37 | Rectangle2D primaryScreenBounds = Screen.getPrimary().getVisualBounds();
38 | stage.setX((primaryScreenBounds.getWidth() - width) / 2);
39 | stage.setY((primaryScreenBounds.getHeight() - height));
40 | stage.setAlwaysOnTop(true);
41 |
42 | // 拖动监听器
43 | DragUtil.addDragListener(stage, root);
44 | stage.show();
45 |
46 | }
47 |
48 |
49 | public static void main(String[] args) {
50 | launch(args);
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/src/main/java/subtitle/Task.java:
--------------------------------------------------------------------------------
1 | package subtitle;
2 |
3 | import com.alibaba.nls.client.protocol.InputFormatEnum;
4 | import com.alibaba.nls.client.protocol.NlsClient;
5 | import com.alibaba.nls.client.protocol.SampleRateEnum;
6 | import com.alibaba.nls.client.protocol.asr.SpeechTranscriber;
7 | import com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener;
8 | import com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse;
9 | import javafx.application.Platform;
10 | import javafx.scene.control.Label;
11 |
12 | import javax.sound.sampled.AudioFormat;
13 | import javax.sound.sampled.AudioSystem;
14 | import javax.sound.sampled.DataLine;
15 | import javax.sound.sampled.TargetDataLine;
16 | import java.util.Date;
17 |
18 | /**
19 | * Created by yi-ge
20 | * 2018-12-21 23:46
21 | */
22 | class Task implements Runnable {
23 | Label label;
24 |
25 | private String appKey = "";
26 | private String accessToken = "";
27 | NlsClient client;
28 |
29 | public Task(Label label) {
30 | this.label = label;
31 | }
32 |
33 | public SpeechTranscriberListener getTranscriberListener() {
34 | SpeechTranscriberListener listener = new SpeechTranscriberListener() {
35 | // 识别出中间结果.服务端识别出一个字或词时会返回此消息.仅当setEnableIntermediateResult(true)时,才会有此类消息返回
36 | @Override
37 | public void onTranscriptionResultChange(SpeechTranscriberResponse response) {
38 | System.out.println("name: " + response.getName() +
39 | // 状态码 20000000 表示正常识别
40 | ", status: " + response.getStatus() +
41 | // 句子编号,从1开始递增
42 | ", index: " + response.getTransSentenceIndex() +
43 | // 当前句子的中间识别结果
44 | ", result: " + response.getTransSentenceText() +
45 | // 当前已处理的音频时长,单位是毫秒
46 | ", time: " + response.getTransSentenceTime());
47 |
48 | final String r = response.getTransSentenceText();
49 |
50 | Platform.runLater(new Runnable() {
51 | @Override
52 | public void run() {
53 | // Update UI here.
54 | label.setText(r);
55 | }
56 | });
57 | }
58 |
59 | // 识别出一句话.服务端会智能断句,当识别到一句话结束时会返回此消息
60 | @Override
61 | public void onSentenceEnd(SpeechTranscriberResponse response) {
62 | System.out.println("name: " + response.getName() +
63 | // 状态码 20000000 表示正常识别
64 | ", status: " + response.getStatus() +
65 | // 句子编号,从1开始递增
66 | ", index: " + response.getTransSentenceIndex() +
67 | // 当前句子的完整识别结果
68 | ", result: " + response.getTransSentenceText() +
69 | // 当前已处理的音频时长,单位是毫秒
70 | ", time: " + response.getTransSentenceTime() +
71 | // SentenceBegin事件的时间,单位是毫秒
72 | ", begin time: " + response.getSentenceBeginTime() +
73 | // 识别结果置信度,取值范围[0.0, 1.0],值越大表示置信度越高
74 | ", confidence: " + response.getConfidence());
75 |
76 | final String r = response.getTransSentenceText();
77 |
78 | Platform.runLater(new Runnable() {
79 | @Override
80 | public void run() {
81 | // Update UI here.
82 | label.setText(r);
83 | }
84 | });
85 | }
86 |
87 | // 识别完毕
88 | @Override
89 | public void onTranscriptionComplete(SpeechTranscriberResponse response) {
90 | System.out.println("name: " + response.getName() +
91 | ", status: " + response.getStatus());
92 | }
93 | };
94 | return listener;
95 | }
96 |
97 | public void process() {
98 | SpeechTranscriber transcriber = null;
99 | try {
100 | // Step1 创建实例,建立连接
101 | transcriber = new SpeechTranscriber(client, getTranscriberListener());
102 | transcriber.setAppKey(appKey);
103 | // 输入音频编码方式
104 | transcriber.setFormat(InputFormatEnum.PCM);
105 | // 输入音频采样率
106 | transcriber.setSampleRate(SampleRateEnum.SAMPLE_RATE_16K);
107 | // 是否返回中间识别结果
108 | transcriber.setEnableIntermediateResult(true);
109 | // 是否生成并返回标点符号
110 | transcriber.setEnablePunctuation(true);
111 | // 是否将返回结果规整化,比如将一百返回为100
112 | transcriber.setEnableITN(false);
113 |
114 | // Step2 此方法将以上参数设置序列化为json发送给服务端,并等待服务端确认
115 | transcriber.start();
116 |
117 | // Step3 读取麦克风数据
118 | AudioFormat audioFormat = new AudioFormat(16000.0F, 16, 1, true, false);
119 | DataLine.Info info = new DataLine.Info(TargetDataLine.class, audioFormat);
120 | TargetDataLine targetDataLine = (TargetDataLine) AudioSystem.getLine(info);
121 | targetDataLine.open(audioFormat);
122 | targetDataLine.start();
123 | Platform.runLater(new Runnable() {
124 | @Override
125 | public void run() {
126 | // Update UI here.
127 | label.setText("You can speak now!");
128 | }
129 | });
130 | // label.setText("You can speak now!");
131 | int nByte = 0;
132 | final int bufSize = 6400;
133 | byte[] buffer = new byte[bufSize];
134 | while ((nByte = targetDataLine.read(buffer, 0, bufSize)) > 0) {
135 | // Step4 直接发送麦克风数据流
136 | transcriber.send(buffer);
137 | }
138 |
139 | // Step5 通知服务端语音数据发送完毕,等待服务端处理完成
140 | transcriber.stop();
141 | } catch (Exception e) {
142 | System.err.println(e.getMessage());
143 | } finally {
144 | // Step6 关闭连接
145 | if (null != transcriber) {
146 | transcriber.close();
147 | }
148 | }
149 | }
150 |
151 | // public void shutdown() {
152 | // client.shutdown();
153 | // }
154 |
155 | @Override
156 | public void run() {
157 | // Step0 创建NlsClient实例,应用全局创建一个即可,默认服务地址为阿里云线上服务地址
158 | client = new NlsClient(accessToken);
159 | this.process();
160 | }
161 | }
--------------------------------------------------------------------------------
/src/main/resources/sample.fxml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------