├── .gitignore ├── README.md ├── desktop-subtitle.iml ├── pom.xml └── src └── main ├── java └── subtitle │ ├── Controller.java │ ├── DragListener.java │ ├── DragUtil.java │ ├── Main.java │ └── Task.java └── resources └── sample.fxml /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | target -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 桌面字幕——实时语音识别。 2 | 3 | 6 | 7 | 这是一个基于阿里云实时语音转写实现的桌面字幕。 8 | 9 | 代码实现非常简单,根据阿里云开发文档里面的demo代码复制粘贴就完成了。使用 `JavaFx` 实现透明窗口 + Label。 10 | 11 | 实时语音识别的效果不错,可以用于录制教学视频。 12 | 13 | 源代码:[https://github.com/yi-ge/desktop-subtitle ](https://github.com/yi-ge/desktop-subtitle) 14 | 15 | 相关博文:[https://www.wyr.me/post/602](https://www.wyr.me/post/602) 16 | -------------------------------------------------------------------------------- /desktop-subtitle.iml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | yige.desktop.subtitles 8 | yige 9 | 1.0-SNAPSHOT 10 | 11 | 12 | 13 | org.apache.maven.plugins 14 | maven-compiler-plugin 15 | 16 | 6 17 | 6 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | com.google.guava 27 | guava 28 | 27.0.1-jre 29 | 30 | 31 | 32 | com.alibaba.nls 33 | nls-sdk-long-asr 34 | 2.0.3 35 | 36 | 37 | -------------------------------------------------------------------------------- /src/main/java/subtitle/Controller.java: -------------------------------------------------------------------------------- 1 | package subtitle; 2 | 3 | import javafx.fxml.FXML; 4 | import javafx.fxml.Initializable; 5 | import javafx.scene.control.Label; 6 | 7 | import java.net.URL; 8 | import java.util.ResourceBundle; 9 | 10 | public class Controller implements Initializable { 11 | 12 | @FXML 13 | private Label label; 14 | 15 | @Override 16 | public void initialize(URL url, ResourceBundle rb) { 17 | Task task = new Task(label); 18 | new Thread(task).start(); 19 | 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/main/java/subtitle/DragListener.java: -------------------------------------------------------------------------------- 1 | package subtitle; 2 | 3 | /** 4 | * Created by yi-ge 5 | * 2018-12-21 22:13 6 | */ 7 | 8 | import javafx.event.EventHandler; 9 | import javafx.scene.Node; 10 | import javafx.scene.input.MouseEvent; 11 | import javafx.stage.Stage; 12 | 13 | /** 14 | * 拖拽监听器 15 | * @author Light 16 | */ 17 | public class DragListener implements EventHandler { 18 | 19 | private double xOffset = 0; 20 | private double yOffset = 0; 21 | private final Stage stage; 22 | 23 | public DragListener(Stage stage) { 24 | this.stage = stage; 25 | } 26 | 27 | @Override 28 | public void handle(MouseEvent event) { 29 | event.consume(); 30 | if (event.getEventType() == MouseEvent.MOUSE_PRESSED) { 31 | xOffset = event.getSceneX(); 32 | yOffset = event.getSceneY(); 33 | } else if (event.getEventType() == MouseEvent.MOUSE_DRAGGED) { 34 | stage.setX(event.getScreenX() - xOffset); 35 | if(event.getScreenY() - yOffset < 0) { 36 | stage.setY(0); 37 | }else { 38 | stage.setY(event.getScreenY() - yOffset); 39 | } 40 | } 41 | } 42 | 43 | public void enableDrag(Node node) { 44 | node.setOnMousePressed(this); 45 | node.setOnMouseDragged(this); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/subtitle/DragUtil.java: -------------------------------------------------------------------------------- 1 | package subtitle; 2 | 3 | import javafx.scene.Node; 4 | import javafx.stage.Stage; 5 | 6 | /** 7 | * Created by yi-ge 8 | * 2018-12-21 22:13 9 | */ 10 | public class DragUtil { 11 | public static void addDragListener(Stage stage, Node root) { 12 | new DragListener(stage).enableDrag(root); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/subtitle/Main.java: -------------------------------------------------------------------------------- 1 | package subtitle; 2 | 3 | import com.google.common.io.Resources; 4 | import javafx.application.Application; 5 | import javafx.fxml.FXMLLoader; 6 | import javafx.geometry.Rectangle2D; 7 | import javafx.scene.Parent; 8 | import javafx.scene.Scene; 9 | import javafx.stage.Screen; 10 | import javafx.stage.Stage; 11 | import javafx.stage.StageStyle; 12 | 13 | public class Main extends Application { 14 | 15 | @Override 16 | public void start(Stage primaryStage) throws Exception{ 17 | Parent root = FXMLLoader.load(Resources.getResource("sample.fxml")); 18 | 19 | final int width = 1024; 20 | final int height = 50; 21 | 22 | // Label label = new Label(); 23 | // label.setContentDisplay(ContentDisplay.CENTER); 24 | // label.setTextFill(Color.web("#0076a3")); 25 | // label.setFont(new Font(32)); 26 | // label.setMinWidth(1024); 27 | // label.setPrefWidth(1024); 28 | // label.setBackground(Background.EMPTY); 29 | // label.setStyle("-fx-background:transparent;"); 30 | 31 | final Scene scene = new Scene(root, width, height); 32 | scene.setFill(null); 33 | 34 | final Stage stage = new Stage(); 35 | stage.initStyle(StageStyle.TRANSPARENT); 36 | stage.setScene(scene); 37 | Rectangle2D primaryScreenBounds = Screen.getPrimary().getVisualBounds(); 38 | stage.setX((primaryScreenBounds.getWidth() - width) / 2); 39 | stage.setY((primaryScreenBounds.getHeight() - height)); 40 | stage.setAlwaysOnTop(true); 41 | 42 | // 拖动监听器 43 | DragUtil.addDragListener(stage, root); 44 | stage.show(); 45 | 46 | } 47 | 48 | 49 | public static void main(String[] args) { 50 | launch(args); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/subtitle/Task.java: -------------------------------------------------------------------------------- 1 | package subtitle; 2 | 3 | import com.alibaba.nls.client.protocol.InputFormatEnum; 4 | import com.alibaba.nls.client.protocol.NlsClient; 5 | import com.alibaba.nls.client.protocol.SampleRateEnum; 6 | import com.alibaba.nls.client.protocol.asr.SpeechTranscriber; 7 | import com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener; 8 | import com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse; 9 | import javafx.application.Platform; 10 | import javafx.scene.control.Label; 11 | 12 | import javax.sound.sampled.AudioFormat; 13 | import javax.sound.sampled.AudioSystem; 14 | import javax.sound.sampled.DataLine; 15 | import javax.sound.sampled.TargetDataLine; 16 | import java.util.Date; 17 | 18 | /** 19 | * Created by yi-ge 20 | * 2018-12-21 23:46 21 | */ 22 | class Task implements Runnable { 23 | Label label; 24 | 25 | private String appKey = ""; 26 | private String accessToken = ""; 27 | NlsClient client; 28 | 29 | public Task(Label label) { 30 | this.label = label; 31 | } 32 | 33 | public SpeechTranscriberListener getTranscriberListener() { 34 | SpeechTranscriberListener listener = new SpeechTranscriberListener() { 35 | // 识别出中间结果.服务端识别出一个字或词时会返回此消息.仅当setEnableIntermediateResult(true)时,才会有此类消息返回 36 | @Override 37 | public void onTranscriptionResultChange(SpeechTranscriberResponse response) { 38 | System.out.println("name: " + response.getName() + 39 | // 状态码 20000000 表示正常识别 40 | ", status: " + response.getStatus() + 41 | // 句子编号,从1开始递增 42 | ", index: " + response.getTransSentenceIndex() + 43 | // 当前句子的中间识别结果 44 | ", result: " + response.getTransSentenceText() + 45 | // 当前已处理的音频时长,单位是毫秒 46 | ", time: " + response.getTransSentenceTime()); 47 | 48 | final String r = response.getTransSentenceText(); 49 | 50 | Platform.runLater(new Runnable() { 51 | @Override 52 | public void run() { 53 | // Update UI here. 54 | label.setText(r); 55 | } 56 | }); 57 | } 58 | 59 | // 识别出一句话.服务端会智能断句,当识别到一句话结束时会返回此消息 60 | @Override 61 | public void onSentenceEnd(SpeechTranscriberResponse response) { 62 | System.out.println("name: " + response.getName() + 63 | // 状态码 20000000 表示正常识别 64 | ", status: " + response.getStatus() + 65 | // 句子编号,从1开始递增 66 | ", index: " + response.getTransSentenceIndex() + 67 | // 当前句子的完整识别结果 68 | ", result: " + response.getTransSentenceText() + 69 | // 当前已处理的音频时长,单位是毫秒 70 | ", time: " + response.getTransSentenceTime() + 71 | // SentenceBegin事件的时间,单位是毫秒 72 | ", begin time: " + response.getSentenceBeginTime() + 73 | // 识别结果置信度,取值范围[0.0, 1.0],值越大表示置信度越高 74 | ", confidence: " + response.getConfidence()); 75 | 76 | final String r = response.getTransSentenceText(); 77 | 78 | Platform.runLater(new Runnable() { 79 | @Override 80 | public void run() { 81 | // Update UI here. 82 | label.setText(r); 83 | } 84 | }); 85 | } 86 | 87 | // 识别完毕 88 | @Override 89 | public void onTranscriptionComplete(SpeechTranscriberResponse response) { 90 | System.out.println("name: " + response.getName() + 91 | ", status: " + response.getStatus()); 92 | } 93 | }; 94 | return listener; 95 | } 96 | 97 | public void process() { 98 | SpeechTranscriber transcriber = null; 99 | try { 100 | // Step1 创建实例,建立连接 101 | transcriber = new SpeechTranscriber(client, getTranscriberListener()); 102 | transcriber.setAppKey(appKey); 103 | // 输入音频编码方式 104 | transcriber.setFormat(InputFormatEnum.PCM); 105 | // 输入音频采样率 106 | transcriber.setSampleRate(SampleRateEnum.SAMPLE_RATE_16K); 107 | // 是否返回中间识别结果 108 | transcriber.setEnableIntermediateResult(true); 109 | // 是否生成并返回标点符号 110 | transcriber.setEnablePunctuation(true); 111 | // 是否将返回结果规整化,比如将一百返回为100 112 | transcriber.setEnableITN(false); 113 | 114 | // Step2 此方法将以上参数设置序列化为json发送给服务端,并等待服务端确认 115 | transcriber.start(); 116 | 117 | // Step3 读取麦克风数据 118 | AudioFormat audioFormat = new AudioFormat(16000.0F, 16, 1, true, false); 119 | DataLine.Info info = new DataLine.Info(TargetDataLine.class, audioFormat); 120 | TargetDataLine targetDataLine = (TargetDataLine) AudioSystem.getLine(info); 121 | targetDataLine.open(audioFormat); 122 | targetDataLine.start(); 123 | Platform.runLater(new Runnable() { 124 | @Override 125 | public void run() { 126 | // Update UI here. 127 | label.setText("You can speak now!"); 128 | } 129 | }); 130 | // label.setText("You can speak now!"); 131 | int nByte = 0; 132 | final int bufSize = 6400; 133 | byte[] buffer = new byte[bufSize]; 134 | while ((nByte = targetDataLine.read(buffer, 0, bufSize)) > 0) { 135 | // Step4 直接发送麦克风数据流 136 | transcriber.send(buffer); 137 | } 138 | 139 | // Step5 通知服务端语音数据发送完毕,等待服务端处理完成 140 | transcriber.stop(); 141 | } catch (Exception e) { 142 | System.err.println(e.getMessage()); 143 | } finally { 144 | // Step6 关闭连接 145 | if (null != transcriber) { 146 | transcriber.close(); 147 | } 148 | } 149 | } 150 | 151 | // public void shutdown() { 152 | // client.shutdown(); 153 | // } 154 | 155 | @Override 156 | public void run() { 157 | // Step0 创建NlsClient实例,应用全局创建一个即可,默认服务地址为阿里云线上服务地址 158 | client = new NlsClient(accessToken); 159 | this.process(); 160 | } 161 | } -------------------------------------------------------------------------------- /src/main/resources/sample.fxml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |
8 |
10 |
--------------------------------------------------------------------------------