├── .gitignore ├── LICENSE ├── paddle-ocr ├── paddle-ocr-native-server │ ├── pom.xml │ ├── readme.md │ └── src │ │ └── main │ │ ├── java │ │ └── com │ │ │ └── litongjava │ │ │ └── ai │ │ │ └── server │ │ │ └── padddle │ │ │ └── ocr │ │ │ ├── IndexController.java │ │ │ ├── PaddleOcrController.java │ │ │ ├── PaddleOcrNativeServer.java │ │ │ ├── PaddlePaddleOCRNativeV4.java │ │ │ └── PaddlePaddleOCRNativeV4Demo.java │ │ └── resources │ │ └── images │ │ └── flight_ticket.jpg ├── paddle-ocr-server │ ├── Dockerfile │ ├── doc │ │ └── paddle-ocr.http │ ├── pom.xml │ └── src │ │ └── main │ │ ├── java │ │ └── com │ │ │ └── litongjava │ │ │ └── ai │ │ │ └── server │ │ │ └── padddle │ │ │ └── ocr │ │ │ ├── PaddleOcrServer.java │ │ │ ├── config │ │ │ └── PaddleOcrConfig.java │ │ │ └── controller │ │ │ ├── IndexController.java │ │ │ ├── IndexHandler.java │ │ │ ├── PaddleOcrController.java │ │ │ └── PaddleOcrHandler.java │ │ └── resources │ │ └── images │ │ └── flight_ticket.jpg ├── paddle-ocr-service │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── java │ │ │ └── com │ │ │ │ └── litongjava │ │ │ │ └── ai │ │ │ │ └── djl │ │ │ │ └── paddle │ │ │ │ └── ocr │ │ │ │ └── v4 │ │ │ │ ├── OcrV4DetExample.java │ │ │ │ ├── OcrV4RecExample.java │ │ │ │ ├── OcrV4RecTensorExample.java │ │ │ │ ├── PaddlePaddleOCRV4.java │ │ │ │ ├── common │ │ │ │ ├── ImageUtils.java │ │ │ │ ├── RotatedBox.java │ │ │ │ └── RotatedBoxCompX.java │ │ │ │ ├── detection │ │ │ │ ├── OCRDetectionTranslator.java │ │ │ │ └── OcrV4Detection.java │ │ │ │ ├── opencv │ │ │ │ ├── NDArrayUtils.java │ │ │ │ └── OpenCVUtils.java │ │ │ │ └── recognition │ │ │ │ ├── OcrV4Recognition.java │ │ │ │ └── PpWordRecTranslator.java │ │ └── resources │ │ │ ├── logback.xml │ │ │ └── models │ │ │ └── readme.md │ │ └── test │ │ ├── java │ │ └── com │ │ │ └── litongjava │ │ │ └── ai │ │ │ └── djl │ │ │ └── paddle │ │ │ └── ocr │ │ │ └── v4 │ │ │ ├── PaddlePaddleOCRV4Test.java │ │ │ ├── gpu │ │ │ └── GPUStudy.java │ │ │ └── recognition │ │ │ └── 
OcrV4RecognitionTest.java │ │ └── resources │ │ ├── 03.png │ │ └── 2.jpg ├── pom.xml └── readme.md ├── pom.xml ├── rapid-ocr-server ├── Dockerfile ├── deploy-win.txt ├── pom.xml ├── readme.md └── src │ └── main │ ├── java │ └── com │ │ └── litongjava │ │ └── ai │ │ └── server │ │ └── rapid │ │ └── ocr │ │ ├── RapidOcrServer.java │ │ ├── config │ │ └── RapidOcrConfig.java │ │ ├── controller │ │ ├── IndexHandler.java │ │ └── RapidOcrHandler.java │ │ └── instance │ │ └── EngineInstance.java │ └── resources │ ├── app.properties │ ├── images │ └── flight_ticket.jpg │ └── logback.xml ├── readme.md ├── s.yaml └── whisper-asr ├── pom.xml ├── readme.md ├── whisper-asr-server ├── docker │ ├── 1.0.0-base.en │ ├── 1.0.0-large │ ├── 1.0.1 │ └── readme.md ├── models │ └── readme.md ├── pom.xml └── src │ └── main │ ├── java │ └── com │ │ └── litongjava │ │ └── aio │ │ └── server │ │ └── tio │ │ ├── WhisperAsrServer.java │ │ ├── config │ │ └── WhisperAsrConfig.java │ │ └── controller │ │ ├── EnviormentController.java │ │ ├── IndexController.java │ │ ├── SystemController.java │ │ └── WhisperAsrController.java │ └── resources │ └── app.properties └── whisper-asr-service ├── pom.xml └── src ├── main ├── java │ └── com │ │ └── litongjava │ │ └── ai │ │ └── server │ │ ├── enumeration │ │ ├── AudioType.java │ │ └── TextType.java │ │ ├── model │ │ └── WhisperSegment.java │ │ ├── property │ │ └── WhiserAsrProperties.java │ │ ├── service │ │ ├── TextService.java │ │ ├── WhisperCppBaseService.java │ │ ├── WhisperCppJni.java │ │ ├── WhisperCppLargeService.java │ │ └── WhisperCppService.java │ │ ├── single │ │ ├── LocalBaseWhisper.java │ │ ├── LocalLargeWhisper.java │ │ └── LocalWhisper.java │ │ └── utils │ │ ├── JFramUtils.java │ │ ├── MatPanel.java │ │ ├── Mp3Util.java │ │ ├── WhisperAudioUtils.java │ │ └── WhisperExecutorServiceUtils.java └── resources │ └── logback.xml └── test └── java └── com └── litongjava └── ai └── server ├── service └── WhisperCppServiceMultiThreadTest.java ├── 
single └── LocalLargeWhisperTest.java └── utils └── Mp3UtilTest.java /.gitignore: -------------------------------------------------------------------------------- 1 | ### Eclipse template 2 | *.pydevproject 3 | .metadata 4 | .gradle* 5 | classes/ 6 | bin/ 7 | tmp/ 8 | *.tmp 9 | *.bak 10 | *.swp 11 | *~.nib 12 | local.properties 13 | .settings/ 14 | .loadpath 15 | rebel.xml 16 | 17 | # Eclipse Core 18 | .project 19 | 20 | generatedsources 21 | 22 | # External tool builders 23 | .externalToolBuilders/ 24 | 25 | # Locally stored "Eclipse launch configurations" 26 | *.launch 27 | 28 | # CDT-specific 29 | .cproject 30 | 31 | # JDT-specific (Eclipse Java Development Tools) 32 | .classpath 33 | 34 | # PDT-specific 35 | .buildpath 36 | 37 | # sbteclipse plugin 38 | .target 39 | 40 | # TeXlipse plugin 41 | .texlipse 42 | 43 | 44 | 45 | ### JetBrains template 46 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm 47 | 48 | *.iml 49 | .flattened-pom.xml 50 | ## Directory-based project format: 51 | .idea/ 52 | # if you remove the above rule, at least ignore the following: 53 | 54 | # User-specific stuff: 55 | # .idea/workspace.xml 56 | # .idea/tasks.xml 57 | # .idea/dictionaries 58 | 59 | # Sensitive or high-churn files: 60 | # .idea/dataSources.ids 61 | # .idea/dataSources.xml 62 | # .idea/sqlDataSources.xml 63 | # .idea/dynamic.xml 64 | # .idea/uiDesigner.xml 65 | 66 | # Gradle: 67 | # .idea/gradle.xml 68 | # .idea/libraries 69 | 70 | # Mongo Explorer plugin: 71 | # .idea/mongoSettings.xml 72 | 73 | ## File-based project format: 74 | *.ipr 75 | *.iws 76 | 77 | ## Plugin-specific files: 78 | 79 | # IntelliJ 80 | /out/ 81 | 82 | # mpeltonen/sbt-idea plugin 83 | .idea_modules/ 84 | 85 | # JIRA plugin 86 | atlassian-ide-plugin.xml 87 | 88 | # Crashlytics plugin (for Android Studio and IntelliJ) 89 | com_crashlytics_export_strings.xml 90 | crashlytics.properties 91 | crashlytics-build.properties 92 | 93 | build/ 94 | 95 | # Ignore Gradle GUI config 96 | 
gradle-app.setting 97 | 98 | # Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored) 99 | !gradle-wrapper.jar 100 | 101 | db 102 | 103 | ### Java template 104 | *.class 105 | 106 | # Mobile Tools for Java (J2ME) 107 | .mtj.tmp/ 108 | 109 | # Package Files # 110 | #*.jar 111 | 112 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 113 | hs_err_pid* 114 | 115 | 116 | ### Leiningen template 117 | target/ 118 | logs/ 119 | checkouts/ 120 | .lein-deps-sum 121 | .lein-repl-history 122 | .lein-plugins/ 123 | .lein-failures 124 | .nrepl-port 125 | 126 | querydsl/ 127 | 128 | .DS_Store 129 | 130 | *.log 131 | node_modules/ 132 | dist/ 133 | dist.zip 134 | package-lock.json 135 | *.wav 136 | *.mp3 137 | *.onnx 138 | *.zip -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 李通 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-native-server/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | com.litongjava 5 | paddle-ocr 6 | 1.0.0 7 | 8 | paddle-ocr-native-server 9 | 10 | UTF-8 11 | 1.8 12 | ${java.version} 13 | ${java.version} 14 | 15 | 23.1.1 16 | com.litongjava.ai.server.padddle.ocr.PaddleOcrNativeServer 17 | 18 | 19 | 20 | 21 | io.github.mymonstercat 22 | rapidocr 23 | 0.0.7 24 | 25 | 26 | 27 | 28 | 29 | io.github.mymonstercat 30 | rapidocr-onnx-platform 31 | 0.0.7 32 | 33 | 34 | 35 | com.litongjava 36 | tio-http-server 37 | 3.7.3.v20231223-RELEASE 38 | 39 | 40 | 41 | ${project.artifactId} 42 | 43 | 44 | 45 | jar 46 | 47 | true 48 | 49 | 50 | 51 | 52 | ch.qos.logback 53 | logback-classic 54 | 1.2.3 55 | 56 | 57 | 58 | 59 | 60 | org.apache.maven.plugins 61 | maven-jar-plugin 62 | 3.2.0 63 | 64 | 65 | org.apache.maven.plugins 66 | maven-assembly-plugin 67 | 3.1.1 68 | 69 | 70 | 71 | ${mainClass.server} 72 | 73 | 74 | 75 | jar-with-dependencies 76 | 77 | false 78 | 79 | 80 | 81 | make-assembly 82 | package 83 | 84 | single 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | server-graalvm 94 | 95 | 96 | 97 | org.slf4j 98 | slf4j-jdk14 99 | 1.7.31 100 | 101 | 102 | 103 | org.graalvm.sdk 104 | graal-sdk 105 | ${graalvm.version} 106 | provided 107 | 108 | 109 | 110 | tio-http-server-graal 111 | 112 | 113 | org.graalvm.nativeimage 114 | native-image-maven-plugin 115 | 21.2.0 116 | 117 | 118 | 119 | native-image 120 | 121 | package 122 | 123 | 124 | 125 | false 126 | ${project.artifactId} 127 | ${mainClass.server} 128 | 129 | -H:+RemoveSaturatedTypeFlows 130 
| --allow-incomplete-classpath 131 | --no-fallback 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-native-server/readme.md: -------------------------------------------------------------------------------- 1 | ## 2 | ### 打包失败,错误日志如下,可能还是需要使用jni的方案才支持编译成二进制文件 3 | ``` 4 | Error: Class-path entry file:///root/.m2/repository/com/microsoft/onnxruntime/onnxruntime/1.16.0/onnxruntime-1.16.0.jar contains class ai.onnxruntime.ValueInfo. This class is part of the image builder itself (in file:///root/program/graalvm-jdk-21.0.1+12.1/lib/svm/builder/svm-enterprise.jar) and must not be passed via -cp. This can be caused by a fat-jar that illegally includes svm.jar (or graal-sdk.jar) due to its build-time dependency on it. As a workaround, -H:+AllowDeprecatedBuilderClassesOnImageClasspath allows turning this error into a warning. Note that this option is deprecated and will be removed in a future version. 
5 | ``` 6 | -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-native-server/src/main/java/com/litongjava/ai/server/padddle/ocr/IndexController.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.server.padddle.ocr; 2 | 3 | import com.litongjava.tio.http.common.HttpRequest; 4 | import com.litongjava.tio.http.common.HttpResponse; 5 | import com.litongjava.tio.http.server.util.Resps; 6 | 7 | public class IndexController { 8 | 9 | public HttpResponse index(HttpRequest request) { 10 | return Resps.txt(request, "paddle-ocr-native-server"); 11 | 12 | } 13 | 14 | } -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-native-server/src/main/java/com/litongjava/ai/server/padddle/ocr/PaddleOcrController.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.server.padddle.ocr; 2 | 3 | import java.net.URL; 4 | 5 | import com.litongjava.tio.http.common.HttpRequest; 6 | import com.litongjava.tio.http.common.HttpResponse; 7 | import com.litongjava.tio.http.common.UploadFile; 8 | import com.litongjava.tio.http.server.util.Resps; 9 | import com.litongjava.tio.utils.hutool.ResourceUtil; 10 | import com.litongjava.tio.utils.resp.RespVo; 11 | 12 | //@EnableCORS 13 | //@Controller 14 | //@RequestPath("/paddle/ocr") 15 | public class PaddleOcrController { 16 | 17 | // @RequestPath(value = "/rec") 18 | public HttpResponse rec(HttpRequest request) throws Exception { 19 | String url = request.getParam("url"); 20 | UploadFile file = request.getUploadFile("file"); 21 | String text = null; 22 | if (url != null) { 23 | text = PaddlePaddleOCRNativeV4.INSTANCE.ocr(url); 24 | } else if (file != null) { 25 | byte[] fileData = file.getData(); 26 | text = PaddlePaddleOCRNativeV4.INSTANCE.ocr(fileData); 27 | } 28 | if (text != null) { 29 | return Resps.json(request, 
RespVo.ok(text)); 30 | } else { 31 | return Resps.json(request, RespVo.fail()); 32 | } 33 | } 34 | 35 | // @RequestPath("/test") 36 | public HttpResponse test(HttpRequest request) throws Exception { 37 | URL resource = ResourceUtil.getResource("images/flight_ticket.jpg"); 38 | return Resps.json(request, RespVo.ok(PaddlePaddleOCRNativeV4.INSTANCE.ocr(resource))); 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-native-server/src/main/java/com/litongjava/ai/server/padddle/ocr/PaddleOcrNativeServer.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.server.padddle.ocr; 2 | 3 | import java.io.IOException; 4 | 5 | import com.litongjava.tio.http.common.HttpConfig; 6 | import com.litongjava.tio.http.common.handler.HttpRequestHandler; 7 | import com.litongjava.tio.http.server.HttpServerStarter; 8 | import com.litongjava.tio.http.server.handler.HttpRoutes; 9 | import com.litongjava.tio.http.server.handler.SimpleHttpDispatcherHandler; 10 | import com.litongjava.tio.http.server.handler.SimpleHttpRoutes; 11 | 12 | public class PaddleOcrNativeServer { 13 | 14 | public static void main(String[] args) throws IOException { 15 | 16 | // init ocr 17 | // 实例化Controller 18 | IndexController controller = new IndexController(); 19 | PaddleOcrController paddleOcrController = new PaddleOcrController(); 20 | 21 | // 手动添加路由 22 | HttpRoutes simpleHttpRoutes = new SimpleHttpRoutes(); 23 | simpleHttpRoutes.add("/", controller::index); 24 | 25 | simpleHttpRoutes.add("/paddle/ocr/test", paddleOcrController::test); 26 | simpleHttpRoutes.add("/paddle/ocr/rec", paddleOcrController::rec); 27 | 28 | // 配置服务服务器 29 | HttpConfig httpConfig; 30 | HttpRequestHandler requestHandler; 31 | HttpServerStarter httpServerStarter; 32 | 33 | httpConfig = new HttpConfig(80, null, null, null); 34 | requestHandler = new SimpleHttpDispatcherHandler(httpConfig, 
/**
 * Singleton entry point for the native OCR engine.
 *
 * <p>All three overloads are unimplemented placeholders at the moment: each one ignores its
 * argument and returns {@code null}.
 */
public enum PaddlePaddleOCRNativeV4 {
  INSTANCE;

  /**
   * OCR for an image addressed by a URL string.
   *
   * @param url location of the image
   * @return always {@code null} (not implemented yet)
   */
  String ocr(String url) {
    // TODO not implemented yet
    return null;
  }

  /**
   * OCR for raw image bytes.
   *
   * @param fileData encoded image content
   * @return always {@code null} (not implemented yet)
   */
  String ocr(byte[] fileData) {
    // TODO not implemented yet
    return null;
  }

  /**
   * OCR for an image addressed by a {@link URL}.
   *
   * @param resource location of the image
   * @return always {@code null} (not implemented yet)
   */
  String ocr(URL resource) {
    // TODO not implemented yet
    return null;
  }
}
https://raw.githubusercontent.com/litongjava/ai-server/3d740e12cf2d193590a48a735c515e369da12174/paddle-ocr/paddle-ocr-native-server/src/main/resources/images/flight_ticket.jpg -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-server/Dockerfile: -------------------------------------------------------------------------------- 1 | # build litongjava/paddle-ocr-server:1.0.1 2 | # Use litongjava/centos-8-jdk:8u341 as the base image 3 | FROM litongjava/centos-8-jdk:8u341 4 | 5 | # Set the working directory in the container 6 | WORKDIR /app 7 | 8 | # Copy the jar file into the container 9 | COPY target/paddle-ocr-server-1.0.1.jar /app/ 10 | 11 | # download file 12 | RUN java -jar /app/paddle-ocr-server-1.0.1.jar --download 13 | 14 | # Command to run the jar file 15 | CMD ["java", "-jar", "paddle-ocr-server-1.0.1.jar", "--mode=prod"] 16 | -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-server/doc/paddle-ocr.http: -------------------------------------------------------------------------------- 1 | curl --location --request POST 'http://localhost/paddle/ocr/rec' \ 2 | --form 'file=@"E:\code\python\project-litongjava\cyg-v2\img.png"' -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-server/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | com.litongjava 5 | paddle-ocr 6 | 1.0.0 7 | 8 | paddle-ocr-server 9 | 1.0.4 10 | 11 | UTF-8 12 | 1.8 13 | ${java.version} 14 | ${java.version} 15 | 23.1.1 16 | 1.4.0 17 | 1.18.30 18 | 1.2.1 19 | ocr-server 20 | com.litongjava.ai.server.padddle.ocr.PaddleOcrServer 21 | 22 | 23 | 24 | com.litongjava 25 | paddle-ocr-service 26 | 1.0.0 27 | 28 | 29 | com.litongjava 30 | tio-boot 31 | ${tio.boot.version} 32 | 33 | 34 | org.projectlombok 35 | lombok 36 | ${lombok-version} 37 | true 38 | provided 39 | 40 | 41 | 42 | 43 | 44 | 
development 45 | 46 | true 47 | 48 | 49 | 50 | ch.qos.logback 51 | logback-classic 52 | 1.2.3 53 | 54 | 55 | 56 | 57 | 58 | 59 | production 60 | 61 | 62 | ch.qos.logback 63 | logback-classic 64 | 1.2.3 65 | 66 | 67 | 68 | 69 | 70 | org.springframework.boot 71 | spring-boot-maven-plugin 72 | 2.7.4 73 | 74 | ${main.class} 75 | org.projectlombok 76 | 77 | 78 | 79 | 80 | 81 | repackage 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | assembly 92 | 93 | 94 | ch.qos.logback 95 | logback-classic 96 | 1.2.3 97 | 98 | 99 | 100 | 101 | 102 | org.apache.maven.plugins 103 | maven-jar-plugin 104 | 3.2.0 105 | 106 | 107 | org.apache.maven.plugins 108 | maven-assembly-plugin 109 | 3.1.1 110 | 111 | 112 | 113 | ${main.class} 114 | 115 | 116 | 117 | jar-with-dependencies 118 | 119 | false 120 | 121 | 122 | 123 | make-assembly 124 | package 125 | 126 | single 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | native 136 | 137 | 138 | 139 | org.slf4j 140 | slf4j-jdk14 141 | 1.7.31 142 | 143 | 144 | 145 | org.graalvm.sdk 146 | graal-sdk 147 | ${graalvm.version} 148 | provided 149 | 150 | 151 | 152 | ${final.name} 153 | 154 | 155 | org.graalvm.nativeimage 156 | native-image-maven-plugin 157 | 21.2.0 158 | 159 | 160 | 161 | native-image 162 | 163 | package 164 | 165 | 166 | 167 | false 168 | ${project.build.finalName} 169 | ${main.class} 170 | 171 | -H:+RemoveSaturatedTypeFlows 172 | --allow-incomplete-classpath 173 | --no-fallback 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-server/src/main/java/com/litongjava/ai/server/padddle/ocr/PaddleOcrServer.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.server.padddle.ocr; 2 | 3 | import java.util.Arrays; 4 | 5 | import com.litongjava.ai.djl.paddle.ocr.v4.PaddlePaddleOCRV4; 6 | import com.litongjava.jfinal.aop.annotation.AComponentScan; 7 | import 
com.litongjava.tio.boot.TioApplication; 8 | 9 | import cn.hutool.core.io.resource.ResourceUtil; 10 | import lombok.extern.slf4j.Slf4j; 11 | 12 | @AComponentScan 13 | @Slf4j 14 | public class PaddleOcrServer { 15 | 16 | public static void main(String[] args) throws Exception { 17 | 18 | boolean downloadMode = Arrays.asList(args).contains("--download"); 19 | if (downloadMode) { 20 | log.info("downloadMode:{}", downloadMode); 21 | downloadAndTest(); 22 | } else { 23 | long start = System.currentTimeMillis(); 24 | TioApplication.run(PaddleOcrServer.class, args); 25 | long end = System.currentTimeMillis(); 26 | System.out.println("started:" + (end - start) + "(ms)"); 27 | } 28 | } 29 | 30 | private static void downloadAndTest() throws Exception { 31 | PaddlePaddleOCRV4.INSTANCE.init(); 32 | long start = System.currentTimeMillis(); 33 | String ocr = PaddlePaddleOCRV4.INSTANCE.ocr(ResourceUtil.getResource("images/flight_ticket.jpg")); 34 | long end = System.currentTimeMillis(); 35 | System.out.println(ocr); 36 | System.out.println("inference time:" + (end - start) + "ms"); 37 | } 38 | } -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-server/src/main/java/com/litongjava/ai/server/padddle/ocr/config/PaddleOcrConfig.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.server.padddle.ocr.config; 2 | 3 | import com.litongjava.ai.djl.paddle.ocr.v4.PaddlePaddleOCRV4; 4 | import com.litongjava.ai.server.padddle.ocr.controller.IndexHandler; 5 | import com.litongjava.ai.server.padddle.ocr.controller.PaddleOcrHandler; 6 | import com.litongjava.jfinal.aop.annotation.AInitialization; 7 | import com.litongjava.jfinal.aop.annotation.BeforeStartConfiguration; 8 | import com.litongjava.tio.boot.server.TioBootServer; 9 | import com.litongjava.tio.http.server.handler.SimpleHttpRoutes; 10 | 11 | @BeforeStartConfiguration 12 | public class PaddleOcrConfig { 13 | 
@AInitialization 14 | public void initOcr() { 15 | // init ocr 16 | PaddlePaddleOCRV4.INSTANCE.init(); 17 | // init handler 18 | 19 | // 创建simpleHttpRoutes 20 | SimpleHttpRoutes simpleHttpRoutes = new SimpleHttpRoutes(); 21 | // 创建controller 22 | IndexHandler indexHandler = new IndexHandler(); 23 | PaddleOcrHandler paddleOcrHandler = new PaddleOcrHandler(); 24 | 25 | // 添加action 26 | simpleHttpRoutes.add("/", indexHandler::index); 27 | simpleHttpRoutes.add("/paddle/ocr/rec", paddleOcrHandler::rec); 28 | simpleHttpRoutes.add("/paddle/ocr/test", paddleOcrHandler::test); 29 | 30 | // 将simpleHttpRoutes添加到TioBootServer 31 | TioBootServer.me().setHttpRoutes(simpleHttpRoutes); 32 | 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-server/src/main/java/com/litongjava/ai/server/padddle/ocr/controller/IndexController.java: -------------------------------------------------------------------------------- 1 | //package com.litongjava.ai.server.padddle.ocr.controller; 2 | // 3 | //import com.litongjava.jfinal.aop.annotation.AController; 4 | //import com.litongjava.tio.http.server.annotation.EnableCORS; 5 | //import com.litongjava.tio.http.server.annotation.RequestPath; 6 | // 7 | //@EnableCORS 8 | //@AController 9 | //@RequestPath(value = "/") 10 | //public class IndexController { 11 | // @RequestPath() 12 | // public String respText() { 13 | // return "paddle-ocr-server"; 14 | // } 15 | //} -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-server/src/main/java/com/litongjava/ai/server/padddle/ocr/controller/IndexHandler.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.server.padddle.ocr.controller; 2 | 3 | import com.litongjava.tio.http.common.HttpRequest; 4 | import com.litongjava.tio.http.common.HttpResponse; 5 | import com.litongjava.tio.http.server.util.Resps; 6 | 7 | public 
class IndexHandler { 8 | 9 | public HttpResponse index(HttpRequest httpRequest) { 10 | return Resps.txt(httpRequest, "paddle-ocr-server"); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-server/src/main/java/com/litongjava/ai/server/padddle/ocr/controller/PaddleOcrController.java: -------------------------------------------------------------------------------- 1 | //package com.litongjava.ai.server.padddle.ocr.controller; 2 | // 3 | //import java.net.URL; 4 | // 5 | //import com.litongjava.ai.djl.paddle.ocr.v4.PaddlePaddleOCRV4; 6 | //import com.litongjava.jfinal.aop.annotation.AController; 7 | //import com.litongjava.tio.http.common.HttpRequest; 8 | //import com.litongjava.tio.http.common.HttpResponse; 9 | //import com.litongjava.tio.http.common.UploadFile; 10 | //import com.litongjava.tio.http.server.annotation.EnableCORS; 11 | //import com.litongjava.tio.http.server.annotation.RequestPath; 12 | //import com.litongjava.tio.http.server.util.Resps; 13 | //import com.litongjava.tio.utils.resp.RespVo; 14 | // 15 | //import cn.hutool.core.io.resource.ResourceUtil; 16 | // 17 | //@EnableCORS 18 | //@AController 19 | //@RequestPath("/paddle/ocr") 20 | //public class PaddleOcrController { 21 | // 22 | // @RequestPath(value = "/rec") 23 | // public HttpResponse index(UploadFile file, String url, HttpRequest request) throws Exception { 24 | // String text = null; 25 | // if (url != null) { 26 | // text = PaddlePaddleOCRV4.INSTANCE.ocr(url); 27 | // } else if (file != null) { 28 | // byte[] fileData = file.getData(); 29 | // text = PaddlePaddleOCRV4.INSTANCE.ocr(fileData); 30 | // } 31 | // if (text != null) { 32 | // return Resps.json(request, RespVo.ok(text)); 33 | // } else { 34 | // return Resps.json(request, RespVo.fail()); 35 | // } 36 | // } 37 | // 38 | // @RequestPath("/test") 39 | // public HttpResponse test(HttpRequest request) throws Exception { 40 | // URL resource = 
ResourceUtil.getResource("images/flight_ticket.jpg"); 41 | // return Resps.json(request, RespVo.ok(PaddlePaddleOCRV4.INSTANCE.ocr(resource))); 42 | // } 43 | // 44 | //} 45 | -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-server/src/main/java/com/litongjava/ai/server/padddle/ocr/controller/PaddleOcrHandler.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.server.padddle.ocr.controller; 2 | 3 | import java.net.URL; 4 | 5 | import com.litongjava.ai.djl.paddle.ocr.v4.PaddlePaddleOCRV4; 6 | import com.litongjava.tio.http.common.HttpRequest; 7 | import com.litongjava.tio.http.common.HttpResponse; 8 | import com.litongjava.tio.http.common.UploadFile; 9 | import com.litongjava.tio.http.server.model.HttpCors; 10 | import com.litongjava.tio.http.server.util.HttpServerResponseUtils; 11 | import com.litongjava.tio.http.server.util.Resps; 12 | import com.litongjava.tio.utils.hutool.ResourceUtil; 13 | import com.litongjava.tio.utils.resp.RespVo; 14 | 15 | public class PaddleOcrHandler { 16 | 17 | public HttpResponse rec(HttpRequest httprequest) throws Exception { 18 | UploadFile file = httprequest.getUploadFile("file"); 19 | String url = httprequest.getParam("url"); 20 | String text = null; 21 | if (url != null) { 22 | text = PaddlePaddleOCRV4.INSTANCE.ocr(url); 23 | } else if (file != null) { 24 | byte[] fileData = file.getData(); 25 | text = PaddlePaddleOCRV4.INSTANCE.ocr(fileData); 26 | } 27 | HttpResponse httpResponse = null; 28 | if (text != null) { 29 | httpResponse = Resps.json(httprequest, RespVo.ok(text)); 30 | return httpResponse; 31 | } else { 32 | httpResponse = Resps.json(httprequest, RespVo.fail()); 33 | } 34 | HttpServerResponseUtils.enableCORS(httpResponse, new HttpCors()); 35 | return httpResponse; 36 | } 37 | 38 | public HttpResponse test(HttpRequest httprequest) throws Exception { 39 | URL resource = 
ResourceUtil.getResource("images/flight_ticket.jpg"); 40 | return Resps.json(httprequest, RespVo.ok(PaddlePaddleOCRV4.INSTANCE.ocr(resource))); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-server/src/main/resources/images/flight_ticket.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/litongjava/ai-server/3d740e12cf2d193590a48a735c515e369da12174/paddle-ocr/paddle-ocr-server/src/main/resources/images/flight_ticket.jpg -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | com.litongjava 5 | paddle-ocr 6 | 1.0.0 7 | 8 | paddle-ocr-service 9 | 10 | UTF-8 11 | 1.8 12 | ${java.version} 13 | ${java.version} 14 | 1.18.30 15 | 1.2.3 16 | 2.0.24 17 | 0.25.0 18 | 5.8.11 19 | 20 | 21 | 22 | 23 | 24 | cn.hutool 25 | hutool-all 26 | ${hutool.version} 27 | 28 | 29 | 30 | org.apache.pdfbox 31 | pdfbox 32 | ${pdfbox.version} 33 | 34 | 35 | 36 | org.projectlombok 37 | lombok 38 | ${lombok.version} 39 | provided 40 | 41 | 42 | ch.qos.logback 43 | logback-classic 44 | ${logback.version} 45 | 46 | 47 | 48 | 49 | ai.djl 50 | api 51 | ${djl.version} 52 | 53 | 54 | ai.djl 55 | basicdataset 56 | ${djl.version} 57 | 58 | 59 | ai.djl 60 | model-zoo 61 | ${djl.version} 62 | 63 | 64 | 65 | 66 | ai.djl.pytorch 67 | pytorch-engine 68 | ${djl.version} 69 | runtime 70 | 71 | 72 | 73 | ai.djl.pytorch 74 | pytorch-jni 75 | 1.13.1-0.25.0 76 | runtime 77 | 78 | 79 | 80 | ai.djl.pytorch 81 | pytorch-native-cu117 82 | win-x86_64 83 | 1.13.1 84 | runtime 85 | 86 | 87 | 88 | 89 | ai.djl.onnxruntime 90 | onnxruntime-engine 91 | ${djl.version} 92 | runtime 93 | 94 | 95 | com.microsoft.onnxruntime 96 | onnxruntime 97 | 98 | 99 | 100 | 101 | 102 | 103 | com.microsoft.onnxruntime 104 | 
onnxruntime_gpu 105 | 1.14.0 106 | runtime 107 | 108 | 109 | 110 | ai.djl.opencv 111 | opencv 112 | ${djl.version} 113 | 114 | 115 | 116 | junit 117 | junit 118 | 4.13.2 119 | test 120 | 121 | 122 | -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/OcrV4DetExample.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.djl.paddle.ocr.v4; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Path; 5 | import java.nio.file.Paths; 6 | 7 | import org.opencv.core.Mat; 8 | 9 | import com.litongjava.ai.djl.paddle.ocr.v4.common.ImageUtils; 10 | import com.litongjava.ai.djl.paddle.ocr.v4.detection.OcrV4Detection; 11 | 12 | import ai.djl.ModelException; 13 | import ai.djl.inference.Predictor; 14 | import ai.djl.modality.cv.Image; 15 | import ai.djl.ndarray.NDList; 16 | import ai.djl.ndarray.NDManager; 17 | import ai.djl.opencv.OpenCVImageFactory; 18 | import ai.djl.repository.zoo.ModelZoo; 19 | import ai.djl.repository.zoo.ZooModel; 20 | import ai.djl.translate.TranslateException; 21 | 22 | public final class OcrV4DetExample { 23 | 24 | private OcrV4DetExample() { 25 | } 26 | 27 | public static void main(String[] args) throws IOException, ModelException, TranslateException { 28 | Path imageFile = Paths.get("src/test/resources/2.jpg"); 29 | Image image = OpenCVImageFactory.getInstance().fromFile(imageFile); 30 | 31 | OcrV4Detection detection = new OcrV4Detection(); 32 | try (@SuppressWarnings("rawtypes") 33 | ZooModel detectionModel = ModelZoo.loadModel(detection.chDetCriteria()); @SuppressWarnings("unchecked") 34 | Predictor detector = detectionModel.newPredictor(); 35 | NDManager manager = NDManager.newBaseManager();) { 36 | 37 | NDList dt_boxes = detector.predict(image); 38 | // 交给 NDManager自动管理内存 39 | // attach to manager for automatic memory management 40 | dt_boxes.attach(manager); 41 | 42 | 
for (int i = 0; i < dt_boxes.size(); i++) { 43 | ImageUtils.drawRect((Mat) image.getWrappedImage(), dt_boxes.get(i)); 44 | } 45 | ImageUtils.saveImage(image, "detect_rect.png", "build/output"); 46 | ((Mat) image.getWrappedImage()).release(); 47 | } 48 | } 49 | } -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/OcrV4RecExample.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.djl.paddle.ocr.v4; 2 | 3 | import java.awt.image.BufferedImage; 4 | import java.io.IOException; 5 | import java.nio.file.Path; 6 | import java.nio.file.Paths; 7 | import java.util.ArrayList; 8 | import java.util.Collections; 9 | import java.util.List; 10 | 11 | import org.opencv.core.Mat; 12 | import org.slf4j.Logger; 13 | import org.slf4j.LoggerFactory; 14 | 15 | import com.litongjava.ai.djl.paddle.ocr.v4.common.ImageUtils; 16 | import com.litongjava.ai.djl.paddle.ocr.v4.common.RotatedBox; 17 | import com.litongjava.ai.djl.paddle.ocr.v4.common.RotatedBoxCompX; 18 | import com.litongjava.ai.djl.paddle.ocr.v4.detection.OcrV4Detection; 19 | import com.litongjava.ai.djl.paddle.ocr.v4.opencv.OpenCVUtils; 20 | import com.litongjava.ai.djl.paddle.ocr.v4.recognition.OcrV4Recognition; 21 | 22 | import ai.djl.ModelException; 23 | import ai.djl.inference.Predictor; 24 | import ai.djl.modality.cv.Image; 25 | import ai.djl.ndarray.NDList; 26 | import ai.djl.ndarray.NDManager; 27 | import ai.djl.opencv.OpenCVImageFactory; 28 | import ai.djl.repository.zoo.ModelZoo; 29 | import ai.djl.repository.zoo.ZooModel; 30 | import ai.djl.translate.TranslateException; 31 | 32 | /** 33 | * OCR V4模型 文字识别. 支持文本有旋转角度 34 | * OCR V4 model for text recognition. Supports text with rotation angles. 
35 | */ 36 | public final class OcrV4RecExample { 37 | 38 | private static final Logger logger = LoggerFactory.getLogger(OcrV4RecExample.class); 39 | 40 | private OcrV4RecExample() { 41 | } 42 | 43 | public static void main(String[] args) throws IOException, ModelException, TranslateException { 44 | // IDEA 45 | Path imageFile = Paths.get("E:\\code\\python\\project-litongjava\\cyg-v2\\img.png"); 46 | Image image = OpenCVImageFactory.getInstance().fromFile(imageFile); 47 | 48 | OcrV4Detection detection = new OcrV4Detection(); 49 | OcrV4Recognition recognition = new OcrV4Recognition(); 50 | try (ZooModel detectionModel = ModelZoo.loadModel(detection.chDetCriteria()); 51 | Predictor detector = detectionModel.newPredictor(); 52 | ZooModel recognitionModel = ModelZoo.loadModel(recognition.chRecCriteria()); 53 | Predictor recognizer = recognitionModel.newPredictor(); 54 | NDManager manager = NDManager.newBaseManager()) { 55 | 56 | long timeInferStart = System.currentTimeMillis(); 57 | List detections = recognition.predict(manager, image, detector, recognizer); 58 | 59 | // for (int i = 0; i < 1000; i++) { 60 | // detections = recognition.predict(image, detector, recognizer); 61 | // for (RotatedBox result : detections) { 62 | // System.out.println(result.getText()); 63 | // } 64 | // System.out.println("index : " + i); 65 | // } 66 | 67 | long timeInferEnd = System.currentTimeMillis(); 68 | System.out.println("time: " + (timeInferEnd - timeInferStart)); 69 | 70 | // 对检测结果根据坐标位置,根据从上到下,从做到右,重新排序,下面算法对图片倾斜旋转角度较小的情形适用 71 | // 如果图片旋转角度较大,则需要自行改进算法,需要根据斜率校正计算位置。 72 | // Reorder the detection results based on the coordinate positions, from top to bottom, from left to right. The algorithm below is suitable for situations where the image is slightly tilted or rotated. 73 | // If the image rotation angle is large, the algorithm needs to be improved, and the position needs to be calculated based on the slope correction. 
74 | List initList = new ArrayList<>(); 75 | if (detections != null) { 76 | for (RotatedBox result : detections) { 77 | // put low Y value at the head of the queue. 78 | initList.add(result); 79 | } 80 | } 81 | 82 | Collections.sort(initList); 83 | 84 | List> lines = new ArrayList<>(); 85 | List line = new ArrayList<>(); 86 | if (initList.size() > 0) { 87 | RotatedBoxCompX firstBox = new RotatedBoxCompX(initList.get(0).getBox(), initList.get(0).getText()); 88 | line.add(firstBox); 89 | lines.add((ArrayList) line); 90 | for (int i = 1; i < initList.size(); i++) { 91 | RotatedBoxCompX tmpBox = new RotatedBoxCompX(initList.get(i).getBox(), initList.get(i).getText()); 92 | float y1 = firstBox.getBox().toFloatArray()[1]; 93 | float y2 = tmpBox.getBox().toFloatArray()[1]; 94 | float dis = Math.abs(y2 - y1); 95 | if (dis < 20) { // 认为是同 1 行 - Considered to be in the same line 96 | line.add(tmpBox); 97 | } else { // 换行 - Line break 98 | firstBox = tmpBox; 99 | Collections.sort(line); 100 | line = new ArrayList<>(); 101 | line.add(firstBox); 102 | lines.add((ArrayList) line); 103 | } 104 | } 105 | } 106 | 107 | 108 | String fullText = ""; 109 | for (int i = 0; i < lines.size(); i++) { 110 | for (int j = 0; j < lines.get(i).size(); j++) { 111 | String text = lines.get(i).get(j).getText(); 112 | if (text.trim().equals("")) 113 | continue; 114 | fullText += text + " "; 115 | } 116 | fullText += '\n'; 117 | } 118 | 119 | System.out.println(fullText); 120 | 121 | // 转 BufferedImage 解决 Imgproc.putText 中文乱码问题 122 | Mat wrappedImage = (Mat) image.getWrappedImage(); 123 | BufferedImage bufferedImage = OpenCVUtils.mat2Image(wrappedImage); 124 | for (RotatedBox result : detections) { 125 | ImageUtils.drawImageRectWithText(bufferedImage, result.getBox(), result.getText()); 126 | } 127 | 128 | Mat image2Mat = OpenCVUtils.image2Mat(bufferedImage); 129 | image = OpenCVImageFactory.getInstance().fromImage(image2Mat); 130 | ImageUtils.saveImage(image, "ocr_result.png", "build/output"); 131 
| 132 | wrappedImage.release(); 133 | image2Mat.release(); 134 | 135 | logger.info("{}", detections); 136 | } 137 | } 138 | } -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/OcrV4RecTensorExample.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.djl.paddle.ocr.v4; 2 | 3 | import java.nio.file.Path; 4 | import java.nio.file.Paths; 5 | 6 | import com.litongjava.ai.djl.paddle.ocr.v4.recognition.OcrV4Recognition; 7 | 8 | import ai.djl.inference.Predictor; 9 | import ai.djl.modality.cv.Image; 10 | import ai.djl.opencv.OpenCVImageFactory; 11 | import ai.djl.repository.zoo.ModelZoo; 12 | import ai.djl.repository.zoo.ZooModel; 13 | import lombok.Cleanup; 14 | 15 | public class OcrV4RecTensorExample { 16 | public static void main(String[] args) { 17 | Path imageFile = Paths.get("E:\\code\\python\\project-litongjava\\cyg-v2\\img.png"); 18 | Image image; 19 | try { 20 | image = OpenCVImageFactory.getInstance().fromFile(imageFile); 21 | OcrV4Recognition recognition = new OcrV4Recognition(); 22 | 23 | @Cleanup 24 | ZooModel recognitionModel = ModelZoo.loadModel(recognition.chRecCriteria()); 25 | 26 | @Cleanup 27 | Predictor newPredictor = recognitionModel.newPredictor(); 28 | String predict = newPredictor.predict(image); 29 | System.out.println("result:" + predict); 30 | } catch (Exception e) { 31 | e.printStackTrace(); 32 | } 33 | 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/PaddlePaddleOCRV4.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.djl.paddle.ocr.v4; 2 | 3 | import java.io.ByteArrayInputStream; 4 | import java.io.File; 5 | import java.io.IOException; 6 | import java.net.URL; 7 | import 
java.nio.file.Path; 8 | import java.util.ArrayList; 9 | import java.util.Collections; 10 | import java.util.List; 11 | 12 | import com.litongjava.ai.djl.paddle.ocr.v4.common.RotatedBox; 13 | import com.litongjava.ai.djl.paddle.ocr.v4.common.RotatedBoxCompX; 14 | import com.litongjava.ai.djl.paddle.ocr.v4.detection.OcrV4Detection; 15 | import com.litongjava.ai.djl.paddle.ocr.v4.recognition.OcrV4Recognition; 16 | 17 | import ai.djl.MalformedModelException; 18 | import ai.djl.inference.Predictor; 19 | import ai.djl.modality.cv.Image; 20 | import ai.djl.modality.cv.ImageFactory; 21 | import ai.djl.ndarray.NDList; 22 | import ai.djl.ndarray.NDManager; 23 | import ai.djl.opencv.OpenCVImageFactory; 24 | import ai.djl.repository.zoo.ModelNotFoundException; 25 | import ai.djl.repository.zoo.ModelZoo; 26 | import ai.djl.repository.zoo.ZooModel; 27 | 28 | /** 29 | * Created by Tong Li on 11/23/2023_2:09 AM 30 | */ 31 | public enum PaddlePaddleOCRV4 { 32 | INSTANCE; 33 | 34 | private OcrV4Detection detection; 35 | private OcrV4Recognition recognition; 36 | private Predictor detector; 37 | private Predictor recognizer; 38 | private NDManager manager; 39 | 40 | PaddlePaddleOCRV4() { 41 | detection = new OcrV4Detection(); 42 | recognition = new OcrV4Recognition(); 43 | ZooModel detectionModel = null; 44 | ZooModel recognitionModel = null; 45 | try { 46 | detectionModel = ModelZoo.loadModel(detection.chDetCriteria()); 47 | recognitionModel = ModelZoo.loadModel(recognition.chRecCriteria()); 48 | } catch (IOException e) { 49 | e.printStackTrace(); 50 | } catch (ModelNotFoundException e) { 51 | e.printStackTrace(); 52 | } catch (MalformedModelException e) { 53 | e.printStackTrace(); 54 | } 55 | detector = detectionModel.newPredictor(); 56 | 57 | recognizer = recognitionModel.newPredictor(); 58 | manager = NDManager.newBaseManager(); 59 | } 60 | 61 | // noting not to do.but init 62 | public void init() { 63 | 64 | } 65 | 66 | public String ocr(String url) throws Exception { 67 | Image 
image = OpenCVImageFactory.getInstance().fromUrl(url); 68 | return ocr(image); 69 | } 70 | 71 | public String ocr(URL resource) throws Exception { 72 | Image image = OpenCVImageFactory.getInstance().fromUrl(resource); 73 | return ocr(image); 74 | } 75 | 76 | public String ocr(byte[] fileData) throws Exception { 77 | ByteArrayInputStream is = new ByteArrayInputStream(fileData); 78 | Image image = ImageFactory.getInstance().fromInputStream(is); 79 | return ocr(image); 80 | } 81 | 82 | public String ocr(File imageFile) throws Exception { 83 | Path path = imageFile.toPath(); 84 | Image image = OpenCVImageFactory.getInstance().fromFile(path); 85 | return ocr(image); 86 | } 87 | 88 | public String ocr(Image image) throws Exception { 89 | List detections = recognition.predict(manager, image, detector, recognizer); 90 | if (detections == null) { 91 | return null; 92 | } 93 | 94 | List initList = new ArrayList<>(); 95 | for (RotatedBox result : detections) { 96 | // put low Y value at the head of the queue. 
97 | initList.add(result); 98 | } 99 | Collections.sort(initList); 100 | 101 | List> lines = new ArrayList<>(); 102 | List line = new ArrayList<>(); 103 | RotatedBoxCompX firstBox = new RotatedBoxCompX(initList.get(0).getBox(), initList.get(0).getText()); 104 | line.add(firstBox); 105 | lines.add((ArrayList) line); 106 | for (int i = 1; i < initList.size(); i++) { 107 | RotatedBoxCompX tmpBox = new RotatedBoxCompX(initList.get(i).getBox(), initList.get(i).getText()); 108 | float y1 = firstBox.getBox().toFloatArray()[1]; 109 | float y2 = tmpBox.getBox().toFloatArray()[1]; 110 | float dis = Math.abs(y2 - y1); 111 | if (dis < 20) { // 认为是同 1 行 - Considered to be in the same line 112 | line.add(tmpBox); 113 | } else { // 换行 - Line break 114 | firstBox = tmpBox; 115 | Collections.sort(line); 116 | line = new ArrayList<>(); 117 | line.add(firstBox); 118 | lines.add((ArrayList) line); 119 | } 120 | } 121 | 122 | StringBuffer fullText = new StringBuffer(); 123 | for (int i = 0; i < lines.size(); i++) { 124 | for (int j = 0; j < lines.get(i).size(); j++) { 125 | String text = lines.get(i).get(j).getText(); 126 | if (text.trim().equals("")) 127 | continue; 128 | fullText.append(text + " "); 129 | } 130 | fullText.append('\n'); 131 | } 132 | return fullText.toString(); 133 | } 134 | 135 | public void close() { 136 | detector.close(); 137 | recognizer.close(); 138 | } 139 | 140 | } 141 | -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/common/ImageUtils.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.djl.paddle.ocr.v4.common; 2 | 3 | import java.awt.BasicStroke; 4 | import java.awt.Color; 5 | import java.awt.Font; 6 | import java.awt.Graphics; 7 | import java.awt.Graphics2D; 8 | import java.awt.image.BufferedImage; 9 | import java.io.IOException; 10 | import java.nio.file.Files; 11 | import 
java.nio.file.Path; 12 | import java.nio.file.Paths; 13 | import java.util.ArrayList; 14 | import java.util.List; 15 | 16 | import org.opencv.core.Mat; 17 | import org.opencv.core.Point; 18 | import org.opencv.core.Scalar; 19 | import org.opencv.imgproc.Imgproc; 20 | 21 | import ai.djl.modality.cv.Image; 22 | import ai.djl.modality.cv.ImageFactory; 23 | import ai.djl.modality.cv.output.DetectedObjects; 24 | import ai.djl.ndarray.NDArray; 25 | 26 | /** 27 | * 图像工具类 28 | */ 29 | public class ImageUtils { 30 | 31 | /** 32 | * 保存BufferedImage图片 33 | * 34 | * @param img 35 | * @param name 36 | * @param path 37 | */ 38 | public static void saveImage(BufferedImage img, String name, String path) { 39 | Image djlImg = ImageFactory.getInstance().fromImage(img); // 支持多种图片格式,自动适配 40 | Path outputDir = Paths.get(path); 41 | Path imagePath = outputDir.resolve(name); 42 | // OpenJDK 不能保存 jpg 图片的 alpha channel 43 | try { 44 | djlImg.save(Files.newOutputStream(imagePath), "png"); 45 | } catch (IOException e) { 46 | e.printStackTrace(); 47 | } 48 | } 49 | 50 | /** 51 | * 保存DJL图片 52 | * 53 | * @param img 54 | * @param name 55 | * @param path 56 | */ 57 | public static void saveImage(Image img, String name, String path) { 58 | Path outputDir = Paths.get(path); 59 | if (!Files.exists(outputDir)) { 60 | try { 61 | Files.createDirectories(outputDir); 62 | } catch (IOException e) { 63 | e.printStackTrace(); 64 | } 65 | } 66 | Path imagePath = outputDir.resolve(name); 67 | // OpenJDK 不能保存 jpg 图片的 alpha channel 68 | try { 69 | img.save(Files.newOutputStream(imagePath), "png"); 70 | } catch (IOException e) { 71 | e.printStackTrace(); 72 | } 73 | } 74 | 75 | /** 76 | * 保存图片,含检测框 77 | * 78 | * @param img 79 | * @param detection 80 | * @param name 81 | * @param path 82 | * @throws IOException 83 | */ 84 | public static void saveBoundingBoxImage(Image img, DetectedObjects detection, String name, String path) 85 | throws IOException { 86 | // Make image copy with alpha channel because original 
image was jpg 87 | img.drawBoundingBoxes(detection); 88 | Path outputDir = Paths.get(path); 89 | Files.createDirectories(outputDir); 90 | Path imagePath = outputDir.resolve(name); 91 | // OpenJDK can't save jpg with alpha channel 92 | img.save(Files.newOutputStream(imagePath), "png"); 93 | } 94 | 95 | /** 96 | * 画矩形 97 | * 98 | * @param mat 99 | * @param box 100 | */ 101 | public static void drawRect(Mat mat, NDArray box) { 102 | 103 | float[] points = box.toFloatArray(); 104 | List list = new ArrayList<>(); 105 | 106 | for (int i = 0; i < 4; i++) { 107 | Point point = new Point((int) points[2 * i], (int) points[2 * i + 1]); 108 | list.add(point); 109 | } 110 | 111 | Imgproc.line(mat, list.get(0), list.get(1), new Scalar(0, 255, 0), 1); 112 | Imgproc.line(mat, list.get(1), list.get(2), new Scalar(0, 255, 0), 1); 113 | Imgproc.line(mat, list.get(2), list.get(3), new Scalar(0, 255, 0), 1); 114 | Imgproc.line(mat, list.get(3), list.get(0), new Scalar(0, 255, 0), 1); 115 | } 116 | 117 | /** 118 | * 画矩形 119 | * 120 | * @param mat 121 | * @param box 122 | * @param text 123 | */ 124 | public static void drawRectWithText(Mat mat, NDArray box, String text) { 125 | 126 | float[] points = box.toFloatArray(); 127 | List list = new ArrayList<>(); 128 | 129 | for (int i = 0; i < 4; i++) { 130 | Point point = new Point((int) points[2 * i], (int) points[2 * i + 1]); 131 | list.add(point); 132 | } 133 | 134 | Imgproc.line(mat, list.get(0), list.get(1), new Scalar(0, 255, 0), 1); 135 | Imgproc.line(mat, list.get(1), list.get(2), new Scalar(0, 255, 0), 1); 136 | Imgproc.line(mat, list.get(2), list.get(3), new Scalar(0, 255, 0), 1); 137 | Imgproc.line(mat, list.get(3), list.get(0), new Scalar(0, 255, 0), 1); 138 | // 中文乱码 139 | Imgproc.putText(mat, text, list.get(0), Imgproc.FONT_HERSHEY_SCRIPT_SIMPLEX, 1.0, new Scalar(0, 255, 0), 1); 140 | } 141 | 142 | /** 143 | * 画检测框(有倾斜角) 144 | * 145 | * @param image 146 | * @param box 147 | */ 148 | public static void drawImageRect(BufferedImage 
image, NDArray box) { 149 | float[] points = box.toFloatArray(); 150 | int[] xPoints = new int[5]; 151 | int[] yPoints = new int[5]; 152 | 153 | for (int i = 0; i < 4; i++) { 154 | xPoints[i] = (int) points[2 * i]; 155 | yPoints[i] = (int) points[2 * i + 1]; 156 | } 157 | xPoints[4] = xPoints[0]; 158 | yPoints[4] = yPoints[0]; 159 | 160 | // 将绘制图像转换为Graphics2D 161 | Graphics2D g = (Graphics2D) image.getGraphics(); 162 | try { 163 | g.setColor(new Color(0, 255, 0)); 164 | // 声明画笔属性 :粗 细(单位像素)末端无修饰 折线处呈尖角 165 | BasicStroke bStroke = new BasicStroke(4, BasicStroke.CAP_BUTT, BasicStroke.JOIN_MITER); 166 | g.setStroke(bStroke); 167 | g.drawPolyline(xPoints, yPoints, 5); // xPoints, yPoints, nPoints 168 | } finally { 169 | g.dispose(); 170 | } 171 | } 172 | 173 | /** 174 | * 画检测框(有倾斜角)和文本 175 | * 176 | * @param image 177 | * @param box 178 | * @param text 179 | */ 180 | public static void drawImageRectWithText(BufferedImage image, NDArray box, String text) { 181 | float[] points = box.toFloatArray(); 182 | int[] xPoints = new int[5]; 183 | int[] yPoints = new int[5]; 184 | 185 | for (int i = 0; i < 4; i++) { 186 | xPoints[i] = (int) points[2 * i]; 187 | yPoints[i] = (int) points[2 * i + 1]; 188 | } 189 | xPoints[4] = xPoints[0]; 190 | yPoints[4] = yPoints[0]; 191 | 192 | // 将绘制图像转换为Graphics2D 193 | Graphics2D g = (Graphics2D) image.getGraphics(); 194 | try { 195 | int fontSize = 32; 196 | Font font = new Font("楷体", Font.PLAIN, fontSize); 197 | g.setFont(font); 198 | g.setColor(new Color(0, 0, 255)); 199 | // 声明画笔属性 :粗 细(单位像素)末端无修饰 折线处呈尖角 200 | BasicStroke bStroke = new BasicStroke(2, BasicStroke.CAP_BUTT, BasicStroke.JOIN_MITER); 201 | g.setStroke(bStroke); 202 | g.drawPolyline(xPoints, yPoints, 5); // xPoints, yPoints, nPoints 203 | g.drawString(text, xPoints[0], yPoints[0]); 204 | } finally { 205 | g.dispose(); 206 | } 207 | } 208 | 209 | /** 210 | * 画检测框 211 | * 212 | * @param image 213 | * @param x 214 | * @param y 215 | * @param width 216 | * @param height 217 | */ 
218 | public static void drawImageRect(BufferedImage image, int x, int y, int width, int height) { 219 | // 将绘制图像转换为Graphics2D 220 | Graphics2D g = (Graphics2D) image.getGraphics(); 221 | try { 222 | g.setColor(new Color(0, 255, 0)); 223 | // 声明画笔属性 :粗 细(单位像素)末端无修饰 折线处呈尖角 224 | BasicStroke bStroke = new BasicStroke(2, BasicStroke.CAP_BUTT, BasicStroke.JOIN_MITER); 225 | g.setStroke(bStroke); 226 | g.drawRect(x, y, width, height); 227 | } finally { 228 | g.dispose(); 229 | } 230 | } 231 | 232 | /** 233 | * 显示文字 234 | * 235 | * @param image 236 | * @param text 237 | * @param x 238 | * @param y 239 | */ 240 | public static void drawImageText(BufferedImage image, String text, int x, int y) { 241 | Graphics graphics = image.getGraphics(); 242 | int fontSize = 32; 243 | Font font = new Font("楷体", Font.PLAIN, fontSize); 244 | try { 245 | graphics.setFont(font); 246 | graphics.setColor(new Color(0, 0, 255)); 247 | //int strWidth = graphics.getFontMetrics().stringWidth(text); 248 | graphics.drawString(text, x, y); 249 | } finally { 250 | graphics.dispose(); 251 | } 252 | } 253 | } -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/common/RotatedBox.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.djl.paddle.ocr.v4.common; 2 | 3 | import ai.djl.ndarray.NDArray; 4 | 5 | /** 6 | * 旋转检测框 7 | */ 8 | public class RotatedBox implements Comparable { 9 | private NDArray box; 10 | private String text; 11 | 12 | public RotatedBox(NDArray box, String text) { 13 | this.box = box; 14 | this.text = text; 15 | } 16 | 17 | /** 18 | * 将左上角 Y 坐标升序排序 19 | * 20 | * @param o 21 | * @return 22 | */ 23 | @Override 24 | public int compareTo(RotatedBox o) { 25 | NDArray lowBox = this.getBox(); 26 | NDArray highBox = o.getBox(); 27 | float lowY = lowBox.toFloatArray()[1]; 28 | float highY = highBox.toFloatArray()[1]; 29 | 
return (lowY < highY) ? -1 : 1; 30 | } 31 | 32 | public NDArray getBox() { 33 | return box; 34 | } 35 | 36 | public void setBox(NDArray box) { 37 | this.box = box; 38 | } 39 | 40 | public String getText() { 41 | return text; 42 | } 43 | 44 | public void setText(String text) { 45 | this.text = text; 46 | } 47 | } -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/common/RotatedBoxCompX.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.djl.paddle.ocr.v4.common; 2 | 3 | import ai.djl.ndarray.NDArray; 4 | 5 | /** 6 | */ 7 | public class RotatedBoxCompX implements Comparable { 8 | private NDArray box; 9 | private String text; 10 | 11 | public RotatedBoxCompX(NDArray box, String text) { 12 | this.box = box; 13 | this.text = text; 14 | } 15 | 16 | /** 17 | * 将左上角 X 坐标升序排序 18 | * 19 | * @param o 20 | * @return 21 | */ 22 | @Override 23 | public int compareTo(RotatedBoxCompX o) { 24 | NDArray leftBox = this.getBox(); 25 | NDArray rightBox = o.getBox(); 26 | float leftX = leftBox.toFloatArray()[0]; 27 | float rightX = rightBox.toFloatArray()[0]; 28 | return (leftX < rightX) ? 
-1 : 1; 29 | } 30 | 31 | public NDArray getBox() { 32 | return box; 33 | } 34 | 35 | public void setBox(NDArray box) { 36 | this.box = box; 37 | } 38 | 39 | public String getText() { 40 | return text; 41 | } 42 | 43 | public void setText(String text) { 44 | this.text = text; 45 | } 46 | } -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/detection/OCRDetectionTranslator.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.djl.paddle.ocr.v4.detection; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.Map; 6 | 7 | import org.opencv.core.Core; 8 | import org.opencv.core.CvType; 9 | import org.opencv.core.Mat; 10 | import org.opencv.core.MatOfPoint; 11 | import org.opencv.core.MatOfPoint2f; 12 | import org.opencv.core.RotatedRect; 13 | import org.opencv.core.Scalar; 14 | import org.opencv.imgproc.Imgproc; 15 | 16 | import com.litongjava.ai.djl.paddle.ocr.v4.opencv.NDArrayUtils; 17 | 18 | import ai.djl.modality.cv.Image; 19 | import ai.djl.modality.cv.util.NDImageUtils; 20 | import ai.djl.ndarray.NDArray; 21 | import ai.djl.ndarray.NDArrays; 22 | import ai.djl.ndarray.NDList; 23 | import ai.djl.ndarray.NDManager; 24 | import ai.djl.ndarray.index.NDIndex; 25 | import ai.djl.ndarray.types.DataType; 26 | import ai.djl.ndarray.types.Shape; 27 | import ai.djl.translate.Batchifier; 28 | import ai.djl.translate.Translator; 29 | import ai.djl.translate.TranslatorContext; 30 | 31 | /** 32 | * 文字检测前后处理 33 | */ 34 | public class OCRDetectionTranslator implements Translator { 35 | // det_algorithm == "DB" 36 | private final float thresh = 0.3f; 37 | private final boolean use_dilation = false; 38 | private final String score_mode = "fast"; 39 | private final String box_type = "quad"; 40 | 41 | private final int limit_side_len; 42 | private final int max_candidates; 43 | 
/**
 * Creates the translator, reading optional overrides from {@code arguments}.
 *
 * <p>Recognized keys and their defaults: {@code limit_side_len} (960), {@code max_candidates}
 * (1000), {@code min_size} (3), {@code box_thresh} (0.6) and {@code unclip_ratio} (1.6). A
 * value is converted with {@code toString()} and then parsed, so numbers and numeric strings
 * are both accepted.
 *
 * @param arguments translator arguments; keys that are absent fall back to the defaults
 */
public OCRDetectionTranslator(Map<String, ?> arguments) {
  limit_side_len = intArgument(arguments, "limit_side_len", 960);
  max_candidates = intArgument(arguments, "max_candidates", 1000);
  min_size = intArgument(arguments, "min_size", 3);
  box_thresh = floatArgument(arguments, "box_thresh", 0.6f); // 0.5f
  unclip_ratio = floatArgument(arguments, "unclip_ratio", 1.6f);
}

/** Returns the integer stored under {@code key}, or {@code fallback} when the key is absent. */
private static int intArgument(Map<String, ?> arguments, String key, int fallback) {
  if (!arguments.containsKey(key)) {
    return fallback;
  }
  return Integer.parseInt(arguments.get(key).toString());
}

/** Returns the float stored under {@code key}, or {@code fallback} when the key is absent. */
private static float floatArgument(Map<String, ?> arguments, String key, float fallback) {
  if (!arguments.containsKey(key)) {
    return fallback;
  }
  return Float.parseFloat(arguments.get(key).toString());
}
Imgproc.dilate(srcMat, mask, dilation_kernel); 99 | //destination Matrix 100 | Scalar scalar = new Scalar(255); 101 | Core.multiply(mask, scalar, newMask); 102 | // release Mat 103 | mask.release(); 104 | srcMat.release(); 105 | dilation_kernel.release(); 106 | } else { 107 | Mat srcMat = NDArrayUtils.uint8NDArrayToMat(segmentation); 108 | //destination Matrix 109 | Scalar scalar = new Scalar(255); 110 | Core.multiply(srcMat, scalar, newMask); 111 | // release Mat 112 | srcMat.release(); 113 | } 114 | 115 | NDList dt_boxes = null; 116 | NDArray boxes = boxes_from_bitmap(manager, pred, newMask); 117 | if (boxes != null) { 118 | //boxes[:, :, 0] = boxes[:, :, 0] / ratio_w 119 | NDArray boxes1 = boxes.get(":, :, 0").div(ratio_w); 120 | boxes.set(new NDIndex(":, :, 0"), boxes1); 121 | //boxes[:, :, 1] = boxes[:, :, 1] / ratio_h 122 | NDArray boxes2 = boxes.get(":, :, 1").div(ratio_h); 123 | boxes.set(new NDIndex(":, :, 1"), boxes2); 124 | 125 | dt_boxes = this.filter_tag_det_res(boxes); 126 | 127 | dt_boxes.detach(); 128 | } 129 | 130 | // release Mat 131 | newMask.release(); 132 | 133 | return dt_boxes; 134 | } 135 | 136 | 137 | private NDList filter_tag_det_res(NDArray dt_boxes) { 138 | NDList boxesList = new NDList(); 139 | 140 | int num = (int) dt_boxes.getShape().get(0); 141 | for (int i = 0; i < num; i++) { 142 | NDArray box = dt_boxes.get(i); 143 | box = order_points_clockwise(box); 144 | box = clip_det_res(box); 145 | float[] box0 = box.get(0).toFloatArray(); 146 | float[] box1 = box.get(1).toFloatArray(); 147 | float[] box3 = box.get(3).toFloatArray(); 148 | int rect_width = (int) Math.sqrt(Math.pow(box1[0] - box0[0], 2) + Math.pow(box1[1] - box0[1], 2)); 149 | int rect_height = (int) Math.sqrt(Math.pow(box3[0] - box0[0], 2) + Math.pow(box3[1] - box0[1], 2)); 150 | if (rect_width <= 3 || rect_height <= 3) 151 | continue; 152 | boxesList.add(box); 153 | } 154 | 155 | return boxesList; 156 | } 157 | 158 | private NDArray clip_det_res(NDArray points) { 159 | for 
(int i = 0; i < points.getShape().get(0); i++) { 160 | int value = Math.max((int) points.get(i, 0).toFloatArray()[0], 0); 161 | value = Math.min(value, img_width - 1); 162 | points.set(new NDIndex(i + ",0"), value); 163 | value = Math.max((int) points.get(i, 1).toFloatArray()[0], 0); 164 | value = Math.min(value, img_height - 1); 165 | points.set(new NDIndex(i + ",1"), value); 166 | } 167 | 168 | return points; 169 | } 170 | 171 | /** 172 | * sort the points based on their x-coordinates 173 | * 顺时针 174 | * 175 | * @param pts 176 | * @return 177 | */ 178 | 179 | private NDArray order_points_clockwise(NDArray pts) { 180 | NDList list = new NDList(); 181 | long[] indexes = pts.get(":, 0").argSort().toLongArray(); 182 | 183 | // grab the left-most and right-most points from the sorted 184 | // x-roodinate points 185 | Shape s1 = pts.getShape(); 186 | NDArray leftMost1 = pts.get(indexes[0] + ",:"); 187 | NDArray leftMost2 = pts.get(indexes[1] + ",:"); 188 | NDArray leftMost = leftMost1.concat(leftMost2).reshape(2, 2); 189 | NDArray rightMost1 = pts.get(indexes[2] + ",:"); 190 | NDArray rightMost2 = pts.get(indexes[3] + ",:"); 191 | NDArray rightMost = rightMost1.concat(rightMost2).reshape(2, 2); 192 | 193 | // now, sort the left-most coordinates according to their 194 | // y-coordinates so we can grab the top-left and bottom-left 195 | // points, respectively 196 | indexes = leftMost.get(":, 1").argSort().toLongArray(); 197 | NDArray lt = leftMost.get(indexes[0] + ",:"); 198 | NDArray lb = leftMost.get(indexes[1] + ",:"); 199 | indexes = rightMost.get(":, 1").argSort().toLongArray(); 200 | NDArray rt = rightMost.get(indexes[0] + ",:"); 201 | NDArray rb = rightMost.get(indexes[1] + ",:"); 202 | 203 | list.add(lt); 204 | list.add(rt); 205 | list.add(rb); 206 | list.add(lb); 207 | 208 | NDArray rect = NDArrays.concat(list).reshape(4, 2); 209 | return rect; 210 | } 211 | 212 | /** 213 | * Get boxes from the binarized image predicted by DB 214 | * 215 | * @param manager 216 
| * @param pred the binarized image predicted by DB. 217 | * @param bitmap new 'pred' after threshold filtering. 218 | */ 219 | private NDArray boxes_from_bitmap(NDManager manager, NDArray pred, Mat bitmap) { 220 | int dest_height = (int) pred.getShape().get(0); 221 | int dest_width = (int) pred.getShape().get(1); 222 | int height = bitmap.rows(); 223 | int width = bitmap.cols(); 224 | 225 | List contours = new ArrayList<>(); 226 | Mat hierarchy = new Mat(); 227 | // 寻找轮廓 228 | Imgproc.findContours( 229 | bitmap, 230 | contours, 231 | hierarchy, 232 | Imgproc.RETR_LIST, 233 | Imgproc.CHAIN_APPROX_SIMPLE); 234 | 235 | int num_contours = Math.min(contours.size(), max_candidates); 236 | NDList boxList = new NDList(); 237 | float[] scores = new float[num_contours]; 238 | 239 | for (int index = 0; index < num_contours; index++) { 240 | MatOfPoint contour = contours.get(index); 241 | MatOfPoint2f newContour = new MatOfPoint2f(contour.toArray()); 242 | float[][] pointsArr = new float[4][2]; 243 | int sside = get_mini_boxes(newContour, pointsArr); 244 | if (sside < this.min_size) 245 | continue; 246 | NDArray points = manager.create(pointsArr); 247 | float score = box_score_fast(manager, pred, points); 248 | if (score < this.box_thresh) 249 | continue; 250 | 251 | NDArray box = unclip(manager, points); // TODO get_mini_boxes(box) 252 | 253 | // box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width) 254 | NDArray boxes1 = box.get(":,0").div(width).mul(dest_width).round().clip(0, dest_width); 255 | box.set(new NDIndex(":, 0"), boxes1); 256 | // box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0, dest_height) 257 | NDArray boxes2 = box.get(":,1").div(height).mul(dest_height).round().clip(0, dest_height); 258 | box.set(new NDIndex(":, 1"), boxes2); 259 | 260 | boxList.add(box); 261 | scores[index] = score; 262 | 263 | // release memory 264 | contour.release(); 265 | newContour.release(); 266 | } 267 | 268 | // release 269 | 
hierarchy.release(); 270 | 271 | NDArray boxes = null; 272 | if (boxList.size() > 0) { 273 | boxes = NDArrays.stack(boxList); 274 | return boxes; 275 | } 276 | 277 | return boxes; 278 | 279 | 280 | } 281 | 282 | /** 283 | * Shrink or expand the boxaccording to 'unclip_ratio' 284 | * 285 | * @param points The predicted box. 286 | * @return uncliped box 287 | */ 288 | private NDArray unclip(NDManager manager, NDArray points) { 289 | points = order_points_clockwise(points); 290 | float[] pointsArr = points.toFloatArray(); 291 | float[] lt = java.util.Arrays.copyOfRange(pointsArr, 0, 2); 292 | float[] lb = java.util.Arrays.copyOfRange(pointsArr, 6, 8); 293 | 294 | float[] rt = java.util.Arrays.copyOfRange(pointsArr, 2, 4); 295 | float[] rb = java.util.Arrays.copyOfRange(pointsArr, 4, 6); 296 | 297 | float width = distance(lt, rt); 298 | float height = distance(lt, lb); 299 | 300 | if (width > height) { 301 | float k = (lt[1] - rt[1]) / (lt[0] - rt[0]); // y = k * x + b 302 | 303 | float delta_dis = height; 304 | float delta_x = (float) Math.sqrt((delta_dis * delta_dis) / (k * k + 1)); 305 | float delta_y = Math.abs(k * delta_x); 306 | 307 | if (k > 0) { 308 | pointsArr[0] = lt[0] - delta_x + delta_y; 309 | pointsArr[1] = lt[1] - delta_y - delta_x; 310 | pointsArr[2] = rt[0] + delta_x + delta_y; 311 | pointsArr[3] = rt[1] + delta_y - delta_x; 312 | 313 | pointsArr[4] = rb[0] + delta_x - delta_y; 314 | pointsArr[5] = rb[1] + delta_y + delta_x; 315 | pointsArr[6] = lb[0] - delta_x - delta_y; 316 | pointsArr[7] = lb[1] - delta_y + delta_x; 317 | } else { 318 | pointsArr[0] = lt[0] - delta_x - delta_y; 319 | pointsArr[1] = lt[1] + delta_y - delta_x; 320 | pointsArr[2] = rt[0] + delta_x - delta_y; 321 | pointsArr[3] = rt[1] - delta_y - delta_x; 322 | 323 | pointsArr[4] = rb[0] + delta_x + delta_y; 324 | pointsArr[5] = rb[1] - delta_y + delta_x; 325 | pointsArr[6] = lb[0] - delta_x + delta_y; 326 | pointsArr[7] = lb[1] + delta_y + delta_x; 327 | } 328 | } else { 329 | float k 
= (lt[1] - rt[1]) / (lt[0] - rt[0]); // y = k * x + b 330 | 331 | float delta_dis = width; 332 | float delta_y = (float) Math.sqrt((delta_dis * delta_dis) / (k * k + 1)); 333 | float delta_x = Math.abs(k * delta_y); 334 | 335 | if (k > 0) { 336 | pointsArr[0] = lt[0] + delta_x - delta_y; 337 | pointsArr[1] = lt[1] - delta_y - delta_x; 338 | pointsArr[2] = rt[0] + delta_x + delta_y; 339 | pointsArr[3] = rt[1] - delta_y + delta_x; 340 | 341 | pointsArr[4] = rb[0] - delta_x + delta_y; 342 | pointsArr[5] = rb[1] + delta_y + delta_x; 343 | pointsArr[6] = lb[0] - delta_x - delta_y; 344 | pointsArr[7] = lb[1] + delta_y - delta_x; 345 | } else { 346 | pointsArr[0] = lt[0] - delta_x - delta_y; 347 | pointsArr[1] = lt[1] - delta_y + delta_x; 348 | pointsArr[2] = rt[0] - delta_x + delta_y; 349 | pointsArr[3] = rt[1] - delta_y - delta_x; 350 | 351 | pointsArr[4] = rb[0] + delta_x + delta_y; 352 | pointsArr[5] = rb[1] + delta_y - delta_x; 353 | pointsArr[6] = lb[0] + delta_x - delta_y; 354 | pointsArr[7] = lb[1] + delta_y + delta_x; 355 | } 356 | } 357 | points = manager.create(pointsArr).reshape(4, 2); 358 | 359 | return points; 360 | } 361 | 362 | private float distance(float[] point1, float[] point2) { 363 | float disX = point1[0] - point2[0]; 364 | float disY = point1[1] - point2[1]; 365 | float dis = (float) Math.sqrt(disX * disX + disY * disY); 366 | return dis; 367 | } 368 | 369 | /** 370 | * Get boxes from the contour or box. 371 | * 372 | * @param contour The predicted contour. 373 | * @param pointsArr The predicted box. 
374 | * @return smaller side of box 375 | */ 376 | private int get_mini_boxes(MatOfPoint2f contour, float[][] pointsArr) { 377 | // https://blog.csdn.net/qq_37385726/article/details/82313558 378 | // bounding_box[1] - rect 返回矩形的长和宽 379 | RotatedRect rect = Imgproc.minAreaRect(contour); 380 | Mat points = new Mat(); 381 | Imgproc.boxPoints(rect, points); 382 | 383 | float[][] fourPoints = new float[4][2]; 384 | for (int row = 0; row < 4; row++) { 385 | fourPoints[row][0] = (float) points.get(row, 0)[0]; 386 | fourPoints[row][1] = (float) points.get(row, 1)[0]; 387 | } 388 | 389 | float[] tmpPoint = new float[2]; 390 | for (int i = 0; i < 4; i++) { 391 | for (int j = i + 1; j < 4; j++) { 392 | if (fourPoints[j][0] < fourPoints[i][0]) { 393 | tmpPoint[0] = fourPoints[i][0]; 394 | tmpPoint[1] = fourPoints[i][1]; 395 | fourPoints[i][0] = fourPoints[j][0]; 396 | fourPoints[i][1] = fourPoints[j][1]; 397 | fourPoints[j][0] = tmpPoint[0]; 398 | fourPoints[j][1] = tmpPoint[1]; 399 | } 400 | } 401 | } 402 | 403 | int index_1 = 0; 404 | int index_2 = 1; 405 | int index_3 = 2; 406 | int index_4 = 3; 407 | 408 | if (fourPoints[1][1] > fourPoints[0][1]) { 409 | index_1 = 0; 410 | index_4 = 1; 411 | } else { 412 | index_1 = 1; 413 | index_4 = 0; 414 | } 415 | 416 | if (fourPoints[3][1] > fourPoints[2][1]) { 417 | index_2 = 2; 418 | index_3 = 3; 419 | } else { 420 | index_2 = 3; 421 | index_3 = 2; 422 | } 423 | 424 | pointsArr[0] = fourPoints[index_1]; 425 | pointsArr[1] = fourPoints[index_2]; 426 | pointsArr[2] = fourPoints[index_3]; 427 | pointsArr[3] = fourPoints[index_4]; 428 | 429 | int height = rect.boundingRect().height; 430 | int width = rect.boundingRect().width; 431 | int sside = Math.min(height, width); 432 | 433 | // release 434 | points.release(); 435 | 436 | return sside; 437 | } 438 | 439 | /** 440 | * Calculate the score of box. 441 | * 442 | * @param bitmap The binarized image predicted by DB. 
443 | * @param points The predicted box 444 | * @return 445 | */ 446 | private float box_score_fast(NDManager manager, NDArray bitmap, NDArray points) { 447 | NDArray box = points.get(":"); 448 | long h = bitmap.getShape().get(0); 449 | long w = bitmap.getShape().get(1); 450 | // xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1) 451 | int xmin = box.get(":, 0").min().floor().clip(0, w - 1).toType(DataType.INT32, true).toIntArray()[0]; 452 | int xmax = box.get(":, 0").max().ceil().clip(0, w - 1).toType(DataType.INT32, true).toIntArray()[0]; 453 | int ymin = box.get(":, 1").min().floor().clip(0, h - 1).toType(DataType.INT32, true).toIntArray()[0]; 454 | int ymax = box.get(":, 1").max().ceil().clip(0, h - 1).toType(DataType.INT32, true).toIntArray()[0]; 455 | 456 | NDArray mask = manager.zeros(new Shape(ymax - ymin + 1, xmax - xmin + 1), DataType.UINT8); 457 | 458 | box.set(new NDIndex(":, 0"), box.get(":, 0").sub(xmin)); 459 | box.set(new NDIndex(":, 1"), box.get(":, 1").sub(ymin)); 460 | 461 | //mask - convert from NDArray to Mat 462 | Mat maskMat = NDArrayUtils.uint8NDArrayToMat(mask); 463 | 464 | //mask - convert from NDArray to Mat - 4 rows, 2 cols 465 | Mat boxMat = NDArrayUtils.floatNDArrayToMat(box, CvType.CV_32S); 466 | 467 | // boxMat.reshape(1, new int[]{1, 4, 2}); 468 | List pts = new ArrayList<>(); 469 | MatOfPoint matOfPoint = NDArrayUtils.matToMatOfPoint(boxMat); // new MatOfPoint(boxMat); 470 | pts.add(matOfPoint); 471 | Imgproc.fillPoly(maskMat, pts, new Scalar(1)); 472 | 473 | 474 | NDArray subBitMap = bitmap.get(ymin + ":" + (ymax + 1) + "," + xmin + ":" + (xmax + 1)); 475 | Mat bitMapMat = NDArrayUtils.floatNDArrayToMat(subBitMap); 476 | 477 | Scalar score = Core.mean(bitMapMat, maskMat); 478 | float scoreValue = (float) score.val[0]; 479 | // release 480 | maskMat.release(); 481 | boxMat.release(); 482 | bitMapMat.release(); 483 | 484 | return scoreValue; 485 | } 486 | 487 | @Override 488 | public NDList 
processInput(TranslatorContext ctx, Image input) { 489 | NDArray img = input.toNDArray(ctx.getNDManager()); 490 | int h = input.getHeight(); 491 | int w = input.getWidth(); 492 | img_height = h; 493 | img_width = w; 494 | 495 | // limit the max side 496 | float ratio = 1.0f; 497 | if (Math.max(h, w) > limit_side_len) { 498 | if (h > w) { 499 | ratio = (float) limit_side_len / (float) h; 500 | } else { 501 | ratio = (float) limit_side_len / (float) w; 502 | } 503 | } 504 | 505 | int resize_h = (int) (h * ratio); 506 | int resize_w = (int) (w * ratio); 507 | 508 | resize_h = Math.round((float) resize_h / 32f) * 32; 509 | resize_w = Math.round((float) resize_w / 32f) * 32; 510 | 511 | ratio_h = resize_h / (float) h; 512 | ratio_w = resize_w / (float) w; 513 | 514 | img = NDImageUtils.resize(img, resize_w, resize_h); 515 | 516 | img = NDImageUtils.toTensor(img); 517 | 518 | img = 519 | NDImageUtils.normalize( 520 | img, 521 | new float[]{0.485f, 0.456f, 0.406f}, 522 | new float[]{0.229f, 0.224f, 0.225f}); 523 | 524 | img = img.expandDims(0); 525 | 526 | return new NDList(img); 527 | } 528 | 529 | @Override 530 | public Batchifier getBatchifier() { 531 | return null; 532 | } 533 | } -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/detection/OcrV4Detection.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.djl.paddle.ocr.v4.detection; 2 | 3 | import java.net.URL; 4 | import java.nio.file.Path; 5 | import java.nio.file.Paths; 6 | import java.util.concurrent.ConcurrentHashMap; 7 | 8 | import ai.djl.Device; 9 | import ai.djl.modality.cv.Image; 10 | import ai.djl.ndarray.NDList; 11 | import ai.djl.repository.zoo.Criteria; 12 | import ai.djl.repository.zoo.Criteria.Builder; 13 | import ai.djl.training.util.ProgressBar; 14 | import cn.hutool.core.io.resource.ResourceUtil; 15 | 16 | /** 17 | * 文字检测 18 
| */ 19 |
public final class OcrV4Detection {
    /**
     * Builds the model-loading criteria for the Chinese text-detection model.
     *
     * <p>The model archive is looked up as the classpath resource
     * {@code models/ch_PP-OCRv4_det_infer.zip}. When that resource resolves to a
     * path on the default file system the model is loaded from that path;
     * otherwise (resource missing, or not convertible to a {@link Path}) the
     * {@code jar://} URL is used instead.
     *
     * @return criteria mapping an {@link Image} to the detection output {@link NDList}
     */
    public Criteria<Image, NDList> chDetCriteria() {
        URL resource = ResourceUtil.getResource("models/ch_PP-OCRv4_det_infer.zip");
        System.out.println("resource:" + resource);
        Path modelPath = null;
        // Explicit null check instead of relying on a NullPointerException
        // being caught below when the resource is absent.
        if (resource != null) {
            try {
                modelPath = Paths.get(resource.toURI());
            } catch (Exception e) {
                // Best effort: any failure here falls through to the jar:// URL.
                System.err.println(e.getMessage());
            }
        }

        // NOTE(review): the device is hard-coded to the default GPU - confirm this
        // is intended for CPU-only hosts.
        Device device = Device.gpu();
        Builder<Image, NDList> builder = Criteria.builder()
                // engine
                .optEngine("OnnxRuntime")
                .setTypes(Image.class, NDList.class)
                .optDevice(device)
                .optTranslator(new OCRDetectionTranslator(new ConcurrentHashMap<>()))
                .optProgress(new ProgressBar());

        if (modelPath != null) {
            System.out.println("load from file");
            builder.optModelPath(modelPath).optModelName("ch_PP-OCRv4_det_infer");
        } else {
            System.out.println("load from jar");
            builder.optModelUrls("jar:///models/ch_PP-OCRv4_det_infer.zip");
        }
        return builder.build();
    }

}
-------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/opencv/NDArrayUtils.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.djl.paddle.ocr.v4.opencv; 2 | 3 | import ai.djl.ndarray.NDArray; 4 | import org.opencv.core.CvType; 5 | import org.opencv.core.Mat; 6 | import org.opencv.core.MatOfPoint; 7 | import org.opencv.core.Point; 8 | 9 | import java.util.ArrayList; 10 | import java.util.List; 11 | 12 | public class NDArrayUtils { 13 | /** 14 | * Mat To MatOfPoint 15 | * 16 | * @param mat 17 | * @return 18 | */ 19 | public static MatOfPoint matToMatOfPoint(Mat mat) { 20 | int rows = mat.rows(); 21 | MatOfPoint matOfPoint = new MatOfPoint(); 22 |
23 | List list = new ArrayList<>(); 24 | for (int i = 0; i < rows; i++) { 25 | Point point = new Point((float) mat.get(i, 0)[0], (float) mat.get(i, 1)[0]); 26 | list.add(point); 27 | } 28 | matOfPoint.fromList(list); 29 | 30 | return matOfPoint; 31 | } 32 | 33 | /** 34 | * float NDArray To float[][] Array 35 | * 36 | * @param ndArray 37 | * @return 38 | */ 39 | public static float[][] floatNDArrayToArray(NDArray ndArray) { 40 | int rows = (int) (ndArray.getShape().get(0)); 41 | int cols = (int) (ndArray.getShape().get(1)); 42 | float[][] arr = new float[rows][cols]; 43 | 44 | float[] arrs = ndArray.toFloatArray(); 45 | for (int i = 0; i < rows; i++) { 46 | for (int j = 0; j < cols; j++) { 47 | arr[i][j] = arrs[i * cols + j]; 48 | } 49 | } 50 | return arr; 51 | } 52 | 53 | /** 54 | * Mat To double[][] Array 55 | * 56 | * @param mat 57 | * @return 58 | */ 59 | public static double[][] matToDoubleArray(Mat mat) { 60 | int rows = mat.rows(); 61 | int cols = mat.cols(); 62 | 63 | double[][] doubles = new double[rows][cols]; 64 | 65 | for (int i = 0; i < rows; i++) { 66 | for (int j = 0; j < cols; j++) { 67 | doubles[i][j] = mat.get(i, j)[0]; 68 | } 69 | } 70 | 71 | return doubles; 72 | } 73 | 74 | /** 75 | * Mat To float[][] Array 76 | * 77 | * @param mat 78 | * @return 79 | */ 80 | public static float[][] matToFloatArray(Mat mat) { 81 | int rows = mat.rows(); 82 | int cols = mat.cols(); 83 | 84 | float[][] floats = new float[rows][cols]; 85 | 86 | for (int i = 0; i < rows; i++) { 87 | for (int j = 0; j < cols; j++) { 88 | floats[i][j] = (float) mat.get(i, j)[0]; 89 | } 90 | } 91 | 92 | return floats; 93 | } 94 | 95 | /** 96 | * Mat To byte[][] Array 97 | * 98 | * @param mat 99 | * @return 100 | */ 101 | public static byte[][] matToUint8Array(Mat mat) { 102 | int rows = mat.rows(); 103 | int cols = mat.cols(); 104 | 105 | byte[][] bytes = new byte[rows][cols]; 106 | 107 | for (int i = 0; i < rows; i++) { 108 | for (int j = 0; j < cols; j++) { 109 | bytes[i][j] = (byte) 
mat.get(i, j)[0]; 110 | } 111 | } 112 | 113 | return bytes; 114 | } 115 | 116 | /** 117 | * float NDArray To float[][] Array 118 | * 119 | * @param ndArray 120 | * @param cvType 121 | * @return 122 | */ 123 | public static Mat floatNDArrayToMat(NDArray ndArray, int cvType) { 124 | int rows = (int) (ndArray.getShape().get(0)); 125 | int cols = (int) (ndArray.getShape().get(1)); 126 | Mat mat = new Mat(rows, cols, cvType); 127 | 128 | float[] arrs = ndArray.toFloatArray(); 129 | for (int i = 0; i < rows; i++) { 130 | for (int j = 0; j < cols; j++) { 131 | mat.put(i, j, arrs[i * cols + j]); 132 | } 133 | } 134 | return mat; 135 | } 136 | 137 | /** 138 | * float NDArray To Mat 139 | * 140 | * @param ndArray 141 | * @return 142 | */ 143 | public static Mat floatNDArrayToMat(NDArray ndArray) { 144 | int rows = (int) (ndArray.getShape().get(0)); 145 | int cols = (int) (ndArray.getShape().get(1)); 146 | Mat mat = new Mat(rows, cols, CvType.CV_32F); 147 | 148 | float[] arrs = ndArray.toFloatArray(); 149 | for (int i = 0; i < rows; i++) { 150 | for (int j = 0; j < cols; j++) { 151 | mat.put(i, j, arrs[i * cols + j]); 152 | } 153 | } 154 | 155 | return mat; 156 | 157 | } 158 | 159 | /** 160 | * uint8 NDArray To Mat 161 | * 162 | * @param ndArray 163 | * @return 164 | */ 165 | public static Mat uint8NDArrayToMat(NDArray ndArray) { 166 | int rows = (int) (ndArray.getShape().get(0)); 167 | int cols = (int) (ndArray.getShape().get(1)); 168 | Mat mat = new Mat(rows, cols, CvType.CV_8U); 169 | 170 | byte[] arrs = ndArray.toByteArray(); 171 | 172 | for (int i = 0; i < rows; i++) { 173 | for (int j = 0; j < cols; j++) { 174 | mat.put(i, j, arrs[i * cols + j]); 175 | } 176 | } 177 | return mat; 178 | } 179 | 180 | /** 181 | * float[][] Array To Mat 182 | * 183 | * @param arr 184 | * @return 185 | */ 186 | public static Mat floatArrayToMat(float[][] arr) { 187 | int rows = arr.length; 188 | int cols = arr[0].length; 189 | Mat mat = new Mat(rows, cols, CvType.CV_32F); 190 | 191 | for 
(int i = 0; i < rows; i++) { 192 | for (int j = 0; j < cols; j++) { 193 | mat.put(i, j, arr[i][j]); 194 | } 195 | } 196 | 197 | return mat; 198 | } 199 | 200 | /** 201 | * byte[][] Array To Mat 202 | * 203 | * @param arr 204 | * @return 205 | */ 206 | public static Mat uint8ArrayToMat(byte[][] arr) { 207 | int rows = arr.length; 208 | int cols = arr[0].length; 209 | Mat mat = new Mat(rows, cols, CvType.CV_8U); 210 | 211 | for (int i = 0; i < rows; i++) { 212 | for (int j = 0; j < cols; j++) { 213 | mat.put(i, j, arr[i][j]); 214 | } 215 | } 216 | 217 | return mat; 218 | } 219 | 220 | /** 221 | * List To Mat 222 | * 223 | * @param points 224 | * @return 225 | */ 226 | public static Mat toMat(List points) { 227 | Mat mat = new Mat(points.size(), 2, CvType.CV_32F); 228 | for (int i = 0; i < points.size(); i++) { 229 | ai.djl.modality.cv.output.Point point = points.get(i); 230 | mat.put(i, 0, (float) point.getX()); 231 | mat.put(i, 1, (float) point.getY()); 232 | } 233 | 234 | return mat; 235 | } 236 | } -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/opencv/OpenCVUtils.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.djl.paddle.ocr.v4.opencv; 2 | 3 | import org.opencv.core.CvType; 4 | import org.opencv.core.Mat; 5 | import org.opencv.imgproc.Imgproc; 6 | 7 | import java.awt.image.BufferedImage; 8 | import java.awt.image.DataBufferByte; 9 | 10 | public class OpenCVUtils { 11 | 12 | /** 13 | * 透视变换 14 | * 15 | * @param src 16 | * @param srcPoints 17 | * @param dstPoints 18 | * @return 19 | */ 20 | public static Mat perspectiveTransform(Mat src, Mat srcPoints, Mat dstPoints) { 21 | Mat dst = src.clone(); 22 | Mat warp_mat = Imgproc.getPerspectiveTransform(srcPoints, dstPoints); 23 | Imgproc.warpPerspective(src, dst, warp_mat, dst.size()); 24 | warp_mat.release(); 25 | 26 | return dst; 27 | } 
28 | 29 | /** 30 | * Mat to BufferedImage 31 | * 32 | * @param mat 33 | * @return 34 | */ 35 | public static BufferedImage mat2Image(Mat mat) { 36 | int width = mat.width(); 37 | int height = mat.height(); 38 | byte[] data = new byte[width * height * (int) mat.elemSize()]; 39 | Imgproc.cvtColor(mat, mat, 4); 40 | mat.get(0, 0, data); 41 | BufferedImage ret = new BufferedImage(width, height, 5); 42 | ret.getRaster().setDataElements(0, 0, width, height, data); 43 | return ret; 44 | } 45 | 46 | /** 47 | * BufferedImage to Mat 48 | * 49 | * @param img 50 | * @return 51 | */ 52 | public static Mat image2Mat(BufferedImage img) { 53 | int width = img.getWidth(); 54 | int height = img.getHeight(); 55 | byte[] data = ((DataBufferByte) img.getRaster().getDataBuffer()).getData(); 56 | Mat mat = new Mat(height, width, CvType.CV_8UC3); 57 | mat.put(0, 0, data); 58 | return mat; 59 | } 60 | } -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/recognition/OcrV4Recognition.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.djl.paddle.ocr.v4.recognition; 2 | 3 | import java.awt.image.BufferedImage; 4 | import java.net.URL; 5 | import java.nio.file.Path; 6 | import java.nio.file.Paths; 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | import java.util.concurrent.ConcurrentHashMap; 10 | 11 | import org.opencv.core.Mat; 12 | 13 | import com.litongjava.ai.djl.paddle.ocr.v4.common.RotatedBox; 14 | import com.litongjava.ai.djl.paddle.ocr.v4.opencv.NDArrayUtils; 15 | import com.litongjava.ai.djl.paddle.ocr.v4.opencv.OpenCVUtils; 16 | 17 | import ai.djl.Device; 18 | import ai.djl.inference.Predictor; 19 | import ai.djl.modality.cv.Image; 20 | import ai.djl.modality.cv.ImageFactory; 21 | import ai.djl.modality.cv.output.Point; 22 | import ai.djl.modality.cv.util.NDImageUtils; 23 | import 
ai.djl.ndarray.NDArray; 24 | import ai.djl.ndarray.NDList; 25 | import ai.djl.ndarray.NDManager; 26 | import ai.djl.opencv.OpenCVImageFactory; 27 | import ai.djl.repository.zoo.Criteria; 28 | import ai.djl.repository.zoo.Criteria.Builder; 29 | import ai.djl.training.util.ProgressBar; 30 | import ai.djl.translate.TranslateException; 31 | import cn.hutool.core.io.resource.ResourceUtil; 32 | 33 |
/**
 * Text recognition: builds the recognition model criteria and runs the
 * detect-then-recognize pipeline on an image.
 */
public final class OcrV4Recognition {

    /**
     * Builds the model-loading criteria for the Simplified Chinese recognition model.
     *
     * <p>The model archive is looked up as the classpath resource
     * {@code models/ch_PP-OCRv4_rec_infer.zip}. When that resource resolves to a
     * path on the default file system the model is loaded from that path;
     * otherwise the {@code jar://} URL is used instead.
     *
     * @return criteria mapping an {@link Image} to the recognized text
     */
    public Criteria<Image, String> chRecCriteria() {
        URL resource = ResourceUtil.getResource("models/ch_PP-OCRv4_rec_infer.zip");
        System.out.println("resource:" + resource);
        Path modelPath = null;
        // Explicit null check instead of relying on a NullPointerException
        // being caught below when the resource is absent.
        if (resource != null) {
            try {
                modelPath = Paths.get(resource.toURI());
            } catch (Exception e) {
                // Best effort: any failure here falls through to the jar:// URL.
                System.err.println(e.getMessage());
            }
        }

        // NOTE(review): the device is hard-coded to the default GPU - confirm this
        // is intended for CPU-only hosts.
        Device device = Device.gpu();
        Builder<Image, String> builder = Criteria.builder()
                // engine
                .optEngine("OnnxRuntime")
                // devices
                .optDevice(device)
                // type
                .setTypes(Image.class, String.class).optProgress(new ProgressBar())
                .optTranslator(new PpWordRecTranslator(new ConcurrentHashMap<>()));

        if (modelPath != null) {
            System.out.println("load from file");
            // The model name now matches the recognition archive; it previously
            // carried the detection model's name (copy-paste slip).
            builder.optModelPath(modelPath).optModelName("ch_PP-OCRv4_rec_infer");
        } else {
            System.out.println("load from jar");
            builder.optModelUrls("jar:///models/ch_PP-OCRv4_rec_infer.zip");
        }
        return builder.build();
    }

    /**
     * Runs detection on the image, then recognizes the text inside every
     * detected box.
     *
     * <p>Each box is read as four (x, y) corners in the order top-left,
     * top-right, bottom-right, bottom-left. The quadrilateral is warped to an
     * upright rectangle, cropped, rotated by 90 degrees when its height exceeds
     * 1.5 times its width, and passed to the recognizer.
     *
     * @param manager    manager that takes ownership of the detector output
     * @param image      input image; its wrapped image must be an OpenCV {@link Mat}
     * @param detector   text-detection predictor
     * @param recognizer text-recognition predictor
     * @return one {@link RotatedBox} per detected box, or {@code null} when the
     *         detector returns {@code null}
     * @throws TranslateException if detection or recognition fails
     */
    public List<RotatedBox> predict(NDManager manager, Image image, Predictor<Image, NDList> detector,
            Predictor<Image, String> recognizer) throws TranslateException {
        NDList boxes = detector.predict(image);
        if (boxes == null) {
            return null;
        }
        // attach to manager for automatic memory management
        boxes.attach(manager);

        List<RotatedBox> result = new ArrayList<>();

        Mat mat = (Mat) image.getWrappedImage();

        for (int i = 0; i < boxes.size(); i++) {
            NDArray box = boxes.get(i);

            float[] pointsArr = box.toFloatArray();
            float[] lt = java.util.Arrays.copyOfRange(pointsArr, 0, 2);
            float[] rt = java.util.Arrays.copyOfRange(pointsArr, 2, 4);
            float[] rb = java.util.Arrays.copyOfRange(pointsArr, 4, 6);
            float[] lb = java.util.Arrays.copyOfRange(pointsArr, 6, 8);
            // Crop size = the longer of each pair of opposite edges.
            int img_crop_width = (int) Math.max(distance(lt, rt), distance(rb, lb));
            int img_crop_height = (int) Math.max(distance(lt, lb), distance(rt, rb));
            List<Point> srcPoints = new ArrayList<>();
            srcPoints.add(new Point(lt[0], lt[1]));
            srcPoints.add(new Point(rt[0], rt[1]));
            srcPoints.add(new Point(rb[0], rb[1]));
            srcPoints.add(new Point(lb[0], lb[1]));
            List<Point> dstPoints = new ArrayList<>();
            dstPoints.add(new Point(0, 0));
            dstPoints.add(new Point(img_crop_width, 0));
            dstPoints.add(new Point(img_crop_width, img_crop_height));
            dstPoints.add(new Point(0, img_crop_height));

            Mat srcPoint2f = NDArrayUtils.toMat(srcPoints);
            Mat dstPoint2f = NDArrayUtils.toMat(dstPoints);
            Mat cvMat = null;
            try {
                cvMat = OpenCVUtils.perspectiveTransform(mat, srcPoint2f, dstPoint2f);

                Image subImg = OpenCVImageFactory.getInstance().fromImage(cvMat);

                subImg = subImg.getSubImage(0, 0, img_crop_width, img_crop_height);
                if (subImg.getHeight() * 1.0 / subImg.getWidth() > 1.5) {
                    subImg = rotateImg(manager, subImg);
                }

                String name = recognizer.predict(subImg);
                result.add(new RotatedBox(box, name));
            } finally {
                // Release the native buffers even when warping or recognition throws.
                if (cvMat != null) {
                    cvMat.release();
                }
                srcPoint2f.release();
                dstPoint2f.release();
            }
        }

        return result;
    }

    // Unused placeholder: always returns null.
    private BufferedImage get_rotate_crop_image(Image image, NDArray box) {
        return null;
    }

    /**
     * Euclidean distance between two 2-D points.
     *
     * @param point1 first point as {x, y}
     * @param point2 second point as {x, y}
     * @return the distance between the two points
     */
    private float distance(float[] point1, float[] point2) {
        float disX = point1[0] - point2[0];
        float disY = point1[1] - point2[1];
        return (float) Math.sqrt(disX * disX + disY * disY);
    }

    /**
     * Rotates an image with a single {@code rotate90} step.
     *
     * @param manager manager used to create the intermediate NDArray
     * @param image   image to rotate
     * @return a new image built from the rotated array
     */
    private Image rotateImg(NDManager manager, Image image) {
        NDArray rotated = NDImageUtils.rotate90(image.toNDArray(manager), 1);
        return ImageFactory.getInstance().fromNDArray(rotated);
    }
}
-------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/recognition/PpWordRecTranslator.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.djl.paddle.ocr.v4.recognition; 2 | 3 | import java.io.IOException; 4 | import java.io.InputStream; 5 | import java.util.Arrays; 6 | import java.util.List; 7 | import java.util.Map; 8 | 9 | import ai.djl.Model; 10 | import ai.djl.modality.cv.Image; 11 | import ai.djl.modality.cv.util.NDImageUtils; 12 | import ai.djl.ndarray.NDArray; 13 | import ai.djl.ndarray.NDList; 14 | import ai.djl.ndarray.index.NDIndex; 15 | import ai.djl.ndarray.types.DataType; 16 | import ai.djl.ndarray.types.Shape; 17 | import ai.djl.translate.Batchifier; 18 | import ai.djl.translate.Translator; 19 | import ai.djl.translate.TranslatorContext; 20 | import ai.djl.util.Utils; 21 | 22 | /** 23 | * 文字识别前后处理 24 | */ 25 | public class PpWordRecTranslator implements Translator { 26 | private List table; 27 | private final boolean use_space_char; 28 | 29 | public PpWordRecTranslator(Map arguments) {
30 | use_space_char = 31 | arguments.containsKey("use_space_char") 32 | ? Boolean.parseBoolean(arguments.get("use_space_char").toString()) 33 | : true; 34 | } 35 | 36 | @Override 37 | public void prepare(TranslatorContext ctx) throws IOException { 38 | Model model = ctx.getModel(); 39 | try (InputStream is = model.getArtifact("dict.txt").openStream()) { 40 | table = Utils.readLines(is, true); 41 | table.add(0, "blank"); 42 | if (use_space_char) { 43 | table.add(" "); 44 | table.add(" "); 45 | } else { 46 | table.add(""); 47 | table.add(""); 48 | } 49 | 50 | } 51 | } 52 |
    /**
     * Decodes the recognizer output into text.
     *
     * <p>For every row of the first batch entry the index with the highest
     * score is taken. An index equal to the one directly before it is skipped,
     * as is index 0 (the "blank" entry inserted at the head of the table in
     * {@code prepare}); every remaining index is looked up in the table and
     * appended to the result.
     *
     * @param ctx  translator context (not used)
     * @param list model output; must contain exactly one array
     * @return the decoded text
     */
    @Override
    public String processOutput(TranslatorContext ctx, NDList list) throws IOException {
        StringBuilder sb = new StringBuilder();
        NDArray tokens = list.singletonOrThrow();

        long[] indices = tokens.get(0).argMax(1).toLongArray();
        for (int i = 0; i < indices.length; i++) {
            // Collapse runs: only the first index of a run of equal indices counts.
            boolean repeated = i > 0 && indices[i] == indices[i - 1];
            if (!repeated && indices[i] > 0) {
                sb.append(table.get((int) indices[i]));
            }
        }
        return sb.toString();
    }
82 | 83 | @Override 84 | public NDList processInput(TranslatorContext ctx, Image input) { 85 | NDArray img = input.toNDArray(ctx.getNDManager(), Image.Flag.COLOR); 86 | int imgC = 3; 87 | int imgH = 48; 88 | int imgW = 320; 89 | 90 | float max_wh_ratio = (float) imgW / (float) imgH; 91 | 92 | int h = input.getHeight(); 93 | int w = input.getWidth(); 94 | float
wh_ratio = (float) w / (float) h; 95 | 96 | max_wh_ratio = Math.max(max_wh_ratio, wh_ratio); 97 | imgW = (int) (imgH * max_wh_ratio); 98 | 99 | int resized_w; 100 | if (Math.ceil(imgH * wh_ratio) > imgW) { 101 | resized_w = imgW; 102 | } else { 103 | resized_w = (int) (Math.ceil(imgH * wh_ratio)); 104 | } 105 | NDArray resized_image = NDImageUtils.resize(img, resized_w, imgH); 106 | resized_image = resized_image.transpose(2, 0, 1).toType(DataType.FLOAT32, false); 107 | resized_image.divi(255f).subi(0.5f).divi(0.5f); 108 | NDArray padding_im = ctx.getNDManager().zeros(new Shape(imgC, imgH, imgW), DataType.FLOAT32); 109 | padding_im.set(new NDIndex(":,:,0:" + resized_w), resized_image); 110 | 111 | padding_im = padding_im.flip(0); 112 | padding_im = padding_im.expandDims(0); 113 | return new NDList(padding_im); 114 | } 115 | 116 | @Override 117 | public Batchifier getBatchifier() { 118 | return null; 119 | } 120 | 121 | } -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | ${CONSOLE_LOG_PATTERN} 12 | 13 | 14 | 15 | 16 | 17 | 18 | ${CONSOLE_LOG_PATTERN} 19 | 20 | 21 | 22 | ${LOG_HOME}/project-name-%d{yyyy-MM-dd}.log 23 | 24 | 180 25 | 26 | 27 | 28 | 10MB 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/src/main/resources/models/readme.md: -------------------------------------------------------------------------------- 1 | models path 2 | ```shell 3 | wget https://github.com/litongjava/tools-ocr/releases/download/model-ppocr-v4/ch_PP-OCRv4_det_infer-onnx.zip 4 | wget https://github.com/litongjava/tools-ocr/releases/download/model-ppocr-v4/ch_PP-OCRv4_rec_infer-onnx.zip 5 | 
mkdir -p models/ch_PP-OCRv4_det_infer 6 | mkdir -p models/ch_PP-OCRv4_rec_infer 7 | unzip ch_PP-OCRv4_det_infer-onnx.zip -d models/ch_PP-OCRv4_det_infer 8 | unzip ch_PP-OCRv4_rec_infer-onnx.zip -d models/ch_PP-OCRv4_rec_infer 9 | ``` -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/src/test/java/com/litongjava/ai/djl/paddle/ocr/v4/PaddlePaddleOCRV4Test.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.djl.paddle.ocr.v4; 2 | 3 | import org.junit.Test; 4 | 5 | import lombok.extern.slf4j.Slf4j; 6 | 7 | @Slf4j 8 | public class PaddlePaddleOCRV4Test { 9 | 10 | @Test 11 | public void test() { 12 | String url = "https://resources.djl.ai/images/flight_ticket.jpg"; 13 | String text = null; 14 | for (int i = 0; i < 100; i++) { 15 | try { 16 | text = PaddlePaddleOCRV4.INSTANCE.ocr(url); 17 | } catch (Exception e) { 18 | // TODO Auto-generated catch block 19 | e.printStackTrace(); 20 | } 21 | log.info("text:{}", text); 22 | } 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/src/test/java/com/litongjava/ai/djl/paddle/ocr/v4/gpu/GPUStudy.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.djl.paddle.ocr.v4.gpu; 2 | 3 | import org.junit.Test; 4 | 5 | import ai.djl.Device; 6 | import ai.djl.engine.Engine; 7 | import ai.djl.ndarray.NDArray; 8 | import ai.djl.ndarray.NDManager; 9 | 10 | public class GPUStudy { 11 | /* Return the i'th GPU if it exists, otherwise return the CPU */ 12 | public Device tryGpu(int i) { 13 | return Engine.getInstance().getGpuCount() > i ? 
Device.gpu(i) : Device.cpu(); 14 | } 15 | 16 | /* Return all available GPUs or the [CPU] if no GPU exists */ 17 | public Device[] tryAllGpus() { 18 | int gpuCount = Engine.getInstance().getGpuCount(); 19 | if (gpuCount > 0) { 20 | Device[] devices = new Device[gpuCount]; 21 | for (int i = 0; i < gpuCount; i++) { 22 | devices[i] = Device.gpu(i); 23 | } 24 | return devices; 25 | } 26 | return new Device[] { Device.cpu() }; 27 | } 28 | 29 | public static void main(String[] args) { 30 | System.out.println(Device.cpu()); 31 | System.out.println(Device.gpu()); 32 | System.out.println(Device.gpu(1)); 33 | 34 | System.out.println("GPU count: " + Engine.getInstance().getGpuCount()); 35 | } 36 | 37 | @Test 38 | public void getDevices() { 39 | NDManager manager = NDManager.newBaseManager(); 40 | NDArray x = manager.create(new int[] { 1, 2, 3 }); 41 | Device device = x.getDevice(); 42 | System.out.println(device); 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/src/test/java/com/litongjava/ai/djl/paddle/ocr/v4/recognition/OcrV4RecognitionTest.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.djl.paddle.ocr.v4.recognition; 2 | 3 | import java.net.URISyntaxException; 4 | import java.net.URL; 5 | import java.nio.file.Path; 6 | import java.nio.file.Paths; 7 | 8 | import org.junit.Test; 9 | 10 | import cn.hutool.core.io.resource.ResourceUtil; 11 | import lombok.extern.slf4j.Slf4j; 12 | 13 | @Slf4j 14 | public class OcrV4RecognitionTest { 15 | 16 | @Test 17 | public void testGetModelPath() { 18 | // URL resource = ClassUtil.getClassLoader().getResource(); 19 | URL resource = ResourceUtil.getResource("models/ch_PP-OCRv4_rec_infer/inference.onnx"); 20 | log.info("resource:{}", resource); 21 | Path modelPath = null; 22 | try { 23 | modelPath = Paths.get(resource.toURI()); 24 | } catch (URISyntaxException e) { 25 | 
e.printStackTrace(); 26 | } 27 | log.info("modelPath:{}", modelPath); 28 | 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/src/test/resources/03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/litongjava/ai-server/3d740e12cf2d193590a48a735c515e369da12174/paddle-ocr/paddle-ocr-service/src/test/resources/03.png -------------------------------------------------------------------------------- /paddle-ocr/paddle-ocr-service/src/test/resources/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/litongjava/ai-server/3d740e12cf2d193590a48a735c515e369da12174/paddle-ocr/paddle-ocr-service/src/test/resources/2.jpg -------------------------------------------------------------------------------- /paddle-ocr/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | com.litongjava 5 | ai-server 6 | 1.0.0 7 | 8 | paddle-ocr 9 | pom 10 | 11 | paddle-ocr-server 12 | paddle-ocr-service 13 | paddle-ocr-native-server 14 | 15 | -------------------------------------------------------------------------------- /paddle-ocr/readme.md: -------------------------------------------------------------------------------- 1 | # paddle-ocr-server 2 | 3 | 4 | ## 简介 5 | 6 | 基于java语言的开箱即用的ocr识别服务,用到的框架和技术 7 | - tio-boot 8 | - djl 9 | - opencv 10 | - pytorch 11 | - onnx 12 | - paddle-ocr 13 | ## require 14 | glic==2.28 or CentOS 8.4 15 | 16 | ## How to build 17 | ``` 18 | git clone https://github.com/litongjava/ai-server.git 19 | or 20 | #git clone https://gitee.com/ppnt/ai-server.git 21 | #set JAVA_HOME=D:\java\jdk1.8.0_121 22 | mvn clean package -pl paddle-ocr-server -DskipTests -Pproduction 23 | ``` 24 | run 25 | ``` 26 | java -jar paddle-ocr-server/target/paddle-ocr-server-1.0.4.jar 27 | ``` 28 | 29 | ## 库文件存储路径 30 | 
启动后默认会下载pytorch,djl-pytorch库文件存储路径 31 | ``` 32 | $HOME/.djl.ai/pytorch 33 | ``` 34 | 如果下载太慢可以到下面的地址下载pytorch库 35 | ``` 36 | https://github.com/litongjava/djl-libs/releases/tag/pytorch 37 | ``` 38 | 39 | ## 接口文档 40 | 41 | ### 在线文档地址 42 | https://apifox.com/apidoc/shared-98cc5675-f1a3-4250-a940-cfe060854ef4 43 | 44 | #### 测试接口 45 | 测试接口不需要上传文件,使用程序自带的文件进行识别,用于测试环境安装是否成功 46 | 访问地址:http://localhost/paddle/ocr/test 47 | 返回数据: 48 | ``` 49 | {"data":"www.997788 + 登机牌 BOARDING PASS \n航班 FLIGHT 日期 DATE 舱位 CLASS 序号SERIALNO. 座位号SEATNO \nMU 2379 03DEC W 035 12F \n目的地TO 始发地 FROM 一 登机口 GATE 登机时间BDT \n福州 TAIYUAN G11 \nFUZHOU 身份识别IDNO. \n姓名NAME \nZHANGQIWEI 票号TKTNO. \n张祺伟 \n票价FARE ETKT7813699238489/1 \n登机口于起飞前10分钟关闭 GATES CLOSE 10MINUTES BEFORE DEPARTURE TIME + \n","ok":true} 50 | ``` 51 | ### 识别接口 52 | ``` 53 | curl --location --request POST 'http://localhost/paddle/ocr/rec' \ 54 | --header 'User-Agent: Apifox/1.0.0 (https://apifox.com)' \ 55 | --form 'file=@""' 56 | ``` 57 | 返回数据格式 58 | ``` 59 | { 60 | "data": "text数据", 61 | "ok": true 62 | } 63 | ``` 64 | ## Docker 65 | ### build 66 | ``` 67 | mvn package -DskipTests -Pproduction 68 | docker build -t litongjava/paddle-ocr-server:1.0.1 . 
69 | docker tag litongjava/paddle-ocr-server:1.0.1 litongjava/paddle-ocr-server 70 | ``` 71 | ### run 72 | ``` 73 | docker run --name ocr_server -dit -p 8080:80 litongjava/paddle-ocr-server 74 | ``` 75 | ### 部署到aliyun fc 76 | 77 | #### 推送镜像到 78 | ##### 在 阿里云镜像仓库中 新建仓库 79 | - 访问https://cr.console.aliyun.com/cn-hangzhou/instances/repositories 80 | - 点击个人版 81 | - 点击镜像仓库 82 | - 点击创建镜像仓库 输入名称paddle-ocr-server 83 | 84 | ##### 推送镜像到 阿里云镜像仓库 85 | - 登录查看用户名https://cr.console.aliyun.com/cn-hangzhou/instance/credentials 86 | - 进入linux命令行 87 | - 登录 docker login registry.cn-hangzhou.aliyuncs.com 88 | - 拉取 litongjava/paddle-ocr-server 89 | - tag;docker tag litongjava/paddle-ocr-server registry.cn-hangzhou.aliyuncs.com/litongjava/paddle-ocr-server 90 | - push: docker push registry.cn-hangzhou.aliyuncs.com/litongjava/paddle-ocr-server 91 | 92 | #### 部署到 aliyun fc 93 | - 登录 阿里云 94 | - 选择"阿里云函数"计算 95 | - 选择"服务及函数" 96 | - 创建"服务" 97 | - 名称:paddle-ocr-server 确定创建 98 | - 创建函数-->使用容器镜像创建 99 | - 函数名称 ocr 100 | - web server模式 是 101 | - 请求处理程序类型 处理 Http请求 102 | - 镜像 选择刚才推送的镜像 registry-vpc.cn-hangzhou.aliyuncs.com/litongjava/paddle-ocr-server:latest 103 | - 监听端口 80 104 | - 是否使用GPU 不使用GPU 105 | - 规格方案 默认即可 106 | - 禁用公网访问 URL 否 107 | - 点击部署 108 | - 部署成功过后获取地址访问测试即可 -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | com.litongjava 4 | ai-server 5 | 1.0.0 6 | pom 7 | ai-server 8 | ai development toolkit service and server based on java language 9 | https://github.com/litongjava/ai-server 10 | 11 | paddle-ocr 12 | whisper-asr 13 | 14 | 15 | 16 | The Apache Software License, Version 2.0 17 | http://apache.org/licenses/LICENSE-2.0.txt 18 | 19 | 20 | 21 | 22 | 23 | litongjava 24 | Ping E Lee 25 | litongjava@qq.com 26 | https://github.com/litongjava 27 | 28 | 29 | 30 | 31 | scm:git:git@github.com:litongjava/ai-server.git 32 | 
scm:git:git@github.com:litongjava/ai-server.git 33 | git@github.com:litongjava/ai-server.git 34 | 35 | 36 | 37 | 38 | ossrh 39 | https://s01.oss.sonatype.org/content/repositories/snapshots 40 | 41 | 42 | ossrh 43 | https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/ 44 | 45 | 46 | 47 | 48 | 49 | 50 | org.apache.maven.plugins 51 | maven-source-plugin 52 | 2.2.1 53 | 54 | 55 | package 56 | 57 | jar-no-fork 58 | 59 | 60 | 61 | 62 | 63 | 64 | org.apache.maven.plugins 65 | maven-javadoc-plugin 66 | 2.9.1 67 | 68 | 69 | -Xdoclint:none 70 | 71 | 72 | 73 | package 74 | 75 | jar 76 | 77 | 78 | 79 | 80 | 81 | 82 | org.apache.maven.plugins 83 | maven-gpg-plugin 84 | 1.5 85 | 86 | 87 | sign-artifacts 88 | verify 89 | 90 | sign 91 | 92 | 93 | 94 | 95 | 96 | org.sonatype.plugins 97 | nexus-staging-maven-plugin 98 | 1.6.7 99 | true 100 | 101 | ossrh 102 | https://s01.oss.sonatype.org/ 103 | true 104 | 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /rapid-ocr-server/Dockerfile: -------------------------------------------------------------------------------- 1 | # build litongjava/paddle-ocr-server:1.0.1 2 | # Use litongjava/jdk:8u211 as the base image 3 | FROM litongjava/centos-8-jdk:8u341 4 | 5 | # Set the working directory in the container 6 | WORKDIR /app 7 | 8 | # Copy the jar file into the container 9 | COPY target/paddle-ocr-server-1.0.1.jar /app/ 10 | 11 | # download file 12 | RUN java -jar /app/paddle-ocr-server-1.0.1.jar --download 13 | 14 | # Command to run the jar file 15 | CMD ["java", "-jar", "paddle-ocr-server-1.0.1.jar", "--mode=prod"] 16 | -------------------------------------------------------------------------------- /rapid-ocr-server/deploy-win.txt: -------------------------------------------------------------------------------- 1 | set JAVA_HOME=D:\\java\\jdk1.8.0_121 2 | mvn clean package -DskipTests -Dgpg.skip -Pproduction 
-------------------------------------------------------------------------------- /rapid-ocr-server/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | com.litongjava 4 | rapid-ocr-server 5 | 1.0.4 6 | 7 | UTF-8 8 | 1.8 9 | ${java.version} 10 | ${java.version} 11 | 23.1.1 12 | 1.4.3 13 | 1.18.30 14 | 1.2.1 15 | ocr-server 16 | com.litongjava.ai.server.rapid.ocr.RapidOcrServer 17 | 18 | 19 | 20 | 21 | 22 | 23 | io.github.mymonstercat 24 | rapidocr 25 | 0.0.7 26 | 27 | 28 | 29 | 30 | 31 | io.github.mymonstercat 32 | rapidocr-onnx-platform 33 | 0.0.7 34 | 35 | 36 | com.litongjava 37 | tio-boot 38 | ${tio.boot.version} 39 | 40 | 41 | org.projectlombok 42 | lombok 43 | ${lombok-version} 44 | true 45 | provided 46 | 47 | 48 | 49 | com.alibaba.fastjson2 50 | fastjson2 51 | 2.0.12 52 | 53 | 54 | 55 | 56 | 57 | development 58 | 59 | true 60 | 61 | 62 | 63 | ch.qos.logback 64 | logback-classic 65 | 1.2.3 66 | 67 | 68 | 69 | 70 | 71 | 72 | production 73 | 74 | 75 | ch.qos.logback 76 | logback-classic 77 | 1.2.3 78 | 79 | 80 | 81 | 82 | 83 | org.springframework.boot 84 | spring-boot-maven-plugin 85 | 2.7.4 86 | 87 | ${main.class} 88 | org.projectlombok 89 | 90 | 91 | 92 | 93 | 94 | repackage 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | assembly 105 | 106 | 107 | ch.qos.logback 108 | logback-classic 109 | 1.2.3 110 | 111 | 112 | 113 | 114 | 115 | org.apache.maven.plugins 116 | maven-jar-plugin 117 | 3.2.0 118 | 119 | 120 | org.apache.maven.plugins 121 | maven-assembly-plugin 122 | 3.1.1 123 | 124 | 125 | 126 | ${main.class} 127 | 128 | 129 | 130 | jar-with-dependencies 131 | 132 | false 133 | 134 | 135 | 136 | make-assembly 137 | package 138 | 139 | single 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | native 149 | 150 | 151 | 152 | org.slf4j 153 | slf4j-jdk14 154 | 1.7.31 155 | 156 | 157 | 158 | org.graalvm.sdk 159 | graal-sdk 160 | ${graalvm.version} 161 | provided 162 | 163 | 164 | 165 | ${final.name} 
166 | 167 | 168 | org.graalvm.nativeimage 169 | native-image-maven-plugin 170 | 21.2.0 171 | 172 | 173 | 174 | native-image 175 | 176 | package 177 | 178 | 179 | 180 | false 181 | ${project.build.finalName} 182 | ${main.class} 183 | 184 | -H:+RemoveSaturatedTypeFlows 185 | --allow-incomplete-classpath 186 | --no-fallback 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | -------------------------------------------------------------------------------- /rapid-ocr-server/readme.md: -------------------------------------------------------------------------------- 1 | # rapid-ocr-server 2 | 3 | ## requirements 4 | - java 1.8 5 | 6 | ## build 7 | 克隆并构建 TIO 工具库: 这些是 TIO 项目的辅助工具库。 8 | ``` 9 | git clone https://github.com/litongjava/tio-utils.git 10 | cd tio-utils 11 | mvn clean install -DskipTests -Dgpg.skip 12 | ``` 13 | 克隆并构建 TIO 核心库: 这是所有基于 TIO 项目所需的核心库。 14 | ``` 15 | git clone https://github.com/litongjava/t-io.git 16 | cd t-io 17 | mvn clean install -DskipTests -Dgpg.skip 18 | ``` 19 | 克隆并构建 TIO HTTP 组件: 这些组件为 TIO 应用程序启用 HTTP 功能。 20 | ``` 21 | git clone https://github.com/litongjava/tio-http.git 22 | cd tio-http/tio-http-common/ 23 | mvn clean install -DskipTests -Dgpg.skip 24 | ``` 25 | ``` 26 | cd ../tio-http-server 27 | mvn clean install -DskipTests -Dgpg.skip 28 | ``` 29 | 克隆并构建 TIO Boot: TIO Boot 简化了 TIO 应用程序的引导过程。 30 | ``` 31 | git clone https://github.com/litongjava/tio-boot.git 32 | cd tio-boot 33 | mvn clean install -DskipTests -Dgpg.skip 34 | ``` 35 | RapidOcr-Java 36 | ``` 37 | https://github.com/litongjava/RapidOcr-Java 38 | cd RapidOcr-Java 39 | mvn clean install -DskipTests -Dgpg.skip 40 | ``` 41 | 构建本项目 42 | ``` 43 | mvn clean package -DskipTests -Dgpg.skip -Pproduction 44 | ``` 45 | ## test 46 | - http://localhost/rapid/ocr/test 47 | - http://localhost/rapid/ocr/rec 48 | 49 | ## curl 50 | 51 | ``` 52 | curl --location --request POST 'http://localhost/rapid/ocr/rec' \ 53 | --header 'Accept: */*' \ 54 | --header 'Content-Type: multipart/form-data; 
boundary=--------------------------865945034672416949878658' \ 55 | --form 'file=@"flight_ticket.jpg"' \ 56 | --form 'responseFormat="text"' 57 | ``` -------------------------------------------------------------------------------- /rapid-ocr-server/src/main/java/com/litongjava/ai/server/rapid/ocr/RapidOcrServer.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.server.rapid.ocr; 2 | 3 | import com.litongjava.ai.server.rapid.ocr.config.RapidOcrConfig; 4 | import com.litongjava.tio.boot.TioApplication; 5 | 6 | public class RapidOcrServer { 7 | public static void main(String[] args) throws Exception { 8 | 9 | long start = System.currentTimeMillis(); 10 | TioApplication.run(RapidOcrServer.class, new RapidOcrConfig(),args); 11 | long end = System.currentTimeMillis(); 12 | System.out.println("started:" + (end - start) + "(ms)"); 13 | } 14 | 15 | } -------------------------------------------------------------------------------- /rapid-ocr-server/src/main/java/com/litongjava/ai/server/rapid/ocr/config/RapidOcrConfig.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.server.rapid.ocr.config; 2 | 3 | import com.litongjava.ai.server.rapid.ocr.controller.IndexHandler; 4 | import com.litongjava.ai.server.rapid.ocr.controller.RapidOcrHandler; 5 | import com.litongjava.ai.server.rapid.ocr.instance.EngineInstance; 6 | import com.litongjava.tio.boot.context.TioBootConfiguration; 7 | import com.litongjava.tio.boot.server.TioBootServer; 8 | import com.litongjava.tio.http.server.router.HttpReqeustSimpleHandlerRoute; 9 | 10 | public class RapidOcrConfig implements TioBootConfiguration { 11 | @Override 12 | public void config() { 13 | registerRouter(); 14 | EngineInstance.init(); 15 | } 16 | 17 | public void registerRouter() { 18 | HttpReqeustSimpleHandlerRoute r = TioBootServer.me().getHttpReqeustSimpleHandlerRoute(); 19 | // 创建handler 20 | IndexHandler 
indexHandler = new IndexHandler(); 21 | RapidOcrHandler ocrHandler = new RapidOcrHandler(); 22 | 23 | // 添加action 24 | r.add("/", indexHandler::index); 25 | r.add("/rapid/ocr/rec", ocrHandler::rec); 26 | r.add("/rapid/ocr/test", ocrHandler::test); 27 | 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /rapid-ocr-server/src/main/java/com/litongjava/ai/server/rapid/ocr/controller/IndexHandler.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.server.rapid.ocr.controller; 2 | 3 | 4 | import com.litongjava.tio.http.common.HttpRequest; 5 | import com.litongjava.tio.http.common.HttpResponse; 6 | import com.litongjava.tio.http.server.util.Resps; 7 | 8 | public class IndexHandler { 9 | 10 | public HttpResponse index(HttpRequest httpRequest) { 11 | return Resps.txt(httpRequest, "rapid-ocr-server"); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /rapid-ocr-server/src/main/java/com/litongjava/ai/server/rapid/ocr/controller/RapidOcrHandler.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.server.rapid.ocr.controller; 2 | 3 | import java.io.File; 4 | import java.net.URL; 5 | 6 | import com.benjaminwan.ocrlibrary.OcrResult; 7 | import com.litongjava.ai.server.rapid.ocr.instance.EngineInstance; 8 | import com.litongjava.tio.boot.http.TioControllerContext; 9 | import com.litongjava.tio.http.common.HttpRequest; 10 | import com.litongjava.tio.http.common.HttpResponse; 11 | import com.litongjava.tio.http.common.UploadFile; 12 | import com.litongjava.tio.http.server.model.HttpCors; 13 | import com.litongjava.tio.http.server.util.HttpServerResponseUtils; 14 | import com.litongjava.tio.http.server.util.Resps; 15 | import com.litongjava.tio.utils.http.HttpUtils; 16 | import com.litongjava.tio.utils.hutool.FileUtil; 17 | import 
com.litongjava.tio.utils.hutool.ResourceUtil; 18 | import com.litongjava.tio.utils.resp.RespVo; 19 | 20 | public class RapidOcrHandler { 21 | 22 | public HttpResponse rec(HttpRequest httprequest) throws Exception { 23 | HttpResponse httpResponse = TioControllerContext.getResponse(); 24 | HttpServerResponseUtils.enableCORS(httpResponse, new HttpCors()); 25 | 26 | UploadFile uploadFile = httprequest.getUploadFile("file"); 27 | String responseFormat = httprequest.getParam("responseFormat"); 28 | OcrResult runOcr = null; 29 | if (uploadFile != null) { 30 | byte[] fileData = uploadFile.getData(); 31 | String name = uploadFile.getName(); 32 | File file = new File(System.currentTimeMillis() + "_" + name); 33 | 34 | FileUtil.writeBytes(fileData, file); 35 | runOcr = EngineInstance.runOcr(file.getAbsolutePath()); 36 | file.delete(); 37 | 38 | if (runOcr != null) { 39 | if ("json".equals(responseFormat)) { 40 | httpResponse = Resps.json(httprequest, RespVo.ok(runOcr)); 41 | } else { 42 | httpResponse = Resps.json(httprequest, RespVo.ok(runOcr.getStrRes().trim())); 43 | } 44 | 45 | } else { 46 | httpResponse = Resps.json(httprequest, RespVo.fail()); 47 | } 48 | 49 | } else { 50 | httpResponse = Resps.json(httprequest, RespVo.fail("please upload file")); 51 | } 52 | 53 | return httpResponse; 54 | } 55 | 56 | public HttpResponse test(HttpRequest httprequest) throws Exception { 57 | URL resource = ResourceUtil.getResource("images/flight_ticket.jpg"); 58 | String fileString = resource.getFile(); 59 | File file = new File(fileString); 60 | // OcrResult ocrResult = engine.runOcr(resource.toString()); 61 | // OcrResult ocrResult = EngineInstance.runOcr(resource.toURI().getPath()); 62 | OcrResult ocrResult = EngineInstance.runOcr(file.getAbsolutePath()); 63 | return Resps.json(httprequest, ocrResult.getStrRes().trim()); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- 
/rapid-ocr-server/src/main/java/com/litongjava/ai/server/rapid/ocr/instance/EngineInstance.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.server.rapid.ocr.instance; 2 | 3 | import com.benjaminwan.ocrlibrary.OcrResult; 4 | 5 | import io.github.mymonstercat.Model; 6 | import io.github.mymonstercat.ocr.InferenceEngine; 7 | import io.github.mymonstercat.ocr.config.HardwareConfig; 8 | 9 | public enum EngineInstance { 10 | INSTANCE; 11 | 12 | private static InferenceEngine engine; 13 | 14 | static { 15 | HardwareConfig onnxConfig = HardwareConfig.getOnnxConfig(); 16 | engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4, onnxConfig); 17 | } 18 | 19 | public static OcrResult runOcr(String imagePath) { 20 | return engine.runOcr(imagePath); 21 | } 22 | 23 | // just init 24 | public static void init() { 25 | 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /rapid-ocr-server/src/main/resources/app.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/litongjava/ai-server/3d740e12cf2d193590a48a735c515e369da12174/rapid-ocr-server/src/main/resources/app.properties -------------------------------------------------------------------------------- /rapid-ocr-server/src/main/resources/images/flight_ticket.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/litongjava/ai-server/3d740e12cf2d193590a48a735c515e369da12174/rapid-ocr-server/src/main/resources/images/flight_ticket.jpg -------------------------------------------------------------------------------- /rapid-ocr-server/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | ${CONSOLE_LOG_PATTERN} 12 | 13 | 14 | 15 | 16 | 17 | 18 | ${CONSOLE_LOG_PATTERN} 19 | 20 | 21 
| 22 | ${LOG_HOME}/project-name-%d{yyyy-MM-dd}.log 23 | 24 | 180 25 | 26 | 27 | 28 | 10MB 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # ai-server 2 | 3 | ## 简介 4 | 5 | ai-server的目的是提供高性能的开箱即用的Api服务,目前提供的API服务有 6 | - paddle-ocr-server 7 | - whisper-asr-server -------------------------------------------------------------------------------- /s.yaml: -------------------------------------------------------------------------------- 1 | edition: 1.0.0 2 | name: my-framework-app 3 | access: default 4 | services: 5 | framework: 6 | component: fc 7 | props: 8 | region: cn-hangzhou 9 | service: 10 | name: service-ocr 11 | function: 12 | name: func-ocr 13 | description: Initialize 14 | runtime: custom 15 | environmentVariables: 16 | PATH: >- 17 | /opt/java11/bin:/usr/local/bin/apache-maven/bin:/usr/local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ruby/bin:/opt/bin:/code:/code/bin 18 | LD_LIBRARY_PATH: >- 19 | /code:/code/lib:/usr/local/lib:/opt/lib:/opt/php8.1/lib:/opt/php8.0/lib:/opt/php7.2/lib 20 | JAVA_HOME: /opt/java11 21 | layers: 22 | - acs:fc:cn-hangzhou:official:layers/Java11/versions/2 23 | memorySize: 128 24 | cpu: 0.05 25 | timeout: 60 26 | codeUri: ./ 27 | diskSize: 512 28 | caPort: 80 29 | customRuntimeConfig: 30 | command: 31 | - java 32 | args: 33 | - '-jar' 34 | - paddle-ocr/paddle-ocr-server/target/paddle-ocr-server-1.0.1.jar 35 | triggers: 36 | - name: httpTrigger 37 | type: http 38 | config: 39 | authType: anonymous 40 | methods: 41 | - GET 42 | - POST 43 | - PUT 44 | - DELETE 45 | - HEAD 46 | - PATCH -------------------------------------------------------------------------------- /whisper-asr/pom.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | com.litongjava 5 | ai-server 6 | 1.0.0 7 | 8 | whisper-asr 9 | pom 10 | 11 | whisper-asr-server 12 | whisper-asr-service 13 | 14 | -------------------------------------------------------------------------------- /whisper-asr/readme.md: -------------------------------------------------------------------------------- 1 | ## whisper-asr-server 2 | ### 在线文档 3 | https://apifox.com/apidoc/shared-98cc5675-f1a3-4250-a940-cfe060854ef4/api-121475073 4 | 5 | ### 接口 6 | #### 测试接口 7 | 测试接口无需上传文件,使用内部自带文件进行识别 8 | 访问地址:http://localhost/whispser/asr/test 9 | 返回数据 10 | ``` 11 | { 12 | "data": [ 13 | { 14 | "end": 1088, 15 | "sentence": " And so, my fellow Americans, ask not what your country can do for you, ask what you can do for your country.", 16 | "start": 0 17 | } 18 | ], 19 | "ok": true 20 | } 21 | ``` 22 | #### 识别接口 23 | 识别接口需要上传音频文件 24 | 访问地址:http://localhost/whispser/asr/rec 25 | ##### POST /whispser/asr/rec 26 | 27 | > Body Parameters 28 | 29 | ```yaml 30 | file: filedata 31 | inputType: wav 32 | outputType: default 33 | 34 | ``` 35 | 36 | ###### Params 37 | 38 | |Name|Location|Type|Required|Description| 39 | |---|---|---|---|---| 40 | |body|body|object| no |none| 41 | |» file|body|string(binary)| yes |上传的音频文件| 42 | |» inputType|body|string| no |上传的音频格式wav和mp3| 43 | |» outputType|body|string| no |返回的文本格式,支持default,irc,vtt,srt| 44 | |» outputFormat|body|string| no |返回的数据格式,输出的格式,默认是json,如果需要字幕数据输出txt| 45 | 46 | > Response Examples 47 | 48 | > 成功 49 | 50 | ```json 51 | { 52 | "data": [ 53 | { 54 | "end": 800, 55 | "sentence": " And so my fellow Americans, ask not what your country can do for you.", 56 | "start": 0 57 | }, 58 | { 59 | "end": 1100, 60 | "sentence": " Ask what you can do for your country.", 61 | "start": 800 62 | } 63 | ], 64 | "ok": true 65 | } 66 | ``` 67 | ### required 68 | /usr/lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.21' 69 | ### 
build 70 | ``` 71 | # Set java version 72 | export JAVA_HOME=/usr/java/jdk-11.0.8 73 | export PATH=$JAVA_HOME/bin:$PATH 74 | 75 | #build jar 76 | mvn clean install -DskipTests -Dgpg.skip -Pdevelopment 77 | mvn clean package -DskipTests -Dgpg.skip -Pproduction -pl whisper-asr-server 78 | ``` 79 | ### run 80 | ``` 81 | java -jar whisper-asr-server/target/whisper-asr-server-1.0.1.jar 82 | ``` 83 | The default loaded model is `/root/.cache/whisper/ggml-base.en.bin` 84 | Download the model from Hugging Face: https://huggingface.co/ggerganov/whisper.cpp 85 | 86 | 87 | specify the model name 88 | ``` 89 | java -jar whisper-asr-server/target/whisper-asr-server-1.0.1.jar --model.name=ggml-large.bin 90 | ``` 91 | 92 | ### convert file with ffmpeg 93 | Note that the server currently runs only with 16-bit audio files, so make sure to convert your input before running the tool. 94 | For example, you can use `ffmpeg` like this: 95 | 96 | ``` 97 | ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav 98 | ``` 99 | 100 | ``` 101 | ffmpeg -i input.mp4 -ar 16000 -ac 1 -c:a pcm_s16le output.wav 102 | ``` 103 | 104 | ### 构建Docker镜像 105 | ### 封装镜像 106 | 107 | build 108 | 109 | ``` 110 | docker build -f docker/1.0.1 -t litongjava/whisper-asr-server:1.0.1 . 
111 | ``` 112 | 113 | run 114 | 115 | ``` 116 | docker run -dit -p 8080:80 litongjava/whisper-asr-server:1.0.1 117 | ``` 118 | 119 | test 120 | ``` 121 | curl -v http://localhost:8080/whispser/asr/test 122 | ``` -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-server/docker/1.0.0-base.en: -------------------------------------------------------------------------------- 1 | FROM openjdk:11-jdk-slim 2 | 3 | WORKDIR /app 4 | 5 | COPY ./target/whisper-asr-server-1.0.0.jar /app/ 6 | 7 | COPY models/ggml-base.en.bin /root/.cache/whisper/ggml-base.en.bin 8 | 9 | CMD ["java", "-jar", "whisper-asr-server-1.0.0.jar"] 10 | -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-server/docker/1.0.0-large: -------------------------------------------------------------------------------- 1 | # Use litongjava/jdk:8u211 as the base image 2 | FROM litongjava/jdk:8u211 3 | 4 | # Set the working directory in the container 5 | WORKDIR /app 6 | 7 | # Copy the jar file into the container 8 | COPY ./target/whisper-asr-server-1.0.1.jar /app/ 9 | COPY models/ggml-large.bin /root/.cache/whisper/ggml-large.bin 10 | 11 | # Command to run the jar file 12 | CMD ["/usr/java/jdk1.8.0_211/bin/java", "-jar", "whisper-asr-server-1.0.1.jar","--model.name=ggml-large.bin"] -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-server/docker/1.0.1: -------------------------------------------------------------------------------- 1 | #FROM litongjava/jdk:8u211 2 | FROM openjdk:8-jdk-slim 3 | 4 | # Set the working directory in the container 5 | WORKDIR /app 6 | 7 | 8 | 9 | # Copy the jar file into the container 10 | COPY ./target/whisper-asr-server-1.0.1.jar /app/ 11 | COPY models/ggml-base.en.bin /root/.cache/whisper/ggml-base.en.bin 12 | COPY models/ggml-large.bin /root/.cache/whisper/ggml-large.bin 13 | 14 | 15 | # Command to run the jar file 16 | 
CMD ["java", "-jar", "whisper-asr-server-1.0.1.jar"] -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-server/docker/readme.md: -------------------------------------------------------------------------------- 1 | ``` 2 | docker build -t litongjava/whisper-asr-server:1.0.1 -f docker/1.0.1 . 3 | ``` 4 | 5 | run 6 | ``` 7 | docker run -p 80:80 --rm litongjava/whisper-asr-server:1.0.1 8 | ``` 9 | or 10 | ``` 11 | docker run -p 80:80 --rm litongjava/whisper-asr-server:1.0.1 java -jar whisper-asr-server-1.0.1.jar 12 | ``` -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-server/models/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/litongjava/ai-server/3d740e12cf2d193590a48a735c515e369da12174/whisper-asr/whisper-asr-server/models/readme.md -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-server/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | com.litongjava 5 | whisper-asr 6 | 1.0.0 7 | 8 | whisper-asr-server 9 | 1.0.1 10 | 11 | UTF-8 12 | 1.8 13 | ${java.version} 14 | ${java.version} 15 | 23.1.1 16 | 1.3.3 17 | 1.18.30 18 | 1.1.9 19 | ocr-server 20 | com.litongjava.aio.server.tio.WhisperAsrServer 21 | 22 | 23 | 24 | com.litongjava 25 | whisper-asr-service 26 | 1.0.0 27 | 28 | 29 | com.litongjava 30 | tio-boot 31 | ${tio.boot.version} 32 | 33 | 34 | org.projectlombok 35 | lombok 36 | ${lombok-version} 37 | true 38 | provided 39 | 40 | 41 | 42 | 43 | 44 | development 45 | 46 | true 47 | 48 | 49 | 50 | ch.qos.logback 51 | logback-classic 52 | 1.2.3 53 | 54 | 55 | 56 | 57 | 58 | 59 | production 60 | 61 | 62 | ch.qos.logback 63 | logback-classic 64 | 1.2.3 65 | 66 | 67 | 68 | 69 | 70 | org.apache.maven.plugins 71 | maven-jar-plugin 72 | 3.2.0 73 | 74 | 75 | 
org.apache.maven.plugins 76 | maven-assembly-plugin 77 | 3.1.1 78 | 79 | 80 | 81 | ${main.class} 82 | 83 | 84 | 85 | jar-with-dependencies 86 | 87 | false 88 | 89 | 90 | 91 | make-assembly 92 | package 93 | 94 | single 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | native 104 | 105 | 106 | 107 | org.slf4j 108 | slf4j-jdk14 109 | 1.7.31 110 | 111 | 112 | 113 | org.graalvm.sdk 114 | graal-sdk 115 | ${graalvm.version} 116 | provided 117 | 118 | 119 | 120 | ${final.name} 121 | 122 | 123 | org.graalvm.nativeimage 124 | native-image-maven-plugin 125 | 21.2.0 126 | 127 | 128 | 129 | native-image 130 | 131 | package 132 | 133 | 134 | 135 | false 136 | ${project.build.finalName} 137 | ${main.class} 138 | 139 | -H:+RemoveSaturatedTypeFlows 140 | --allow-incomplete-classpath 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-server/src/main/java/com/litongjava/aio/server/tio/WhisperAsrServer.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.aio.server.tio; 2 | 3 | import com.litongjava.jfinal.aop.annotation.AComponentScan; 4 | import com.litongjava.tio.boot.TioApplication; 5 | 6 | @AComponentScan 7 | public class WhisperAsrServer { 8 | 9 | public static void main(String[] args) throws Exception { 10 | long start = System.currentTimeMillis(); 11 | // 初始化服务器并启动服务器 12 | TioApplication.run(WhisperAsrServer.class, args); 13 | long end = System.currentTimeMillis(); 14 | System.out.println("started:" + (end - start) + "(ms)"); 15 | } 16 | } -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-server/src/main/java/com/litongjava/aio/server/tio/config/WhisperAsrConfig.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.aio.server.tio.config; 2 | 3 | import 
com.litongjava.ai.server.property.WhiserAsrProperties; 4 | import com.litongjava.jfinal.aop.annotation.ABean; 5 | import com.litongjava.jfinal.aop.annotation.AConfiguration; 6 | import com.litongjava.tio.utils.environment.EnvironmentUtils; 7 | 8 | import lombok.extern.slf4j.Slf4j; 9 | 10 | @AConfiguration 11 | @Slf4j 12 | public class WhisperAsrConfig { 13 | 14 | @ABean 15 | public WhiserAsrProperties aiServiceProperties() { 16 | WhiserAsrProperties aiServiceProperties = new WhiserAsrProperties(); 17 | String modelName = EnvironmentUtils.get("model.name"); 18 | if (modelName != null) { 19 | log.info("modelName:{}", modelName); 20 | aiServiceProperties.setModelName(modelName); 21 | } 22 | 23 | return aiServiceProperties; 24 | 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-server/src/main/java/com/litongjava/aio/server/tio/controller/EnviormentController.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.aio.server.tio.controller; 2 | 3 | import com.litongjava.jfinal.aop.Aop; 4 | import com.litongjava.tio.http.common.HttpRequest; 5 | import com.litongjava.tio.http.common.HttpResponse; 6 | import com.litongjava.tio.http.server.annotation.EnableCORS; 7 | import com.litongjava.tio.http.server.annotation.RequestPath; 8 | import com.litongjava.tio.http.server.util.Resps; 9 | import com.litongjava.tio.utils.environment.EnvironmentUtils; 10 | import com.litongjava.tio.utils.resp.Resp; 11 | 12 | @EnableCORS 13 | @RequestPath("/env") 14 | public class EnviormentController { 15 | @RequestPath("/{key}") 16 | public HttpResponse get(String key, HttpRequest request) { 17 | return Resps.json(request, Resp.ok(EnvironmentUtils.get(key))); 18 | } 19 | 20 | @RequestPath("/beans") 21 | public HttpResponse beans(HttpRequest request) { 22 | String[] beans = Aop.beans(); 23 | return Resps.json(request, Resp.ok(beans)); 24 | 25 | } 26 | } 27 | 
-------------------------------------------------------------------------------- /whisper-asr/whisper-asr-server/src/main/java/com/litongjava/aio/server/tio/controller/IndexController.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.aio.server.tio.controller; 2 | 3 | import com.litongjava.tio.http.server.annotation.EnableCORS; 4 | import com.litongjava.tio.http.server.annotation.RequestPath; 5 | 6 | @EnableCORS 7 | @RequestPath(value = "/") 8 | public class IndexController { 9 | @RequestPath() 10 | public String respText() { 11 | return "whisper-asr-server"; 12 | } 13 | } -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-server/src/main/java/com/litongjava/aio/server/tio/controller/SystemController.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.aio.server.tio.controller; 2 | 3 | import java.util.Properties; 4 | 5 | import com.litongjava.tio.http.common.HttpRequest; 6 | import com.litongjava.tio.http.common.HttpResponse; 7 | import com.litongjava.tio.http.server.annotation.EnableCORS; 8 | import com.litongjava.tio.http.server.annotation.RequestPath; 9 | import com.litongjava.tio.http.server.util.Resps; 10 | import com.litongjava.tio.utils.resp.Resp; 11 | 12 | import cn.hutool.core.util.SystemPropsUtil; 13 | import cn.hutool.system.RuntimeInfo; 14 | import cn.hutool.system.SystemUtil; 15 | 16 | @EnableCORS 17 | @RequestPath("/system") 18 | public class SystemController { 19 | 20 | @RequestPath("/props") 21 | public HttpResponse props(HttpRequest request) { 22 | Properties props = SystemPropsUtil.getProps(); 23 | return Resps.json(request, Resp.ok(props)); 24 | } 25 | 26 | @RequestPath("/runtimeInfo") 27 | public HttpResponse runtimeInfo(HttpRequest request) { 28 | RuntimeInfo runtimeInfo = SystemUtil.getRuntimeInfo(); 29 | return Resps.json(request, Resp.ok(runtimeInfo)); 30 | } 31 
| 32 | @RequestPath("/availableProcessors") 33 | public HttpResponse availableProcessors(HttpRequest request) { 34 | int availableProcessors = Runtime.getRuntime().availableProcessors(); 35 | return Resps.json(request, Resp.ok(availableProcessors)); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-server/src/main/java/com/litongjava/aio/server/tio/controller/WhisperAsrController.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.aio.server.tio.controller; 2 | 3 | import java.net.URL; 4 | import java.util.List; 5 | 6 | import com.litongjava.ai.server.model.WhisperSegment; 7 | import com.litongjava.ai.server.service.WhisperCppBaseService; 8 | import com.litongjava.ai.server.service.WhisperCppLargeService; 9 | import com.litongjava.ai.server.service.WhisperCppService; 10 | import com.litongjava.jfinal.aop.Aop; 11 | import com.litongjava.tio.http.common.HttpRequest; 12 | import com.litongjava.tio.http.common.HttpResponse; 13 | import com.litongjava.tio.http.common.UploadFile; 14 | import com.litongjava.tio.http.server.annotation.EnableCORS; 15 | import com.litongjava.tio.http.server.annotation.RequestPath; 16 | import com.litongjava.tio.http.server.util.Resps; 17 | import com.litongjava.tio.utils.resp.Resp; 18 | 19 | import cn.hutool.core.util.ClassUtil; 20 | import io.github.givimad.whisperjni.WhisperFullParams; 21 | 22 | @EnableCORS 23 | @RequestPath("/whispser/asr") 24 | public class WhisperAsrController { 25 | private WhisperCppService whisperCppService = Aop.get(WhisperCppService.class); 26 | private WhisperCppBaseService whisperCppBaseService = Aop.get(WhisperCppBaseService.class); 27 | private WhisperCppLargeService whisperCppLargeService = Aop.get(WhisperCppLargeService.class); 28 | 29 | @RequestPath(value = "/rec") 30 | public HttpResponse index(HttpRequest request, UploadFile file, String inputType, String outputType, 
31 | String outputFormat, WhisperFullParams params) throws Exception { 32 | if (file != null) { 33 | Object data = whisperCppService.index(file.getData(), inputType, outputType, params); 34 | if ("txt".equals(outputFormat)) { 35 | if (data instanceof String) { 36 | return Resps.txt(request, (String) data); 37 | } 38 | 39 | } else { 40 | return Resps.json(request, Resp.ok(data)); 41 | } 42 | 43 | } else { 44 | return Resps.json(request, Resp.fail("uplod file can't be null")); 45 | } 46 | return Resps.json(request, Resp.fail("unknow error")); 47 | } 48 | 49 | @RequestPath(value = "/rec/base") 50 | public HttpResponse recBase(HttpRequest request, UploadFile file, String inputType, String outputType, 51 | String outputFormat, WhisperFullParams params) throws Exception { 52 | if (file != null) { 53 | Object data = whisperCppBaseService.index(file.getData(), inputType, outputType, params); 54 | if ("txt".equals(outputFormat)) { 55 | if (data instanceof String) { 56 | return Resps.txt(request, (String) data); 57 | } 58 | 59 | } else { 60 | return Resps.json(request, Resp.ok(data)); 61 | } 62 | 63 | } else { 64 | return Resps.json(request, Resp.fail("uplod file can't be null")); 65 | } 66 | return Resps.json(request, Resp.fail("unknow error")); 67 | } 68 | 69 | @RequestPath(value = "/rec/large") 70 | public HttpResponse recLarge(UploadFile file, String inputType, String outputType, String outputFormat, 71 | HttpRequest request, WhisperFullParams params) throws Exception { 72 | if (file != null) { 73 | Object data = whisperCppLargeService.index(file.getData(), inputType, outputType, params); 74 | if ("txt".equals(outputFormat)) { 75 | if (data instanceof String) { 76 | return Resps.txt(request, (String) data); 77 | } 78 | 79 | } else { 80 | return Resps.json(request, Resp.ok(data)); 81 | } 82 | 83 | } else { 84 | return Resps.json(request, Resp.fail("uplod file can't be null")); 85 | } 86 | return Resps.json(request, Resp.fail("unknow error")); 87 | } 88 | 89 | 
@RequestPath("/test") 90 | public HttpResponse test(HttpRequest request, WhisperFullParams params) { 91 | // String urlStr = "https://raw.githubusercontent.com/litongjava/whisper.cpp/master/samples/jfk.wav"; 92 | URL resource = ClassUtil.getClassLoader().getResource("audios/jfk.wav"); 93 | if (resource != null) { 94 | List list = whisperCppService.index(resource, params); 95 | return Resps.json(request, Resp.ok(list)); 96 | } 97 | return null; 98 | } 99 | 100 | @RequestPath("/test/base") 101 | public HttpResponse testBase(HttpRequest request, WhisperFullParams params) { 102 | // String urlStr = "https://raw.githubusercontent.com/litongjava/whisper.cpp/master/samples/jfk.wav"; 103 | URL resource = ClassUtil.getClassLoader().getResource("audios/jfk.wav"); 104 | if (resource != null) { 105 | List list = whisperCppBaseService.index(resource, params); 106 | return Resps.json(request, Resp.ok(list)); 107 | } 108 | return null; 109 | } 110 | 111 | @RequestPath("/test/large") 112 | public HttpResponse testLarge(HttpRequest request, WhisperFullParams params) { 113 | // String urlStr = "https://raw.githubusercontent.com/litongjava/whisper.cpp/master/samples/jfk.wav"; 114 | URL resource = ClassUtil.getClassLoader().getResource("audios/jfk.wav"); 115 | if (resource != null) { 116 | List list = whisperCppLargeService.index(resource, params); 117 | return Resps.json(request, Resp.ok(list)); 118 | } 119 | return null; 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-server/src/main/resources/app.properties: -------------------------------------------------------------------------------- 1 | # 设置最大请求大小(包含所有文件)单位 字节,这里设置为1G 2 | http.multipart.max-request-size=73741824 3 | # 设置最大文件大小,单位字节,这里设置为1G 4 | http.multipart.max-file-size=73741824 5 | 6 | -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-service/pom.xml: 
/**
 * Audio formats accepted as recognition input.
 */
public enum AudioType {
  WAV("wav"), MP3("mp3");

  private final String type;

  AudioType(String type) {
    this.type = type;
  }

  public String getType() {
    return type;
  }

  /**
   * Resolves a format name to its constant.
   *
   * <p>Matching ignores case and surrounding whitespace, so a padded request parameter such as
   * {@code " mp3 "} still resolves.
   *
   * @param text format name; {@code null}, empty or blank selects {@link #WAV}
   * @return the matching constant
   * @throws IllegalArgumentException if {@code text} names no known format
   */
  public static AudioType fromString(String text) {
    String name = text == null ? "" : text.trim();
    if (name.isEmpty()) {
      return AudioType.WAV;
    }

    for (AudioType audioType : AudioType.values()) {
      if (audioType.type.equalsIgnoreCase(name)) {
        return audioType;
      }
    }
    throw new IllegalArgumentException("No enum constant for text " + text);
  }
}
/**
 * Output formats a transcription can be rendered to.
 */
public enum TextType {
  DEFAULT("default"), LRC("lrc"), VTT("vtt"), SRT("srt");

  private final String type;

  TextType(String type) {
    this.type = type;
  }

  public String getType() {
    return type;
  }

  /**
   * Looks up the constant for a format name.
   *
   * <p>Matching ignores case and surrounding whitespace, so a padded request parameter such as
   * {@code " srt "} still resolves.
   *
   * @param text format name; {@code null}, empty or blank selects {@link #DEFAULT}
   * @return the matching constant
   * @throws IllegalArgumentException if {@code text} names no known format
   */
  public static TextType fromString(String text) {
    String name = text == null ? "" : text.trim();
    if (name.isEmpty()) {
      return TextType.DEFAULT;
    }
    for (TextType textType : TextType.values()) {
      if (textType.type.equalsIgnoreCase(name)) {
        return textType;
      }
    }
    throw new IllegalArgumentException("No enum constant for text " + text);
  }
}
" + end + "]:" + sentence; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/property/WhiserAsrProperties.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.server.property; 2 | 3 | import lombok.Data; 4 | import lombok.NoArgsConstructor; 5 | 6 | @Data 7 | @NoArgsConstructor 8 | public class WhiserAsrProperties { 9 | private String modelName = "ggml-base.en.bin"; 10 | } 11 | -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/service/TextService.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.server.service; 2 | 3 | import java.io.IOException; 4 | import java.util.List; 5 | 6 | import com.litongjava.ai.server.model.WhisperSegment; 7 | 8 | public class TextService { 9 | public StringBuffer generateSrt(List segments) throws IOException { 10 | StringBuffer stringBuffer = new StringBuffer(); 11 | int index = 1; 12 | for (WhisperSegment segment : segments) { 13 | String startTime = convertToSRTTime(segment.getStart() * 10); 14 | String endTime = convertToSRTTime(segment.getEnd() * 10); 15 | stringBuffer.append(index + "\n"); 16 | stringBuffer.append(startTime + " --> " + endTime + "\n"); 17 | stringBuffer.append(segment.getSentence() + "\n\n"); 18 | index++; 19 | } 20 | return stringBuffer; 21 | } 22 | 23 | public String convertToSRTTime(long milliseconds) { 24 | int hours = (int) (milliseconds / (1000 * 60 * 60)); 25 | int minutes = (int) ((milliseconds % (1000 * 60 * 60)) / (1000 * 60)); 26 | int seconds = (int) ((milliseconds % (1000 * 60)) / 1000); 27 | int millis = (int) (milliseconds % 1000); 28 | return String.format("%02d:%02d:%02d,%03d", hours, minutes, seconds, millis); 29 | } 30 | 31 | public StringBuffer 
generateVtt(List segments) throws IOException { 32 | StringBuffer stringBuffer = new StringBuffer(); 33 | 34 | // Write the WebVTT header 35 | stringBuffer.append("WEBVTT\n\n"); 36 | 37 | int counter = 1; 38 | for (WhisperSegment segment : segments) { 39 | // Convert the start and end times from milliseconds to hh:mm:ss.sss format 40 | String startTime = millisecondsToVttTime(segment.getStart() * 10); 41 | String endTime = millisecondsToVttTime(segment.getEnd() * 10); 42 | 43 | // Write the timestamp and the sentence to the file 44 | stringBuffer.append(counter++ + "\n"); 45 | stringBuffer.append(startTime + " --> " + endTime + "\n"); 46 | stringBuffer.append(segment.getSentence() + "\n\n"); 47 | } 48 | return stringBuffer; 49 | } 50 | 51 | private String millisecondsToVttTime(long milliseconds) { 52 | long hours = milliseconds / 3600000; 53 | long minutes = (milliseconds % 3600000) / 60000; 54 | long seconds = (milliseconds % 60000) / 1000; 55 | long millis = milliseconds % 1000; 56 | return String.format("%02d:%02d:%02d.%03d", hours, minutes, seconds, millis); 57 | } 58 | 59 | public StringBuffer generateLrc(List segments) throws IOException { 60 | StringBuffer stringBuffer = new StringBuffer(); 61 | for (WhisperSegment segment : segments) { 62 | String timestamp = millisecondsToLrcTime(segment.getStart() * 10); 63 | stringBuffer.append("[" + timestamp + "]" + segment.getSentence() + "\n"); 64 | } 65 | return stringBuffer; 66 | } 67 | 68 | private String millisecondsToLrcTime(long milliseconds) { 69 | long totalSeconds = milliseconds / 1000; 70 | long minutes = totalSeconds / 60; 71 | long seconds = totalSeconds % 60; 72 | long millis = milliseconds % 1000; 73 | return String.format("%02d:%02d.%02d", minutes, seconds, millis / 10); 74 | } 75 | 76 | } 77 | -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/service/WhisperCppBaseService.java: 
-------------------------------------------------------------------------------- 1 | package com.litongjava.ai.server.service; 2 | 3 | import java.io.IOException; 4 | import java.net.URL; 5 | import java.util.List; 6 | 7 | import javax.sound.sampled.UnsupportedAudioFileException; 8 | 9 | import com.litongjava.ai.server.enumeration.AudioType; 10 | import com.litongjava.ai.server.enumeration.TextType; 11 | import com.litongjava.ai.server.model.WhisperSegment; 12 | import com.litongjava.ai.server.single.LocalBaseWhisper; 13 | import com.litongjava.ai.server.utils.Mp3Util; 14 | import com.litongjava.ai.server.utils.WhisperAudioUtils; 15 | import com.litongjava.jfinal.aop.Aop; 16 | 17 | import io.github.givimad.whisperjni.WhisperFullParams; 18 | import lombok.extern.slf4j.Slf4j; 19 | 20 | /** 21 | * 使用Base模型 22 | * @author Tong Li 23 | * 24 | */ 25 | @Slf4j 26 | public class WhisperCppBaseService { 27 | private TextService textService = Aop.get(TextService.class); 28 | 29 | public List index(URL url, WhisperFullParams params) { 30 | 31 | try { 32 | float[] floats = WhisperAudioUtils.toAudioData(url); 33 | log.info("floats size:{}", floats.length); 34 | 35 | List segments = LocalBaseWhisper.INSTANCE.fullTranscribeWithTime(floats, floats.length, params); 36 | log.info("size:{}", segments.size()); 37 | return segments; 38 | } catch (UnsupportedAudioFileException | IOException e) { 39 | e.printStackTrace(); 40 | } 41 | 42 | return null; 43 | 44 | } 45 | 46 | public List index(byte[] data, WhisperFullParams params) { 47 | float[] floats = WhisperAudioUtils.toFloat(data); 48 | return LocalBaseWhisper.INSTANCE.fullTranscribeWithTime(floats, params); 49 | } 50 | 51 | public StringBuffer outputSrt(URL url, WhisperFullParams params) throws IOException { 52 | List segments = this.index(url, params); 53 | return textService.generateSrt(segments); 54 | } 55 | 56 | public StringBuffer outputVtt(URL url, WhisperFullParams params) throws IOException { 57 | List segments = 
this.index(url, params); 58 | return textService.generateVtt(segments); 59 | } 60 | 61 | public StringBuffer outputLrc(URL url, WhisperFullParams params) throws IOException { 62 | List segments = this.index(url, params); 63 | return textService.generateLrc(segments); 64 | } 65 | 66 | public Object index(byte[] data, String inputType, String outputType) 67 | throws IOException, UnsupportedAudioFileException { 68 | return index(data, inputType, outputType, null); 69 | } 70 | 71 | public Object index(byte[] data, String inputType, String outputType, WhisperFullParams params) 72 | throws IOException, UnsupportedAudioFileException { 73 | log.info("intputType:{},outputType:{}", inputType, outputType); 74 | AudioType audioType = AudioType.fromString(inputType); 75 | TextType textType = TextType.fromString(outputType); 76 | if (audioType == AudioType.MP3) { 77 | // 进行格式转换 78 | log.info("进行格式转换:{}", "mp3"); 79 | data = Aop.get(Mp3Util.class).convertToWav(data, 16000, 1); 80 | } 81 | List segments = index(data, params); 82 | if (textType == TextType.SRT) { 83 | return textService.generateSrt(segments).toString(); 84 | } else if (textType == TextType.VTT) { 85 | return textService.generateVtt(segments).toString(); 86 | } else if (textType == TextType.LRC) { 87 | return textService.generateLrc(segments).toString(); 88 | } 89 | return segments; 90 | } 91 | } -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/service/WhisperCppJni.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.server.service; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Path; 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | import com.litongjava.ai.server.model.WhisperSegment; 9 | 10 | import io.github.givimad.whisperjni.WhisperContext; 11 | import io.github.givimad.whisperjni.WhisperFullParams; 12 | import 
io.github.givimad.whisperjni.WhisperJNI; 13 | 14 | public class WhisperCppJni { 15 | 16 | private WhisperJNI whisper = null; 17 | private WhisperContext ctx = null; 18 | 19 | public void initContext(Path path) throws IOException { 20 | whisper = new WhisperJNI(); 21 | ctx = whisper.init(path); 22 | } 23 | 24 | public List fullTranscribeWithTime(WhisperFullParams params, float[] samples, int numSamples) { 25 | int result = whisper.full(ctx, params, samples, numSamples); 26 | if (result != 0) { 27 | throw new RuntimeException("Transcription failed with code " + result); 28 | } 29 | int numSegments = whisper.fullNSegments(ctx); 30 | ArrayList segments = new ArrayList(numSegments); 31 | 32 | for (int i = 0; i < numSegments; i++) { 33 | String text = whisper.fullGetSegmentText(ctx, i); 34 | long start = whisper.fullGetSegmentTimestamp0(ctx, i); 35 | long end = whisper.fullGetSegmentTimestamp1(ctx, i); 36 | segments.add(new WhisperSegment(start, end, text)); 37 | } 38 | return segments; 39 | } 40 | 41 | public void close() { 42 | ctx.close(); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/service/WhisperCppLargeService.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.server.service; 2 | 3 | import java.io.IOException; 4 | import java.net.URL; 5 | import java.util.List; 6 | 7 | import javax.sound.sampled.UnsupportedAudioFileException; 8 | 9 | import com.litongjava.ai.server.enumeration.AudioType; 10 | import com.litongjava.ai.server.enumeration.TextType; 11 | import com.litongjava.ai.server.model.WhisperSegment; 12 | import com.litongjava.ai.server.single.LocalLargeWhisper; 13 | import com.litongjava.ai.server.utils.Mp3Util; 14 | import com.litongjava.ai.server.utils.WhisperAudioUtils; 15 | import com.litongjava.jfinal.aop.Aop; 16 | 17 | import 
io.github.givimad.whisperjni.WhisperFullParams; 18 | import lombok.extern.slf4j.Slf4j; 19 | 20 | /** 21 | * 使用Base模型 22 | * @author Tong Li 23 | * 24 | */ 25 | @Slf4j 26 | public class WhisperCppLargeService { 27 | private TextService textService = Aop.get(TextService.class); 28 | 29 | public List index(URL url,WhisperFullParams params) { 30 | 31 | try { 32 | float[] floats = WhisperAudioUtils.toAudioData(url); 33 | log.info("floats size:{}", floats.length); 34 | 35 | List segments = LocalLargeWhisper.INSTANCE.fullTranscribeWithTime(floats, floats.length,params); 36 | log.info("size:{}", segments.size()); 37 | return segments; 38 | } catch (UnsupportedAudioFileException | IOException e) { 39 | e.printStackTrace(); 40 | } 41 | 42 | return null; 43 | 44 | } 45 | 46 | public List index(byte[] data, WhisperFullParams params) { 47 | float[] floats = WhisperAudioUtils.toFloat(data); 48 | return LocalLargeWhisper.INSTANCE.fullTranscribeWithTime(floats, params); 49 | } 50 | 51 | public StringBuffer outputSrt(URL url,WhisperFullParams params) throws IOException { 52 | List segments = this.index(url,params); 53 | return textService.generateSrt(segments); 54 | } 55 | 56 | public StringBuffer outputVtt(URL url,WhisperFullParams params) throws IOException { 57 | List segments = this.index(url,params); 58 | return textService.generateVtt(segments); 59 | } 60 | 61 | public StringBuffer outputLrc(URL url,WhisperFullParams params) throws IOException { 62 | List segments = this.index(url,params); 63 | return textService.generateLrc(segments); 64 | } 65 | 66 | public Object index(byte[] data, String inputType, String outputType) 67 | throws IOException, UnsupportedAudioFileException { 68 | return index(data, inputType, outputType, null); 69 | } 70 | 71 | public Object index(byte[] data, String inputType, String outputType, WhisperFullParams params) 72 | throws IOException, UnsupportedAudioFileException { 73 | log.info("intputType:{},outputType:{}", inputType, outputType); 74 | 
AudioType audioType = AudioType.fromString(inputType); 75 | TextType textType = TextType.fromString(outputType); 76 | if (audioType == AudioType.MP3) { 77 | // 进行格式转换 78 | log.info("进行格式转换:{}", "mp3"); 79 | data = Aop.get(Mp3Util.class).convertToWav(data, 16000, 1); 80 | } 81 | List segments = index(data, params); 82 | if (textType == TextType.SRT) { 83 | return textService.generateSrt(segments).toString(); 84 | } else if (textType == TextType.VTT) { 85 | return textService.generateVtt(segments).toString(); 86 | } else if (textType == TextType.LRC) { 87 | return textService.generateLrc(segments).toString(); 88 | } 89 | return segments; 90 | } 91 | } -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/service/WhisperCppService.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.server.service; 2 | 3 | import java.io.IOException; 4 | import java.net.URL; 5 | import java.util.List; 6 | 7 | import javax.sound.sampled.UnsupportedAudioFileException; 8 | 9 | import com.litongjava.ai.server.enumeration.AudioType; 10 | import com.litongjava.ai.server.enumeration.TextType; 11 | import com.litongjava.ai.server.model.WhisperSegment; 12 | import com.litongjava.ai.server.single.LocalLargeWhisper; 13 | import com.litongjava.ai.server.single.LocalWhisper; 14 | import com.litongjava.ai.server.utils.Mp3Util; 15 | import com.litongjava.ai.server.utils.WhisperAudioUtils; 16 | import com.litongjava.jfinal.aop.Aop; 17 | 18 | import io.github.givimad.whisperjni.WhisperFullParams; 19 | import lombok.extern.slf4j.Slf4j; 20 | 21 | /** 22 | * 加载自定义配置的模型 23 | * @author Tong Li 24 | * 25 | */ 26 | @Slf4j 27 | public class WhisperCppService { 28 | private TextService textService = Aop.get(TextService.class); 29 | 30 | public List index(URL url,WhisperFullParams params) { 31 | 32 | try { 33 | float[] floats = 
WhisperAudioUtils.toAudioData(url); 34 | log.info("floats size:{}", floats.length); 35 | 36 | List segments = LocalWhisper.INSTANCE.fullTranscribeWithTime(floats, floats.length, params); 37 | log.info("size:{}", segments.size()); 38 | return segments; 39 | } catch (UnsupportedAudioFileException | IOException e) { 40 | e.printStackTrace(); 41 | } 42 | 43 | return null; 44 | 45 | } 46 | 47 | public List index(byte[] data, WhisperFullParams params) { 48 | float[] floats = WhisperAudioUtils.toFloat(data); 49 | return LocalLargeWhisper.INSTANCE.fullTranscribeWithTime(floats, params); 50 | } 51 | 52 | public StringBuffer outputSrt(URL url,WhisperFullParams params) throws IOException { 53 | List segments = this.index(url,params); 54 | return textService.generateSrt(segments); 55 | } 56 | 57 | public StringBuffer outputVtt(URL url,WhisperFullParams params) throws IOException { 58 | List segments = this.index(url,params); 59 | return textService.generateVtt(segments); 60 | } 61 | 62 | public StringBuffer outputLrc(URL url,WhisperFullParams params) throws IOException { 63 | List segments = this.index(url,params); 64 | return textService.generateLrc(segments); 65 | } 66 | 67 | public Object index(byte[] data, String inputType, String outputType) 68 | throws IOException, UnsupportedAudioFileException { 69 | return index(data, inputType, outputType, null); 70 | } 71 | 72 | public Object index(byte[] data, String inputType, String outputType, WhisperFullParams params) 73 | throws IOException, UnsupportedAudioFileException { 74 | log.info("intputType:{},outputType:{}", inputType, outputType); 75 | AudioType audioType = AudioType.fromString(inputType); 76 | TextType textType = TextType.fromString(outputType); 77 | if (audioType == AudioType.MP3) { 78 | // 进行格式转换 79 | log.info("进行格式转换:{}", "mp3"); 80 | data = Aop.get(Mp3Util.class).convertToWav(data, 16000, 1); 81 | } 82 | List segments = index(data, params); 83 | if (textType == TextType.SRT) { 84 | return 
textService.generateSrt(segments).toString(); 85 | } else if (textType == TextType.VTT) { 86 | return textService.generateVtt(segments).toString(); 87 | } else if (textType == TextType.LRC) { 88 | return textService.generateLrc(segments).toString(); 89 | } 90 | return segments; 91 | } 92 | } -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/single/LocalBaseWhisper.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.server.single; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Path; 5 | import java.nio.file.Paths; 6 | import java.util.List; 7 | import java.util.concurrent.Callable; 8 | import java.util.concurrent.ExecutionException; 9 | import java.util.concurrent.ExecutorService; 10 | import java.util.concurrent.Executors; 11 | 12 | import com.litongjava.ai.server.model.WhisperSegment; 13 | import com.litongjava.ai.server.service.WhisperCppJni; 14 | 15 | import io.github.givimad.whisperjni.WhisperFullParams; 16 | import io.github.givimad.whisperjni.WhisperJNI; 17 | import lombok.extern.slf4j.Slf4j; 18 | 19 | @Slf4j 20 | public enum LocalBaseWhisper { 21 | INSTANCE; 22 | 23 | private ExecutorService executorService; 24 | private ThreadLocal threadLocalWhisper; 25 | private WhisperFullParams defaultPararams = new WhisperFullParams(); 26 | 27 | LocalBaseWhisper() { 28 | try { 29 | WhisperJNI.loadLibrary(); 30 | } catch (IOException e1) { 31 | e1.printStackTrace(); 32 | } 33 | // C:\Users\Administrator\.cache\whisper 34 | String userHome = System.getProperty("user.home"); 35 | String modelName = "ggml-base.en.bin"; 36 | Path path = Paths.get(userHome, ".cache", "whisper", modelName); 37 | 38 | this.executorService = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() - 1); 39 | threadLocalWhisper = ThreadLocal.withInitial(() -> { 40 | WhisperCppJni whisper = new 
WhisperCppJni(); 41 | try { 42 | whisper.initContext(path); 43 | } catch (IOException e) { 44 | e.printStackTrace(); 45 | } 46 | return whisper; 47 | }); 48 | defaultPararams.printProgress = false; 49 | } 50 | 51 | public List fullTranscribeWithTime(float[] audioData, int numSamples, WhisperFullParams params) { 52 | Callable> task = () -> { 53 | WhisperCppJni whisper = null; 54 | whisper = threadLocalWhisper.get(); 55 | if (params != null) { 56 | log.info("params:{}", params); 57 | return whisper.fullTranscribeWithTime(params, audioData, numSamples); 58 | } else { 59 | return whisper.fullTranscribeWithTime(defaultPararams, audioData, numSamples); 60 | } 61 | 62 | }; 63 | 64 | try { 65 | return executorService.submit(task).get(); 66 | } catch (InterruptedException | ExecutionException e) { 67 | e.printStackTrace(); 68 | } 69 | return null; 70 | } 71 | 72 | public List fullTranscribeWithTime(float[] floats, WhisperFullParams params) { 73 | return fullTranscribeWithTime(floats, floats.length, params); 74 | } 75 | } -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/single/LocalLargeWhisper.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.ai.server.single; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Path; 5 | import java.nio.file.Paths; 6 | import java.util.List; 7 | import java.util.concurrent.Callable; 8 | import java.util.concurrent.ExecutionException; 9 | import java.util.concurrent.ExecutorService; 10 | import java.util.concurrent.Executors; 11 | 12 | import com.litongjava.ai.server.model.WhisperSegment; 13 | import com.litongjava.ai.server.service.WhisperCppJni; 14 | 15 | import io.github.givimad.whisperjni.WhisperFullParams; 16 | import io.github.givimad.whisperjni.WhisperJNI; 17 | import lombok.extern.slf4j.Slf4j; 18 | 19 | @Slf4j 20 | public enum LocalLargeWhisper { 21 | 
INSTANCE; 22 | 23 | private ExecutorService executorService; 24 | private ThreadLocal threadLocalWhisper; 25 | private WhisperFullParams defaultPararams = new WhisperFullParams(); 26 | 27 | LocalLargeWhisper() { 28 | try { 29 | WhisperJNI.loadLibrary(); 30 | } catch (IOException e1) { 31 | e1.printStackTrace(); 32 | } 33 | // C:\Users\Administrator\.cache\whisper 34 | String userHome = System.getProperty("user.home"); 35 | String modelName = "ggml-large.bin"; 36 | Path path = Paths.get(userHome, ".cache", "whisper", modelName); 37 | 38 | this.executorService = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() - 1); 39 | threadLocalWhisper = ThreadLocal.withInitial(() -> { 40 | WhisperCppJni whisper = new WhisperCppJni(); 41 | try { 42 | whisper.initContext(path); 43 | } catch (IOException e) { 44 | e.printStackTrace(); 45 | } 46 | return whisper; 47 | }); 48 | defaultPararams.printProgress = false; 49 | } 50 | 51 | public List fullTranscribeWithTime(float[] audioData, int numSamples, WhisperFullParams params) { 52 | Callable> task = () -> { 53 | WhisperCppJni whisper = null; 54 | whisper = threadLocalWhisper.get(); 55 | if (params != null) { 56 | log.info("params:{}", params); 57 | return whisper.fullTranscribeWithTime(params, audioData, numSamples); 58 | } else { 59 | return whisper.fullTranscribeWithTime(defaultPararams, audioData, numSamples); 60 | } 61 | 62 | }; 63 | 64 | try { 65 | return executorService.submit(task).get(); 66 | } catch (InterruptedException | ExecutionException e) { 67 | e.printStackTrace(); 68 | } 69 | return null; 70 | } 71 | 72 | public List fullTranscribeWithTime(float[] floats, WhisperFullParams params) { 73 | return fullTranscribeWithTime(floats, floats.length, params); 74 | } 75 | } -------------------------------------------------------------------------------- /whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/single/LocalWhisper.java: 
--------------------------------------------------------------------------------
package com.litongjava.ai.server.single;

import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;

import com.litongjava.ai.server.model.WhisperSegment;
import com.litongjava.ai.server.property.WhiserAsrProperties;
import com.litongjava.ai.server.service.WhisperCppJni;
import com.litongjava.ai.server.utils.WhisperExecutorServiceUtils;
import com.litongjava.jfinal.aop.Aop;

import io.github.givimad.whisperjni.WhisperFullParams;
import io.github.givimad.whisperjni.WhisperJNI;
import lombok.extern.slf4j.Slf4j;

/**
 * Singleton that transcribes audio with the model named by
 * {@link WhiserAsrProperties#getModelName()}.
 *
 * <p>The model file is looked up under {@code <user.home>/.cache/whisper/}. Work runs
 * on the shared pool of {@link WhisperExecutorServiceUtils}, and every pool thread
 * lazily creates its own {@link WhisperCppJni} through a {@link ThreadLocal}.
 */
@Slf4j
public enum LocalWhisper {
  INSTANCE;

  private ThreadLocal<WhisperCppJni> threadLocalWhisper;
  private WhisperFullParams defaultPararams = new WhisperFullParams();

  LocalWhisper() {
    // The Lombok logger is a static field and must not be touched while the enum
    // constant is still being constructed, hence printStackTrace() in this scope.
    try {
      WhisperJNI.loadLibrary();
    } catch (IOException e1) {
      e1.printStackTrace();
    }
    // Model location: <user.home>/.cache/whisper/<modelName>
    String userHome = System.getProperty("user.home");
    String modelName = Aop.get(WhiserAsrProperties.class).getModelName();
    Path path = Paths.get(userHome, ".cache", "whisper", modelName);

    threadLocalWhisper = ThreadLocal.withInitial(() -> {
      WhisperCppJni whisper = new WhisperCppJni();
      try {
        whisper.initContext(path);
      } catch (IOException e) {
        e.printStackTrace();
      }
      return whisper;
    });
    defaultPararams.printProgress = false;
  }

  /**
   * Transcribes the given samples on a pool thread and waits for the result.
   *
   * @param audioData  audio samples handed to whisper
   * @param numSamples number of samples of {@code audioData} to process
   * @param params     whisper parameters; when {@code null} the built-in defaults
   *                   (progress printing disabled) are used
   * @return the segments produced by whisper, or {@code null} when the task failed
   *         or the calling thread was interrupted while waiting
   */
  public List<WhisperSegment> fullTranscribeWithTime(float[] audioData, int numSamples, WhisperFullParams params) {
    Callable<List<WhisperSegment>> task = () -> {
      WhisperCppJni whisper = threadLocalWhisper.get();
      if (params != null) {
        log.info("params:{}", params);
        return whisper.fullTranscribeWithTime(params, audioData, numSamples);
      }
      return whisper.fullTranscribeWithTime(defaultPararams, audioData, numSamples);
    };

    try {
      return WhisperExecutorServiceUtils.submit(task).get();
    } catch (InterruptedException e) {
      // Restore the flag so callers further up the stack can observe the interrupt.
      Thread.currentThread().interrupt();
      log.error("interrupted while waiting for whisper transcription", e);
    } catch (ExecutionException e) {
      log.error("whisper transcription failed", e);
    }
    return null;
  }

  /**
   * Transcribes the whole array; equivalent to passing {@code floats.length} as the
   * sample count.
   *
   * @param floats audio samples handed to whisper
   * @param params whisper parameters, or {@code null} for the defaults
   * @return the segments produced by whisper, or {@code null} on failure
   */
  public List<WhisperSegment> fullTranscribeWithTime(float[] floats, WhisperFullParams params) {
    return fullTranscribeWithTime(floats, floats.length, params);
  }
}
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/utils/JFramUtils.java:
--------------------------------------------------------------------------------
package com.litongjava.ai.server.utils;

import java.awt.image.BufferedImage;

import javax.swing.JFrame;
import javax.swing.WindowConstants;

/**
 * Helper for popping up a Swing window that displays an image.
 */
public class JFramUtils {

  /**
   * Opens a frame sized to the image and shows the image in it. The frame is
   * disposed when the user closes it.
   *
   * @param title window title
   * @param image image to display
   */
  public static void showBufferedImage(String title, BufferedImage image) {
    MatPanel panel = new MatPanel();
    panel.setBufferedImage(image);
    // repaint() automatically triggers paint()
    panel.repaint();

    JFrame frame = new JFrame(title);
    frame.setSize(image.getWidth(), image.getHeight());
    frame.setDefaultCloseOperation(WindowConstants.DISPOSE_ON_CLOSE);
    frame.setContentPane(panel);
    frame.setVisible(true);
  }

  /**
   * Convenience overload for callers holding the image as an {@code Object}.
   *
   * @param title window title
   * @param dst   the image; must actually be a {@link BufferedImage}, otherwise the
   *              cast fails with {@link ClassCastException}
   */
  public static void showBufferedImage(String title, Object dst) {
    showBufferedImage(title, (BufferedImage) dst);
  }

}

--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/utils/MatPanel.java:
--------------------------------------------------------------------------------
package com.litongjava.ai.server.utils;

import java.awt.Graphics;
import java.awt.image.BufferedImage;

import javax.swing.JPanel;

/**
 * Panel that draws a single {@link BufferedImage} at its natural size, anchored at
 * the top-left corner. Nothing is drawn while no image has been set.
 */
@SuppressWarnings("serial")
public class MatPanel extends JPanel {
  private BufferedImage bufferImage;

  @Override
  public void paint(Graphics g) {
    if (bufferImage != null) {
      g.drawImage(bufferImage, 0, 0, bufferImage.getWidth(), bufferImage.getHeight(), this);
    }
  }

  /**
   * Sets the image drawn by {@link #paint(Graphics)}; does not trigger a repaint.
   *
   * @param src image to draw, or {@code null} to draw nothing
   */
  public void setBufferedImage(BufferedImage src) {
    this.bufferImage = src;
  }
}
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/utils/Mp3Util.java:
--------------------------------------------------------------------------------
package com.litongjava.ai.server.utils;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;

import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.UnsupportedAudioFileException;

/**
 * Decodes audio through the Java Sound API and converts it to a requested sample
 * rate and channel count.
 *
 * <p>Despite the method names, the returned bytes are the converted sample data read
 * from the {@link AudioInputStream}; no RIFF/WAV header is written.
 */
public class Mp3Util {

  /**
   * Converts in-memory audio data.
   *
   * @param mp3Data encoded audio bytes
   * @param ar      target sample rate in Hz
   * @param ac      target channel count
   * @return the converted sample bytes
   * @throws UnsupportedAudioFileException if no installed reader recognises the data
   * @throws IOException                   if reading the audio fails
   */
  public byte[] convertToWav(byte[] mp3Data, int ar, int ac) throws UnsupportedAudioFileException, IOException {
    try (AudioInputStream ais = AudioSystem.getAudioInputStream(new ByteArrayInputStream(mp3Data))) {
      return decodeAndConvert(ais, ar, ac);
    }
  }

  /**
   * Converts an audio file.
   *
   * @param mp3File encoded audio file
   * @param ar      target sample rate in Hz
   * @param ac      target channel count
   * @return the converted sample bytes
   * @throws IOException                   if reading the audio fails
   * @throws UnsupportedAudioFileException if no installed reader recognises the file
   */
  public byte[] convertToWav(File mp3File, int ar, int ac) throws IOException, UnsupportedAudioFileException {
    try (AudioInputStream ais = AudioSystem.getAudioInputStream(mp3File)) {
      return decodeAndConvert(ais, ar, ac);
    }
  }

  /**
   * Re-samples a stream to {@code ar} Hz and {@code ac} channels, keeping the
   * encoding, sample size and byte order of {@code srcFormat}, and reads the result
   * fully into memory. {@code srcAis} is not closed by this method.
   *
   * @param srcAis    source stream
   * @param srcFormat format of {@code srcAis}
   * @param ar        target sample rate in Hz
   * @param ac        target channel count
   * @return the converted sample bytes
   * @throws IOException if reading the converted stream fails
   */
  public byte[] convert(AudioInputStream srcAis, AudioFormat srcFormat, int ar, int ac) throws IOException {
    // Frame size must match the sample size; fall back to 2 bytes (16 bit) when the
    // source format does not specify one.
    int sampleBits = srcFormat.getSampleSizeInBits();
    int bytesPerSample = sampleBits > 0 ? (sampleBits + 7) / 8 : 2;
    AudioFormat desiredFormat = new AudioFormat(srcFormat.getEncoding(), ar, sampleBits, ac, ac * bytesPerSample, ar,
        srcFormat.isBigEndian());
    AudioInputStream desiredAis = AudioSystem.getAudioInputStream(desiredFormat, srcAis);

    // Read the entire converted stream into a byte array
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    byte[] buffer = new byte[4096];
    int bytesRead;
    while ((bytesRead = desiredAis.read(buffer)) != -1) {
      out.write(buffer, 0, bytesRead);
    }
    return out.toByteArray();
  }

  /**
   * Decodes {@code ais} to 16-bit little-endian signed PCM when it is not already
   * signed PCM, then converts it to the requested rate and channel count. Input that
   * is already signed PCM is converted directly instead of yielding {@code null}.
   */
  private byte[] decodeAndConvert(AudioInputStream ais, int ar, int ac) throws IOException {
    AudioFormat format = ais.getFormat();
    if (AudioFormat.Encoding.PCM_SIGNED.equals(format.getEncoding())) {
      return convert(ais, format, ar, ac);
    }
    AudioFormat pcmFormat = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, format.getSampleRate(), 16,
        format.getChannels(), format.getChannels() * 2, format.getSampleRate(), false);
    try (AudioInputStream pcmStream = AudioSystem.getAudioInputStream(pcmFormat, ais)) {
      return convert(pcmStream, pcmFormat, ar, ac);
    }
  }
}
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/utils/WhisperAudioUtils.java:
--------------------------------------------------------------------------------
package com.litongjava.ai.server.utils;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URL;

import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.UnsupportedAudioFileException;

/**
 * Turns audio into the {@code float[]} sample arrays consumed by whisper.
 */
public class WhisperAudioUtils {

  /**
   * Reads all audio data behind {@code url} and converts it with
   * {@link #toFloat(byte[])}. The stream's format is not inspected here.
   * NOTE(review): the data is presumably expected to be 16-bit little-endian PCM
   * already - confirm against callers.
   *
   * @param url location of the audio
   * @return the samples as floats
   * @throws UnsupportedAudioFileException if no installed reader recognises the data
   * @throws IOException                   if reading fails
   */
  public static float[] toAudioData(URL url) throws UnsupportedAudioFileException, IOException {
    try (AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(url)) {
      // available() is only an estimate and a single read() may return early, so
      // drain the stream in a loop instead of trusting either.
      ByteArrayOutputStream pcm = new ByteArrayOutputStream();
      byte[] buffer = new byte[4096];
      int n;
      while ((n = audioInputStream.read(buffer)) > 0) {
        pcm.write(buffer, 0, n);
      }
      return toFloat(pcm.toByteArray());
    }
  }

  /**
   * Interprets {@code b} as consecutive 16-bit little-endian signed samples and
   * scales each by {@code 1 / 32767}. A trailing odd byte is ignored.
   *
   * @param b raw sample bytes
   * @return one float per complete byte pair
   */
  public static float[] toFloat(byte[] b) {
    float[] floats = new float[b.length / 2];
    for (int j = 0; j < floats.length; j++) {
      int low = b[2 * j] & 0xFF;   // low byte, unsigned
      int high = b[2 * j + 1];     // high byte keeps the sign
      floats[j] = ((high << 8) | low) / 32767.0f;
    }
    return floats;
  }

}

--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/utils/WhisperExecutorServiceUtils.java:
--------------------------------------------------------------------------------
package com.litongjava.ai.server.utils;

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

/**
 * Holder of the shared thread pool used for whisper work.
 *
 * @author Tong Li
 *
 */
public class WhisperExecutorServiceUtils {
  // "cores - 1" is 0 on a single-core host and newFixedThreadPool rejects 0, which
  // would fail class initialisation; always keep at least one worker.
  public static ExecutorService executorService = Executors
      .newFixedThreadPool(Math.max(1, Runtime.getRuntime().availableProcessors() - 1));

  /**
   * Submits a task to the shared pool.
   *
   * @param <T>  result type of the task
   * @param task work to run
   * @return a future for the task's result
   */
  public static <T> Future<T> submit(Callable<T> task) {
    return executorService.submit(task);
  }

}

--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | ${CONSOLE_LOG_PATTERN} 12 | 13 | 14 | 15 | 16 | 17 | 18 | ${CONSOLE_LOG_PATTERN} 19 | 20 | 21 | 22 | ${LOG_HOME}/project-name-%d{yyyy-MM-dd}.log 23 | 24 | 180 25 | 26 | 27 | 28 | 10MB 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/test/java/com/litongjava/ai/server/service/WhisperCppServiceMultiThreadTest.java:
--------------------------------------------------------------------------------
package com.litongjava.ai.server.service;

import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;

import lombok.extern.slf4j.Slf4j;

/**
 * Manual driver that calls {@link WhisperCppService#index} repeatedly, twice per
 * available processor, against a local sample WAV file.
 */
@Slf4j
public class WhisperCppServiceMultiThreadTest {

  public static void main(String[] args) throws MalformedURLException {
    WhisperCppService service = new WhisperCppService();
    URL sample = new File("E:\\code\\cpp\\project-ping\\whisper.cpp\\samples\\jfk.wav").toURI().toURL();

    int cores = Runtime.getRuntime().availableProcessors();
    log.info("availableProcessors:{}", cores);

    // Issue twice as many requests as there are processors.
    int remaining = cores * 2;
    while (remaining-- > 0) {
      service.index(sample, null);
    }
  }

}

--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/test/java/com/litongjava/ai/server/single/LocalLargeWhisperTest.java:
--------------------------------------------------------------------------------
package com.litongjava.ai.server.single;

import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

import org.junit.Test;

public class LocalLargeWhisperTest {

  /**
   * Prints whether {@code ggml-base.en.bin} exists in the per-user whisper cache
   * directory; makes no assertion.
   */
  @Test
  public void test() {
    Path modelFile = Paths.get(System.getProperty("user.home"), ".cache", "whisper", "ggml-base.en.bin");
    System.out.println(Files.exists(modelFile));
  }

}

--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/test/java/com/litongjava/ai/server/utils/Mp3UtilTest.java:
--------------------------------------------------------------------------------
package com.litongjava.ai.server.utils;

import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;

import javax.sound.sampled.LineUnavailableException;
import javax.sound.sampled.UnsupportedAudioFileException;

import org.junit.Test;

import com.litongjava.ai.server.service.WhisperCppService;

import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.ClassUtil;
import javazoom.jl.decoder.JavaLayerException;

public class Mp3UtilTest {

  /**
   * Converts the classpath sample to 16 kHz mono, passes it to
   * {@link WhisperCppService#index} and prints the result. Does nothing when the
   * sample is not on the classpath.
   */
  @Test
  public void test() throws URISyntaxException, IOException, UnsupportedAudioFileException, JavaLayerException,
      LineUnavailableException {
    URL sampleUrl = ClassUtil.getClassLoader().getResource("audios/test.mp3");
    if (sampleUrl != null) {
      byte[] encoded = FileUtil.readBytes(new File(sampleUrl.toURI()));
      Mp3Util converter = new Mp3Util();
      byte[] converted = converter.convertToWav(encoded, 16000, 1);
      WhisperCppService service = new WhisperCppService();
      Object result = service.index(converted, null);
      System.out.println(result);
    }
  }

  /**
   * Only resolves the classpath sample to a {@link File}; the file-based conversion
   * call is currently disabled.
   */
  @Test
  public void test2() throws URISyntaxException, IOException, UnsupportedAudioFileException {
    URL sampleUrl = ClassUtil.getClassLoader().getResource("audios/test.mp3");
    if (sampleUrl != null) {
      File sampleFile = new File(sampleUrl.toURI());
      // byte[] convertToWav = Aop.get(Mp3Util.class).convertToWav(file, 16000, 1);
    }
  }
}

--------------------------------------------------------------------------------