├── .gitignore
├── LICENSE
├── paddle-ocr
├── paddle-ocr-native-server
│ ├── pom.xml
│ ├── readme.md
│ └── src
│ │ └── main
│ │ ├── java
│ │ └── com
│ │ │ └── litongjava
│ │ │ └── ai
│ │ │ └── server
│ │ │ └── padddle
│ │ │ └── ocr
│ │ │ ├── IndexController.java
│ │ │ ├── PaddleOcrController.java
│ │ │ ├── PaddleOcrNativeServer.java
│ │ │ ├── PaddlePaddleOCRNativeV4.java
│ │ │ └── PaddlePaddleOCRNativeV4Demo.java
│ │ └── resources
│ │ └── images
│ │ └── flight_ticket.jpg
├── paddle-ocr-server
│ ├── Dockerfile
│ ├── doc
│ │ └── paddle-ocr.http
│ ├── pom.xml
│ └── src
│ │ └── main
│ │ ├── java
│ │ └── com
│ │ │ └── litongjava
│ │ │ └── ai
│ │ │ └── server
│ │ │ └── padddle
│ │ │ └── ocr
│ │ │ ├── PaddleOcrServer.java
│ │ │ ├── config
│ │ │ └── PaddleOcrConfig.java
│ │ │ └── controller
│ │ │ ├── IndexController.java
│ │ │ ├── IndexHandler.java
│ │ │ ├── PaddleOcrController.java
│ │ │ └── PaddleOcrHandler.java
│ │ └── resources
│ │ └── images
│ │ └── flight_ticket.jpg
├── paddle-ocr-service
│ ├── pom.xml
│ └── src
│ │ ├── main
│ │ ├── java
│ │ │ └── com
│ │ │ │ └── litongjava
│ │ │ │ └── ai
│ │ │ │ └── djl
│ │ │ │ └── paddle
│ │ │ │ └── ocr
│ │ │ │ └── v4
│ │ │ │ ├── OcrV4DetExample.java
│ │ │ │ ├── OcrV4RecExample.java
│ │ │ │ ├── OcrV4RecTensorExample.java
│ │ │ │ ├── PaddlePaddleOCRV4.java
│ │ │ │ ├── common
│ │ │ │ ├── ImageUtils.java
│ │ │ │ ├── RotatedBox.java
│ │ │ │ └── RotatedBoxCompX.java
│ │ │ │ ├── detection
│ │ │ │ ├── OCRDetectionTranslator.java
│ │ │ │ └── OcrV4Detection.java
│ │ │ │ ├── opencv
│ │ │ │ ├── NDArrayUtils.java
│ │ │ │ └── OpenCVUtils.java
│ │ │ │ └── recognition
│ │ │ │ ├── OcrV4Recognition.java
│ │ │ │ └── PpWordRecTranslator.java
│ │ └── resources
│ │ │ ├── logback.xml
│ │ │ └── models
│ │ │ └── readme.md
│ │ └── test
│ │ ├── java
│ │ └── com
│ │ │ └── litongjava
│ │ │ └── ai
│ │ │ └── djl
│ │ │ └── paddle
│ │ │ └── ocr
│ │ │ └── v4
│ │ │ ├── PaddlePaddleOCRV4Test.java
│ │ │ ├── gpu
│ │ │ └── GPUStudy.java
│ │ │ └── recognition
│ │ │ └── OcrV4RecognitionTest.java
│ │ └── resources
│ │ ├── 03.png
│ │ └── 2.jpg
├── pom.xml
└── readme.md
├── pom.xml
├── rapid-ocr-server
├── Dockerfile
├── deploy-win.txt
├── pom.xml
├── readme.md
└── src
│ └── main
│ ├── java
│ └── com
│ │ └── litongjava
│ │ └── ai
│ │ └── server
│ │ └── rapid
│ │ └── ocr
│ │ ├── RapidOcrServer.java
│ │ ├── config
│ │ └── RapidOcrConfig.java
│ │ ├── controller
│ │ ├── IndexHandler.java
│ │ └── RapidOcrHandler.java
│ │ └── instance
│ │ └── EngineInstance.java
│ └── resources
│ ├── app.properties
│ ├── images
│ └── flight_ticket.jpg
│ └── logback.xml
├── readme.md
├── s.yaml
└── whisper-asr
├── pom.xml
├── readme.md
├── whisper-asr-server
├── docker
│ ├── 1.0.0-base.en
│ ├── 1.0.0-large
│ ├── 1.0.1
│ └── readme.md
├── models
│ └── readme.md
├── pom.xml
└── src
│ └── main
│ ├── java
│ └── com
│ │ └── litongjava
│ │ └── aio
│ │ └── server
│ │ └── tio
│ │ ├── WhisperAsrServer.java
│ │ ├── config
│ │ └── WhisperAsrConfig.java
│ │ └── controller
│ │ ├── EnviormentController.java
│ │ ├── IndexController.java
│ │ ├── SystemController.java
│ │ └── WhisperAsrController.java
│ └── resources
│ └── app.properties
└── whisper-asr-service
├── pom.xml
└── src
├── main
├── java
│ └── com
│ │ └── litongjava
│ │ └── ai
│ │ └── server
│ │ ├── enumeration
│ │ ├── AudioType.java
│ │ └── TextType.java
│ │ ├── model
│ │ └── WhisperSegment.java
│ │ ├── property
│ │ └── WhiserAsrProperties.java
│ │ ├── service
│ │ ├── TextService.java
│ │ ├── WhisperCppBaseService.java
│ │ ├── WhisperCppJni.java
│ │ ├── WhisperCppLargeService.java
│ │ └── WhisperCppService.java
│ │ ├── single
│ │ ├── LocalBaseWhisper.java
│ │ ├── LocalLargeWhisper.java
│ │ └── LocalWhisper.java
│ │ └── utils
│ │ ├── JFramUtils.java
│ │ ├── MatPanel.java
│ │ ├── Mp3Util.java
│ │ ├── WhisperAudioUtils.java
│ │ └── WhisperExecutorServiceUtils.java
└── resources
│ └── logback.xml
└── test
└── java
└── com
└── litongjava
└── ai
└── server
├── service
└── WhisperCppServiceMultiThreadTest.java
├── single
└── LocalLargeWhisperTest.java
└── utils
└── Mp3UtilTest.java
/.gitignore:
--------------------------------------------------------------------------------
1 | ### Eclipse template
2 | *.pydevproject
3 | .metadata
4 | .gradle*
5 | classes/
6 | bin/
7 | tmp/
8 | *.tmp
9 | *.bak
10 | *.swp
11 | *~.nib
12 | local.properties
13 | .settings/
14 | .loadpath
15 | rebel.xml
16 |
17 | # Eclipse Core
18 | .project
19 |
20 | generatedsources
21 |
22 | # External tool builders
23 | .externalToolBuilders/
24 |
25 | # Locally stored "Eclipse launch configurations"
26 | *.launch
27 |
28 | # CDT-specific
29 | .cproject
30 |
31 | # JDT-specific (Eclipse Java Development Tools)
32 | .classpath
33 |
34 | # PDT-specific
35 | .buildpath
36 |
37 | # sbteclipse plugin
38 | .target
39 |
40 | # TeXlipse plugin
41 | .texlipse
42 |
43 |
44 |
45 | ### JetBrains template
46 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm
47 |
48 | *.iml
49 | .flattened-pom.xml
50 | ## Directory-based project format:
51 | .idea/
52 | # if you remove the above rule, at least ignore the following:
53 |
54 | # User-specific stuff:
55 | # .idea/workspace.xml
56 | # .idea/tasks.xml
57 | # .idea/dictionaries
58 |
59 | # Sensitive or high-churn files:
60 | # .idea/dataSources.ids
61 | # .idea/dataSources.xml
62 | # .idea/sqlDataSources.xml
63 | # .idea/dynamic.xml
64 | # .idea/uiDesigner.xml
65 |
66 | # Gradle:
67 | # .idea/gradle.xml
68 | # .idea/libraries
69 |
70 | # Mongo Explorer plugin:
71 | # .idea/mongoSettings.xml
72 |
73 | ## File-based project format:
74 | *.ipr
75 | *.iws
76 |
77 | ## Plugin-specific files:
78 |
79 | # IntelliJ
80 | /out/
81 |
82 | # mpeltonen/sbt-idea plugin
83 | .idea_modules/
84 |
85 | # JIRA plugin
86 | atlassian-ide-plugin.xml
87 |
88 | # Crashlytics plugin (for Android Studio and IntelliJ)
89 | com_crashlytics_export_strings.xml
90 | crashlytics.properties
91 | crashlytics-build.properties
92 |
93 | build/
94 |
95 | # Ignore Gradle GUI config
96 | gradle-app.setting
97 |
98 | # Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored)
99 | !gradle-wrapper.jar
100 |
101 | db
102 |
103 | ### Java template
104 | *.class
105 |
106 | # Mobile Tools for Java (J2ME)
107 | .mtj.tmp/
108 |
109 | # Package Files #
110 | #*.jar
111 |
112 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
113 | hs_err_pid*
114 |
115 |
116 | ### Leiningen template
117 | target/
118 | logs/
119 | checkouts/
120 | .lein-deps-sum
121 | .lein-repl-history
122 | .lein-plugins/
123 | .lein-failures
124 | .nrepl-port
125 |
126 | querydsl/
127 |
128 | .DS_Store
129 |
130 | *.log
131 | node_modules/
132 | dist/
133 | dist.zip
134 | package-lock.json
135 | *.wav
136 | *.mp3
137 | *.onnx
138 | *.zip
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 李通
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-native-server/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 |
4 | com.litongjava
5 | paddle-ocr
6 | 1.0.0
7 |
8 | paddle-ocr-native-server
9 |
10 | UTF-8
11 | 1.8
12 | ${java.version}
13 | ${java.version}
14 |
15 | 23.1.1
16 | com.litongjava.ai.server.padddle.ocr.PaddleOcrNativeServer
17 |
18 |
19 |
20 |
21 | io.github.mymonstercat
22 | rapidocr
23 | 0.0.7
24 |
25 |
26 |
27 |
28 |
29 | io.github.mymonstercat
30 | rapidocr-onnx-platform
31 | 0.0.7
32 |
33 |
34 |
35 | com.litongjava
36 | tio-http-server
37 | 3.7.3.v20231223-RELEASE
38 |
39 |
40 |
41 | ${project.artifactId}
42 |
43 |
44 |
45 | jar
46 |
47 | true
48 |
49 |
50 |
51 |
52 | ch.qos.logback
53 | logback-classic
54 | 1.2.3
55 |
56 |
57 |
58 |
59 |
60 | org.apache.maven.plugins
61 | maven-jar-plugin
62 | 3.2.0
63 |
64 |
65 | org.apache.maven.plugins
66 | maven-assembly-plugin
67 | 3.1.1
68 |
69 |
70 |
71 | ${mainClass.server}
72 |
73 |
74 |
75 | jar-with-dependencies
76 |
77 | false
78 |
79 |
80 |
81 | make-assembly
82 | package
83 |
84 | single
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 | server-graalvm
94 |
95 |
96 |
97 | org.slf4j
98 | slf4j-jdk14
99 | 1.7.31
100 |
101 |
102 |
103 | org.graalvm.sdk
104 | graal-sdk
105 | ${graalvm.version}
106 | provided
107 |
108 |
109 |
110 | tio-http-server-graal
111 |
112 |
113 | org.graalvm.nativeimage
114 | native-image-maven-plugin
115 | 21.2.0
116 |
117 |
118 |
119 | native-image
120 |
121 | package
122 |
123 |
124 |
125 | false
126 | ${project.artifactId}
127 | ${mainClass.server}
128 |
129 | -H:+RemoveSaturatedTypeFlows
130 | --allow-incomplete-classpath
131 | --no-fallback
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-native-server/readme.md:
--------------------------------------------------------------------------------
1 | ##
2 | ### 打包失败,错误日志如下,可能还是需要使用jni的方案才支持编译成二进制文件
3 | ```
4 | Error: Class-path entry file:///root/.m2/repository/com/microsoft/onnxruntime/onnxruntime/1.16.0/onnxruntime-1.16.0.jar contains class ai.onnxruntime.ValueInfo. This class is part of the image builder itself (in file:///root/program/graalvm-jdk-21.0.1+12.1/lib/svm/builder/svm-enterprise.jar) and must not be passed via -cp. This can be caused by a fat-jar that illegally includes svm.jar (or graal-sdk.jar) due to its build-time dependency on it. As a workaround, -H:+AllowDeprecatedBuilderClassesOnImageClasspath allows turning this error into a warning. Note that this option is deprecated and will be removed in a future version.
5 | ```
6 |
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-native-server/src/main/java/com/litongjava/ai/server/padddle/ocr/IndexController.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.padddle.ocr;
2 |
3 | import com.litongjava.tio.http.common.HttpRequest;
4 | import com.litongjava.tio.http.common.HttpResponse;
5 | import com.litongjava.tio.http.server.util.Resps;
6 |
7 | /**
8 |  * Serves the index action of the native OCR server.
9 |  */
10 | public class IndexController {
11 |
12 |   /**
13 |    * Replies with the service name as plain text.
14 |    *
15 |    * @param request the HTTP request the response is created for
16 |    * @return a text response carrying {@code paddle-ocr-native-server}
17 |    */
18 |   public HttpResponse index(HttpRequest request) {
19 |     final String serviceName = "paddle-ocr-native-server";
20 |     return Resps.txt(request, serviceName);
21 |   }
22 | }
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-native-server/src/main/java/com/litongjava/ai/server/padddle/ocr/PaddleOcrController.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.padddle.ocr;
2 |
3 | import java.net.URL;
4 |
5 | import com.litongjava.tio.http.common.HttpRequest;
6 | import com.litongjava.tio.http.common.HttpResponse;
7 | import com.litongjava.tio.http.common.UploadFile;
8 | import com.litongjava.tio.http.server.util.Resps;
9 | import com.litongjava.tio.utils.hutool.ResourceUtil;
10 | import com.litongjava.tio.utils.resp.RespVo;
11 |
12 | //@EnableCORS
13 | //@Controller
14 | //@RequestPath("/paddle/ocr")
15 | /**
16 |  * OCR actions of the native server, backed by {@link PaddlePaddleOCRNativeV4}.
17 |  */
18 | public class PaddleOcrController {
19 |
20 |   // @RequestPath(value = "/rec")
21 |   /**
22 |    * Runs OCR on the image named by the {@code url} request parameter or, when that
23 |    * parameter is absent, on the uploaded {@code file} part.
24 |    *
25 |    * @param request the HTTP request carrying {@code url} and/or {@code file}
26 |    * @return a JSON {@code RespVo.ok(text)} response, or {@code RespVo.fail()} when no
27 |    *         text was produced
28 |    * @throws Exception if reading the request or running OCR fails
29 |    */
30 |   public HttpResponse rec(HttpRequest request) throws Exception {
31 |     String url = request.getParam("url");
32 |     UploadFile file = request.getUploadFile("file");
33 |
34 |     // The url parameter wins over an uploaded file when both are present.
35 |     String text;
36 |     if (url != null) {
37 |       text = PaddlePaddleOCRNativeV4.INSTANCE.ocr(url);
38 |     } else if (file != null) {
39 |       text = PaddlePaddleOCRNativeV4.INSTANCE.ocr(file.getData());
40 |     } else {
41 |       text = null;
42 |     }
43 |
44 |     if (text == null) {
45 |       return Resps.json(request, RespVo.fail());
46 |     }
47 |     return Resps.json(request, RespVo.ok(text));
48 |   }
49 |
50 |   // @RequestPath("/test")
51 |   /**
52 |    * Runs OCR on the bundled {@code images/flight_ticket.jpg} classpath resource.
53 |    *
54 |    * @param request the HTTP request the response is created for
55 |    * @return a JSON {@code RespVo.ok} response wrapping the OCR result
56 |    * @throws Exception if loading the resource or running OCR fails
57 |    */
58 |   public HttpResponse test(HttpRequest request) throws Exception {
59 |     URL sample = ResourceUtil.getResource("images/flight_ticket.jpg");
60 |     String text = PaddlePaddleOCRNativeV4.INSTANCE.ocr(sample);
61 |     return Resps.json(request, RespVo.ok(text));
62 |   }
63 |
64 | }
65 |
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-native-server/src/main/java/com/litongjava/ai/server/padddle/ocr/PaddleOcrNativeServer.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.padddle.ocr;
2 |
3 | import java.io.IOException;
4 |
5 | import com.litongjava.tio.http.common.HttpConfig;
6 | import com.litongjava.tio.http.common.handler.HttpRequestHandler;
7 | import com.litongjava.tio.http.server.HttpServerStarter;
8 | import com.litongjava.tio.http.server.handler.HttpRoutes;
9 | import com.litongjava.tio.http.server.handler.SimpleHttpDispatcherHandler;
10 | import com.litongjava.tio.http.server.handler.SimpleHttpRoutes;
11 |
12 | /**
13 |  * Entry point of the native OCR HTTP server: registers the routes by hand and
14 |  * starts a tio HTTP server.
15 |  */
16 | public class PaddleOcrNativeServer {
17 |
18 |   /** Port value handed to {@link HttpConfig}. */
19 |   private static final int PORT = 80;
20 |
21 |   /**
22 |    * Wires the controllers to their paths and starts the server.
23 |    *
24 |    * @param args command-line arguments (not read)
25 |    * @throws IOException propagated from server start-up
26 |    */
27 |   public static void main(String[] args) throws IOException {
28 |     // Instantiate the controllers.
29 |     IndexController indexController = new IndexController();
30 |     PaddleOcrController ocrController = new PaddleOcrController();
31 |
32 |     // Register the routes manually.
33 |     HttpRoutes routes = new SimpleHttpRoutes();
34 |     routes.add("/", indexController::index);
35 |     routes.add("/paddle/ocr/test", ocrController::test);
36 |     routes.add("/paddle/ocr/rec", ocrController::rec);
37 |
38 |     // Configure the server.
39 |     HttpConfig config = new HttpConfig(PORT, null, null, null);
40 |     HttpRequestHandler dispatcher = new SimpleHttpDispatcherHandler(config, routes);
41 |     HttpServerStarter server = new HttpServerStarter(config, dispatcher);
42 |
43 |     // Start the server.
44 |     server.start();
45 |   }
46 | }
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-native-server/src/main/java/com/litongjava/ai/server/padddle/ocr/PaddlePaddleOCRNativeV4.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.padddle.ocr;
2 |
3 | import java.net.URL;
4 |
5 | /**
6 |  * Placeholder OCR facade of the native server. Every overload is still an
7 |  * unimplemented stub that returns {@code null}.
8 |  */
9 | public enum PaddlePaddleOCRNativeV4 {
10 |   INSTANCE;
11 |
12 |   /**
13 |    * @param url location of the image to recognise (currently ignored)
14 |    * @return always {@code null} until recognition is implemented
15 |    */
16 |   String ocr(String url) {
17 |     // TODO: not implemented yet
18 |     return null;
19 |   }
20 |
21 |   /**
22 |    * @param fileData raw bytes of the image to recognise (currently ignored)
23 |    * @return always {@code null} until recognition is implemented
24 |    */
25 |   String ocr(byte[] fileData) {
26 |     // TODO: not implemented yet
27 |     return null;
28 |   }
29 |
30 |   /**
31 |    * @param resource URL of the image to recognise (currently ignored)
32 |    * @return always {@code null} until recognition is implemented
33 |    */
34 |   String ocr(URL resource) {
35 |     // TODO: not implemented yet
36 |     return null;
37 |   }
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-native-server/src/main/java/com/litongjava/ai/server/padddle/ocr/PaddlePaddleOCRNativeV4Demo.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.padddle.ocr;
2 |
3 | import com.benjaminwan.ocrlibrary.OcrResult;
4 |
5 | import io.github.mymonstercat.Model;
6 | import io.github.mymonstercat.ocr.InferenceEngine;
7 |
8 | /**
9 |  * Command-line demo that runs the {@code ONNX_PPOCR_V4} inference engine on one
10 |  * image and prints the recognised text.
11 |  */
12 | public class PaddlePaddleOCRNativeV4Demo {
13 |
14 |   /** Image used when no path is supplied on the command line. */
15 |   private static final String DEFAULT_IMAGE_PATH = "E:\\code\\python\\project-litongjava\\cyg-v2\\img.png";
16 |
17 |   /**
18 |    * Runs OCR on a single image and prints the trimmed result to standard output.
19 |    *
20 |    * @param args optional; {@code args[0]} is the path of the image to recognise.
21 |    *             Without it the demo falls back to {@link #DEFAULT_IMAGE_PATH}, which
22 |    *             only exists on the original author's machine.
23 |    */
24 |   public static void main(String[] args) {
25 |     String imagePath = (args != null && args.length > 0) ? args[0] : DEFAULT_IMAGE_PATH;
26 |
27 |     InferenceEngine engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4);
28 |     OcrResult ocrResult = engine.runOcr(imagePath);
29 |     System.out.println(ocrResult.getStrRes().trim());
30 |   }
31 | }
32 |
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-native-server/src/main/resources/images/flight_ticket.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/litongjava/ai-server/3d740e12cf2d193590a48a735c515e369da12174/paddle-ocr/paddle-ocr-native-server/src/main/resources/images/flight_ticket.jpg
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-server/Dockerfile:
--------------------------------------------------------------------------------
1 | # build litongjava/paddle-ocr-server:1.0.1
2 | # Use litongjava/centos-8-jdk:8u341 as the base image
3 | FROM litongjava/centos-8-jdk:8u341
4 |
5 | # Set the working directory in the container
6 | WORKDIR /app
7 |
8 | # Copy the jar file into the container
9 | COPY target/paddle-ocr-server-1.0.1.jar /app/
10 |
11 | # download file
12 | RUN java -jar /app/paddle-ocr-server-1.0.1.jar --download
13 |
14 | # Command to run the jar file
15 | CMD ["java", "-jar", "paddle-ocr-server-1.0.1.jar", "--mode=prod"]
16 |
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-server/doc/paddle-ocr.http:
--------------------------------------------------------------------------------
1 | curl --location --request POST 'http://localhost/paddle/ocr/rec' \
2 | --form 'file=@"E:\code\python\project-litongjava\cyg-v2\img.png"'
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-server/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 |
4 | com.litongjava
5 | paddle-ocr
6 | 1.0.0
7 |
8 | paddle-ocr-server
9 | 1.0.4
10 |
11 | UTF-8
12 | 1.8
13 | ${java.version}
14 | ${java.version}
15 | 23.1.1
16 | 1.4.0
17 | 1.18.30
18 | 1.2.1
19 | ocr-server
20 | com.litongjava.ai.server.padddle.ocr.PaddleOcrServer
21 |
22 |
23 |
24 | com.litongjava
25 | paddle-ocr-service
26 | 1.0.0
27 |
28 |
29 | com.litongjava
30 | tio-boot
31 | ${tio.boot.version}
32 |
33 |
34 | org.projectlombok
35 | lombok
36 | ${lombok-version}
37 | true
38 | provided
39 |
40 |
41 |
42 |
43 |
44 | development
45 |
46 | true
47 |
48 |
49 |
50 | ch.qos.logback
51 | logback-classic
52 | 1.2.3
53 |
54 |
55 |
56 |
57 |
58 |
59 | production
60 |
61 |
62 | ch.qos.logback
63 | logback-classic
64 | 1.2.3
65 |
66 |
67 |
68 |
69 |
70 | org.springframework.boot
71 | spring-boot-maven-plugin
72 | 2.7.4
73 |
74 | ${main.class}
75 | org.projectlombok
76 |
77 |
78 |
79 |
80 |
81 | repackage
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 | assembly
92 |
93 |
94 | ch.qos.logback
95 | logback-classic
96 | 1.2.3
97 |
98 |
99 |
100 |
101 |
102 | org.apache.maven.plugins
103 | maven-jar-plugin
104 | 3.2.0
105 |
106 |
107 | org.apache.maven.plugins
108 | maven-assembly-plugin
109 | 3.1.1
110 |
111 |
112 |
113 | ${main.class}
114 |
115 |
116 |
117 | jar-with-dependencies
118 |
119 | false
120 |
121 |
122 |
123 | make-assembly
124 | package
125 |
126 | single
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 | native
136 |
137 |
138 |
139 | org.slf4j
140 | slf4j-jdk14
141 | 1.7.31
142 |
143 |
144 |
145 | org.graalvm.sdk
146 | graal-sdk
147 | ${graalvm.version}
148 | provided
149 |
150 |
151 |
152 | ${final.name}
153 |
154 |
155 | org.graalvm.nativeimage
156 | native-image-maven-plugin
157 | 21.2.0
158 |
159 |
160 |
161 | native-image
162 |
163 | package
164 |
165 |
166 |
167 | false
168 | ${project.build.finalName}
169 | ${main.class}
170 |
171 | -H:+RemoveSaturatedTypeFlows
172 | --allow-incomplete-classpath
173 | --no-fallback
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-server/src/main/java/com/litongjava/ai/server/padddle/ocr/PaddleOcrServer.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.padddle.ocr;
2 |
3 | import java.util.Arrays;
4 |
5 | import com.litongjava.ai.djl.paddle.ocr.v4.PaddlePaddleOCRV4;
6 | import com.litongjava.jfinal.aop.annotation.AComponentScan;
7 | import com.litongjava.tio.boot.TioApplication;
8 |
9 | import cn.hutool.core.io.resource.ResourceUtil;
10 | import lombok.extern.slf4j.Slf4j;
11 |
12 | /**
13 |  * Entry point of the OCR server. Started normally it boots the tio application;
14 |  * started with {@code --download} it only initialises the OCR engine and runs it
15 |  * once against the bundled sample image.
16 |  */
17 | @AComponentScan
18 | @Slf4j
19 | public class PaddleOcrServer {
20 |
21 |   /**
22 |    * Dispatches between server start-up and the one-off {@code --download} run.
23 |    *
24 |    * @param args command-line arguments; {@code --download} selects the one-off run,
25 |    *             everything is passed through to {@link TioApplication} otherwise
26 |    * @throws Exception if start-up or the sample recognition fails
27 |    */
28 |   public static void main(String[] args) throws Exception {
29 |     boolean downloadMode = Arrays.asList(args).contains("--download");
30 |     if (!downloadMode) {
31 |       startServer(args);
32 |       return;
33 |     }
34 |     log.info("downloadMode:{}", downloadMode);
35 |     downloadAndTest();
36 |   }
37 |
38 |   /** Boots the tio application and prints how long the call took. */
39 |   private static void startServer(String[] args) throws Exception {
40 |     long begin = System.currentTimeMillis();
41 |     TioApplication.run(PaddleOcrServer.class, args);
42 |     long elapsed = System.currentTimeMillis() - begin;
43 |     System.out.println("started:" + elapsed + "(ms)");
44 |   }
45 |
46 |   /**
47 |    * Initialises the OCR engine, recognises {@code images/flight_ticket.jpg} once and
48 |    * prints the text together with the time the recognition call took.
49 |    */
50 |   private static void downloadAndTest() throws Exception {
51 |     PaddlePaddleOCRV4.INSTANCE.init();
52 |
53 |     long begin = System.currentTimeMillis();
54 |     String text = PaddlePaddleOCRV4.INSTANCE.ocr(ResourceUtil.getResource("images/flight_ticket.jpg"));
55 |     long elapsed = System.currentTimeMillis() - begin;
56 |
57 |     System.out.println(text);
58 |     System.out.println("inference time:" + elapsed + "ms");
59 |   }
60 | }
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-server/src/main/java/com/litongjava/ai/server/padddle/ocr/config/PaddleOcrConfig.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.padddle.ocr.config;
2 |
3 | import com.litongjava.ai.djl.paddle.ocr.v4.PaddlePaddleOCRV4;
4 | import com.litongjava.ai.server.padddle.ocr.controller.IndexHandler;
5 | import com.litongjava.ai.server.padddle.ocr.controller.PaddleOcrHandler;
6 | import com.litongjava.jfinal.aop.annotation.AInitialization;
7 | import com.litongjava.jfinal.aop.annotation.BeforeStartConfiguration;
8 | import com.litongjava.tio.boot.server.TioBootServer;
9 | import com.litongjava.tio.http.server.handler.SimpleHttpRoutes;
10 |
11 | /**
12 |  * Start-up configuration: initialises the OCR engine and registers the HTTP routes.
13 |  */
14 | @BeforeStartConfiguration
15 | public class PaddleOcrConfig {
16 |
17 |   /**
18 |    * Initialises {@link PaddlePaddleOCRV4} and installs the route table on the
19 |    * {@link TioBootServer}.
20 |    */
21 |   @AInitialization
22 |   public void initOcr() {
23 |     // Initialise the OCR engine first.
24 |     PaddlePaddleOCRV4.INSTANCE.init();
25 |
26 |     // Build the route table, then hand it to the TioBootServer.
27 |     SimpleHttpRoutes routes = buildRoutes();
28 |     TioBootServer.me().setHttpRoutes(routes);
29 |   }
30 |
31 |   /** Creates the handlers and maps each path to its action. */
32 |   private static SimpleHttpRoutes buildRoutes() {
33 |     SimpleHttpRoutes routes = new SimpleHttpRoutes();
34 |
35 |     // Create the handlers.
36 |     IndexHandler index = new IndexHandler();
37 |     PaddleOcrHandler ocr = new PaddleOcrHandler();
38 |
39 |     // Register the actions.
40 |     routes.add("/", index::index);
41 |     routes.add("/paddle/ocr/rec", ocr::rec);
42 |     routes.add("/paddle/ocr/test", ocr::test);
43 |     return routes;
44 |   }
45 |
46 | }
47 |
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-server/src/main/java/com/litongjava/ai/server/padddle/ocr/controller/IndexController.java:
--------------------------------------------------------------------------------
1 | //package com.litongjava.ai.server.padddle.ocr.controller;
2 | //
3 | //import com.litongjava.jfinal.aop.annotation.AController;
4 | //import com.litongjava.tio.http.server.annotation.EnableCORS;
5 | //import com.litongjava.tio.http.server.annotation.RequestPath;
6 | //
7 | //@EnableCORS
8 | //@AController
9 | //@RequestPath(value = "/")
10 | //public class IndexController {
11 | // @RequestPath()
12 | // public String respText() {
13 | // return "paddle-ocr-server";
14 | // }
15 | //}
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-server/src/main/java/com/litongjava/ai/server/padddle/ocr/controller/IndexHandler.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.padddle.ocr.controller;
2 |
3 | import com.litongjava.tio.http.common.HttpRequest;
4 | import com.litongjava.tio.http.common.HttpResponse;
5 | import com.litongjava.tio.http.server.util.Resps;
6 |
7 | /**
8 |  * Serves the index action of the OCR server.
9 |  */
10 | public class IndexHandler {
11 |
12 |   /**
13 |    * Replies with the service name as plain text.
14 |    *
15 |    * @param httpRequest the HTTP request the response is created for
16 |    * @return a text response carrying {@code paddle-ocr-server}
17 |    */
18 |   public HttpResponse index(HttpRequest httpRequest) {
19 |     final String serviceName = "paddle-ocr-server";
20 |     return Resps.txt(httpRequest, serviceName);
21 |   }
22 | }
23 |
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-server/src/main/java/com/litongjava/ai/server/padddle/ocr/controller/PaddleOcrController.java:
--------------------------------------------------------------------------------
1 | //package com.litongjava.ai.server.padddle.ocr.controller;
2 | //
3 | //import java.net.URL;
4 | //
5 | //import com.litongjava.ai.djl.paddle.ocr.v4.PaddlePaddleOCRV4;
6 | //import com.litongjava.jfinal.aop.annotation.AController;
7 | //import com.litongjava.tio.http.common.HttpRequest;
8 | //import com.litongjava.tio.http.common.HttpResponse;
9 | //import com.litongjava.tio.http.common.UploadFile;
10 | //import com.litongjava.tio.http.server.annotation.EnableCORS;
11 | //import com.litongjava.tio.http.server.annotation.RequestPath;
12 | //import com.litongjava.tio.http.server.util.Resps;
13 | //import com.litongjava.tio.utils.resp.RespVo;
14 | //
15 | //import cn.hutool.core.io.resource.ResourceUtil;
16 | //
17 | //@EnableCORS
18 | //@AController
19 | //@RequestPath("/paddle/ocr")
20 | //public class PaddleOcrController {
21 | //
22 | // @RequestPath(value = "/rec")
23 | // public HttpResponse index(UploadFile file, String url, HttpRequest request) throws Exception {
24 | // String text = null;
25 | // if (url != null) {
26 | // text = PaddlePaddleOCRV4.INSTANCE.ocr(url);
27 | // } else if (file != null) {
28 | // byte[] fileData = file.getData();
29 | // text = PaddlePaddleOCRV4.INSTANCE.ocr(fileData);
30 | // }
31 | // if (text != null) {
32 | // return Resps.json(request, RespVo.ok(text));
33 | // } else {
34 | // return Resps.json(request, RespVo.fail());
35 | // }
36 | // }
37 | //
38 | // @RequestPath("/test")
39 | // public HttpResponse test(HttpRequest request) throws Exception {
40 | // URL resource = ResourceUtil.getResource("images/flight_ticket.jpg");
41 | // return Resps.json(request, RespVo.ok(PaddlePaddleOCRV4.INSTANCE.ocr(resource)));
42 | // }
43 | //
44 | //}
45 |
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-server/src/main/java/com/litongjava/ai/server/padddle/ocr/controller/PaddleOcrHandler.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.padddle.ocr.controller;
2 |
3 | import java.net.URL;
4 |
5 | import com.litongjava.ai.djl.paddle.ocr.v4.PaddlePaddleOCRV4;
6 | import com.litongjava.tio.http.common.HttpRequest;
7 | import com.litongjava.tio.http.common.HttpResponse;
8 | import com.litongjava.tio.http.common.UploadFile;
9 | import com.litongjava.tio.http.server.model.HttpCors;
10 | import com.litongjava.tio.http.server.util.HttpServerResponseUtils;
11 | import com.litongjava.tio.http.server.util.Resps;
12 | import com.litongjava.tio.utils.hutool.ResourceUtil;
13 | import com.litongjava.tio.utils.resp.RespVo;
14 |
15 | /**
16 |  * HTTP handlers for the OCR routes, backed by {@link PaddlePaddleOCRV4}.
17 |  */
18 | public class PaddleOcrHandler {
19 |
20 |   /**
21 |    * Runs OCR on the image named by the {@code url} parameter or, when that is absent,
22 |    * on the uploaded {@code file} part, and answers with a CORS-enabled JSON response.
23 |    *
24 |    * @param httprequest the HTTP request carrying {@code url} and/or {@code file}
25 |    * @return JSON {@code RespVo.ok(text)} on success, {@code RespVo.fail()} when no text
26 |    *         was produced; CORS is enabled on the response in both cases
27 |    * @throws Exception if reading the request or running OCR fails
28 |    */
29 |   public HttpResponse rec(HttpRequest httprequest) throws Exception {
30 |     UploadFile file = httprequest.getUploadFile("file");
31 |     String url = httprequest.getParam("url");
32 |
33 |     // The url parameter takes precedence over an uploaded file.
34 |     String text = null;
35 |     if (url != null) {
36 |       text = PaddlePaddleOCRV4.INSTANCE.ocr(url);
37 |     } else if (file != null) {
38 |       text = PaddlePaddleOCRV4.INSTANCE.ocr(file.getData());
39 |     }
40 |
41 |     HttpResponse httpResponse;
42 |     if (text != null) {
43 |       httpResponse = Resps.json(httprequest, RespVo.ok(text));
44 |     } else {
45 |       httpResponse = Resps.json(httprequest, RespVo.fail());
46 |     }
47 |     // Enable CORS on every path: returning early on success would leave successful
48 |     // responses without it, so cross-origin callers could only ever read failures.
49 |     HttpServerResponseUtils.enableCORS(httpResponse, new HttpCors());
50 |     return httpResponse;
51 |   }
52 |
53 |   /**
54 |    * Runs OCR on the bundled {@code images/flight_ticket.jpg} classpath resource.
55 |    *
56 |    * @param httprequest the HTTP request the response is created for
57 |    * @return a JSON {@code RespVo.ok} response wrapping the OCR result
58 |    * @throws Exception if loading the resource or running OCR fails
59 |    */
60 |   public HttpResponse test(HttpRequest httprequest) throws Exception {
61 |     URL resource = ResourceUtil.getResource("images/flight_ticket.jpg");
62 |     return Resps.json(httprequest, RespVo.ok(PaddlePaddleOCRV4.INSTANCE.ocr(resource)));
63 |   }
64 | }
65 |
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-server/src/main/resources/images/flight_ticket.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/litongjava/ai-server/3d740e12cf2d193590a48a735c515e369da12174/paddle-ocr/paddle-ocr-server/src/main/resources/images/flight_ticket.jpg
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 |
4 | com.litongjava
5 | paddle-ocr
6 | 1.0.0
7 |
8 | paddle-ocr-service
9 |
10 | UTF-8
11 | 1.8
12 | ${java.version}
13 | ${java.version}
14 | 1.18.30
15 | 1.2.3
16 | 2.0.24
17 | 0.25.0
18 | 5.8.11
19 |
20 |
21 |
22 |
23 |
24 | cn.hutool
25 | hutool-all
26 | ${hutool.version}
27 |
28 |
29 |
30 | org.apache.pdfbox
31 | pdfbox
32 | ${pdfbox.version}
33 |
34 |
35 |
36 | org.projectlombok
37 | lombok
38 | ${lombok.version}
39 | provided
40 |
41 |
42 | ch.qos.logback
43 | logback-classic
44 | ${logback.version}
45 |
46 |
47 |
48 |
49 | ai.djl
50 | api
51 | ${djl.version}
52 |
53 |
54 | ai.djl
55 | basicdataset
56 | ${djl.version}
57 |
58 |
59 | ai.djl
60 | model-zoo
61 | ${djl.version}
62 |
63 |
64 |
65 |
66 | ai.djl.pytorch
67 | pytorch-engine
68 | ${djl.version}
69 | runtime
70 |
71 |
72 |
73 | ai.djl.pytorch
74 | pytorch-jni
75 | 1.13.1-0.25.0
76 | runtime
77 |
78 |
79 |
80 | ai.djl.pytorch
81 | pytorch-native-cu117
82 | win-x86_64
83 | 1.13.1
84 | runtime
85 |
86 |
87 |
88 |
89 | ai.djl.onnxruntime
90 | onnxruntime-engine
91 | ${djl.version}
92 | runtime
93 |
94 |
95 | com.microsoft.onnxruntime
96 | onnxruntime
97 |
98 |
99 |
100 |
101 |
102 |
103 | com.microsoft.onnxruntime
104 | onnxruntime_gpu
105 | 1.14.0
106 | runtime
107 |
108 |
109 |
110 | ai.djl.opencv
111 | opencv
112 | ${djl.version}
113 |
114 |
115 |
116 | junit
117 | junit
118 | 4.13.2
119 | test
120 |
121 |
122 |
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/OcrV4DetExample.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.djl.paddle.ocr.v4;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Path;
5 | import java.nio.file.Paths;
6 |
7 | import org.opencv.core.Mat;
8 |
9 | import com.litongjava.ai.djl.paddle.ocr.v4.common.ImageUtils;
10 | import com.litongjava.ai.djl.paddle.ocr.v4.detection.OcrV4Detection;
11 |
12 | import ai.djl.ModelException;
13 | import ai.djl.inference.Predictor;
14 | import ai.djl.modality.cv.Image;
15 | import ai.djl.ndarray.NDList;
16 | import ai.djl.ndarray.NDManager;
17 | import ai.djl.opencv.OpenCVImageFactory;
18 | import ai.djl.repository.zoo.ModelZoo;
19 | import ai.djl.repository.zoo.ZooModel;
20 | import ai.djl.translate.TranslateException;
21 |
22 | public final class OcrV4DetExample {
23 |
24 | private OcrV4DetExample() {
25 | }
26 |
27 | public static void main(String[] args) throws IOException, ModelException, TranslateException {
28 | Path imageFile = Paths.get("src/test/resources/2.jpg");
29 | Image image = OpenCVImageFactory.getInstance().fromFile(imageFile);
30 |
31 | OcrV4Detection detection = new OcrV4Detection();
32 | try (@SuppressWarnings("rawtypes")
33 | ZooModel detectionModel = ModelZoo.loadModel(detection.chDetCriteria()); @SuppressWarnings("unchecked")
34 | Predictor detector = detectionModel.newPredictor();
35 | NDManager manager = NDManager.newBaseManager();) {
36 |
37 | NDList dt_boxes = detector.predict(image);
38 | // 交给 NDManager自动管理内存
39 | // attach to manager for automatic memory management
40 | dt_boxes.attach(manager);
41 |
42 | for (int i = 0; i < dt_boxes.size(); i++) {
43 | ImageUtils.drawRect((Mat) image.getWrappedImage(), dt_boxes.get(i));
44 | }
45 | ImageUtils.saveImage(image, "detect_rect.png", "build/output");
46 | ((Mat) image.getWrappedImage()).release();
47 | }
48 | }
49 | }
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/OcrV4RecExample.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.djl.paddle.ocr.v4;
2 |
3 | import java.awt.image.BufferedImage;
4 | import java.io.IOException;
5 | import java.nio.file.Path;
6 | import java.nio.file.Paths;
7 | import java.util.ArrayList;
8 | import java.util.Collections;
9 | import java.util.List;
10 |
11 | import org.opencv.core.Mat;
12 | import org.slf4j.Logger;
13 | import org.slf4j.LoggerFactory;
14 |
15 | import com.litongjava.ai.djl.paddle.ocr.v4.common.ImageUtils;
16 | import com.litongjava.ai.djl.paddle.ocr.v4.common.RotatedBox;
17 | import com.litongjava.ai.djl.paddle.ocr.v4.common.RotatedBoxCompX;
18 | import com.litongjava.ai.djl.paddle.ocr.v4.detection.OcrV4Detection;
19 | import com.litongjava.ai.djl.paddle.ocr.v4.opencv.OpenCVUtils;
20 | import com.litongjava.ai.djl.paddle.ocr.v4.recognition.OcrV4Recognition;
21 |
22 | import ai.djl.ModelException;
23 | import ai.djl.inference.Predictor;
24 | import ai.djl.modality.cv.Image;
25 | import ai.djl.ndarray.NDList;
26 | import ai.djl.ndarray.NDManager;
27 | import ai.djl.opencv.OpenCVImageFactory;
28 | import ai.djl.repository.zoo.ModelZoo;
29 | import ai.djl.repository.zoo.ZooModel;
30 | import ai.djl.translate.TranslateException;
31 |
32 | /**
33 | * OCR V4模型 文字识别. 支持文本有旋转角度
34 | * OCR V4 model for text recognition. Supports text with rotation angles.
35 | */
36 | public final class OcrV4RecExample {
37 |
38 | private static final Logger logger = LoggerFactory.getLogger(OcrV4RecExample.class);
39 |
40 | private OcrV4RecExample() {
41 | }
42 |
43 | public static void main(String[] args) throws IOException, ModelException, TranslateException {
44 | // IDEA
45 | Path imageFile = Paths.get("E:\\code\\python\\project-litongjava\\cyg-v2\\img.png");
46 | Image image = OpenCVImageFactory.getInstance().fromFile(imageFile);
47 |
48 | OcrV4Detection detection = new OcrV4Detection();
49 | OcrV4Recognition recognition = new OcrV4Recognition();
50 | try (ZooModel detectionModel = ModelZoo.loadModel(detection.chDetCriteria());
51 | Predictor detector = detectionModel.newPredictor();
52 | ZooModel recognitionModel = ModelZoo.loadModel(recognition.chRecCriteria());
53 | Predictor recognizer = recognitionModel.newPredictor();
54 | NDManager manager = NDManager.newBaseManager()) {
55 |
56 | long timeInferStart = System.currentTimeMillis();
57 | List detections = recognition.predict(manager, image, detector, recognizer);
58 |
59 | // for (int i = 0; i < 1000; i++) {
60 | // detections = recognition.predict(image, detector, recognizer);
61 | // for (RotatedBox result : detections) {
62 | // System.out.println(result.getText());
63 | // }
64 | // System.out.println("index : " + i);
65 | // }
66 |
67 | long timeInferEnd = System.currentTimeMillis();
68 | System.out.println("time: " + (timeInferEnd - timeInferStart));
69 |
70 | // 对检测结果根据坐标位置,根据从上到下,从做到右,重新排序,下面算法对图片倾斜旋转角度较小的情形适用
71 | // 如果图片旋转角度较大,则需要自行改进算法,需要根据斜率校正计算位置。
72 | // Reorder the detection results based on the coordinate positions, from top to bottom, from left to right. The algorithm below is suitable for situations where the image is slightly tilted or rotated.
73 | // If the image rotation angle is large, the algorithm needs to be improved, and the position needs to be calculated based on the slope correction.
74 | List initList = new ArrayList<>();
75 | if (detections != null) {
76 | for (RotatedBox result : detections) {
77 | // put low Y value at the head of the queue.
78 | initList.add(result);
79 | }
80 | }
81 |
82 | Collections.sort(initList);
83 |
84 | List> lines = new ArrayList<>();
85 | List line = new ArrayList<>();
86 | if (initList.size() > 0) {
87 | RotatedBoxCompX firstBox = new RotatedBoxCompX(initList.get(0).getBox(), initList.get(0).getText());
88 | line.add(firstBox);
89 | lines.add((ArrayList) line);
90 | for (int i = 1; i < initList.size(); i++) {
91 | RotatedBoxCompX tmpBox = new RotatedBoxCompX(initList.get(i).getBox(), initList.get(i).getText());
92 | float y1 = firstBox.getBox().toFloatArray()[1];
93 | float y2 = tmpBox.getBox().toFloatArray()[1];
94 | float dis = Math.abs(y2 - y1);
95 | if (dis < 20) { // 认为是同 1 行 - Considered to be in the same line
96 | line.add(tmpBox);
97 | } else { // 换行 - Line break
98 | firstBox = tmpBox;
99 | Collections.sort(line);
100 | line = new ArrayList<>();
101 | line.add(firstBox);
102 | lines.add((ArrayList) line);
103 | }
104 | }
105 | }
106 |
107 |
108 | String fullText = "";
109 | for (int i = 0; i < lines.size(); i++) {
110 | for (int j = 0; j < lines.get(i).size(); j++) {
111 | String text = lines.get(i).get(j).getText();
112 | if (text.trim().equals(""))
113 | continue;
114 | fullText += text + " ";
115 | }
116 | fullText += '\n';
117 | }
118 |
119 | System.out.println(fullText);
120 |
121 | // 转 BufferedImage 解决 Imgproc.putText 中文乱码问题
122 | Mat wrappedImage = (Mat) image.getWrappedImage();
123 | BufferedImage bufferedImage = OpenCVUtils.mat2Image(wrappedImage);
124 | for (RotatedBox result : detections) {
125 | ImageUtils.drawImageRectWithText(bufferedImage, result.getBox(), result.getText());
126 | }
127 |
128 | Mat image2Mat = OpenCVUtils.image2Mat(bufferedImage);
129 | image = OpenCVImageFactory.getInstance().fromImage(image2Mat);
130 | ImageUtils.saveImage(image, "ocr_result.png", "build/output");
131 |
132 | wrappedImage.release();
133 | image2Mat.release();
134 |
135 | logger.info("{}", detections);
136 | }
137 | }
138 | }
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/OcrV4RecTensorExample.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.djl.paddle.ocr.v4;
2 |
3 | import java.nio.file.Path;
4 | import java.nio.file.Paths;
5 |
6 | import com.litongjava.ai.djl.paddle.ocr.v4.recognition.OcrV4Recognition;
7 |
8 | import ai.djl.inference.Predictor;
9 | import ai.djl.modality.cv.Image;
10 | import ai.djl.opencv.OpenCVImageFactory;
11 | import ai.djl.repository.zoo.ModelZoo;
12 | import ai.djl.repository.zoo.ZooModel;
13 | import lombok.Cleanup;
14 |
15 | public class OcrV4RecTensorExample {
16 | public static void main(String[] args) {
17 | Path imageFile = Paths.get("E:\\code\\python\\project-litongjava\\cyg-v2\\img.png");
18 | Image image;
19 | try {
20 | image = OpenCVImageFactory.getInstance().fromFile(imageFile);
21 | OcrV4Recognition recognition = new OcrV4Recognition();
22 |
23 | @Cleanup
24 | ZooModel recognitionModel = ModelZoo.loadModel(recognition.chRecCriteria());
25 |
26 | @Cleanup
27 | Predictor newPredictor = recognitionModel.newPredictor();
28 | String predict = newPredictor.predict(image);
29 | System.out.println("result:" + predict);
30 | } catch (Exception e) {
31 | e.printStackTrace();
32 | }
33 |
34 | }
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/PaddlePaddleOCRV4.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.djl.paddle.ocr.v4;
2 |
3 | import java.io.ByteArrayInputStream;
4 | import java.io.File;
5 | import java.io.IOException;
6 | import java.net.URL;
7 | import java.nio.file.Path;
8 | import java.util.ArrayList;
9 | import java.util.Collections;
10 | import java.util.List;
11 |
12 | import com.litongjava.ai.djl.paddle.ocr.v4.common.RotatedBox;
13 | import com.litongjava.ai.djl.paddle.ocr.v4.common.RotatedBoxCompX;
14 | import com.litongjava.ai.djl.paddle.ocr.v4.detection.OcrV4Detection;
15 | import com.litongjava.ai.djl.paddle.ocr.v4.recognition.OcrV4Recognition;
16 |
17 | import ai.djl.MalformedModelException;
18 | import ai.djl.inference.Predictor;
19 | import ai.djl.modality.cv.Image;
20 | import ai.djl.modality.cv.ImageFactory;
21 | import ai.djl.ndarray.NDList;
22 | import ai.djl.ndarray.NDManager;
23 | import ai.djl.opencv.OpenCVImageFactory;
24 | import ai.djl.repository.zoo.ModelNotFoundException;
25 | import ai.djl.repository.zoo.ModelZoo;
26 | import ai.djl.repository.zoo.ZooModel;
27 |
28 | /**
29 | * Created by Tong Li on 11/23/2023_2:09 AM
30 | */
31 | public enum PaddlePaddleOCRV4 {
32 | INSTANCE;
33 |
34 | private OcrV4Detection detection;
35 | private OcrV4Recognition recognition;
36 | private Predictor detector;
37 | private Predictor recognizer;
38 | private NDManager manager;
39 |
40 | PaddlePaddleOCRV4() {
41 | detection = new OcrV4Detection();
42 | recognition = new OcrV4Recognition();
43 | ZooModel detectionModel = null;
44 | ZooModel recognitionModel = null;
45 | try {
46 | detectionModel = ModelZoo.loadModel(detection.chDetCriteria());
47 | recognitionModel = ModelZoo.loadModel(recognition.chRecCriteria());
48 | } catch (IOException e) {
49 | e.printStackTrace();
50 | } catch (ModelNotFoundException e) {
51 | e.printStackTrace();
52 | } catch (MalformedModelException e) {
53 | e.printStackTrace();
54 | }
55 | detector = detectionModel.newPredictor();
56 |
57 | recognizer = recognitionModel.newPredictor();
58 | manager = NDManager.newBaseManager();
59 | }
60 |
61 | // noting not to do.but init
62 | public void init() {
63 |
64 | }
65 |
66 | public String ocr(String url) throws Exception {
67 | Image image = OpenCVImageFactory.getInstance().fromUrl(url);
68 | return ocr(image);
69 | }
70 |
71 | public String ocr(URL resource) throws Exception {
72 | Image image = OpenCVImageFactory.getInstance().fromUrl(resource);
73 | return ocr(image);
74 | }
75 |
76 | public String ocr(byte[] fileData) throws Exception {
77 | ByteArrayInputStream is = new ByteArrayInputStream(fileData);
78 | Image image = ImageFactory.getInstance().fromInputStream(is);
79 | return ocr(image);
80 | }
81 |
82 | public String ocr(File imageFile) throws Exception {
83 | Path path = imageFile.toPath();
84 | Image image = OpenCVImageFactory.getInstance().fromFile(path);
85 | return ocr(image);
86 | }
87 |
88 | public String ocr(Image image) throws Exception {
89 | List detections = recognition.predict(manager, image, detector, recognizer);
90 | if (detections == null) {
91 | return null;
92 | }
93 |
94 | List initList = new ArrayList<>();
95 | for (RotatedBox result : detections) {
96 | // put low Y value at the head of the queue.
97 | initList.add(result);
98 | }
99 | Collections.sort(initList);
100 |
101 | List> lines = new ArrayList<>();
102 | List line = new ArrayList<>();
103 | RotatedBoxCompX firstBox = new RotatedBoxCompX(initList.get(0).getBox(), initList.get(0).getText());
104 | line.add(firstBox);
105 | lines.add((ArrayList) line);
106 | for (int i = 1; i < initList.size(); i++) {
107 | RotatedBoxCompX tmpBox = new RotatedBoxCompX(initList.get(i).getBox(), initList.get(i).getText());
108 | float y1 = firstBox.getBox().toFloatArray()[1];
109 | float y2 = tmpBox.getBox().toFloatArray()[1];
110 | float dis = Math.abs(y2 - y1);
111 | if (dis < 20) { // 认为是同 1 行 - Considered to be in the same line
112 | line.add(tmpBox);
113 | } else { // 换行 - Line break
114 | firstBox = tmpBox;
115 | Collections.sort(line);
116 | line = new ArrayList<>();
117 | line.add(firstBox);
118 | lines.add((ArrayList) line);
119 | }
120 | }
121 |
122 | StringBuffer fullText = new StringBuffer();
123 | for (int i = 0; i < lines.size(); i++) {
124 | for (int j = 0; j < lines.get(i).size(); j++) {
125 | String text = lines.get(i).get(j).getText();
126 | if (text.trim().equals(""))
127 | continue;
128 | fullText.append(text + " ");
129 | }
130 | fullText.append('\n');
131 | }
132 | return fullText.toString();
133 | }
134 |
135 | public void close() {
136 | detector.close();
137 | recognizer.close();
138 | }
139 |
140 | }
141 |
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/common/ImageUtils.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.djl.paddle.ocr.v4.common;
2 |
3 | import java.awt.BasicStroke;
4 | import java.awt.Color;
5 | import java.awt.Font;
6 | import java.awt.Graphics;
7 | import java.awt.Graphics2D;
8 | import java.awt.image.BufferedImage;
9 | import java.io.IOException;
10 | import java.nio.file.Files;
11 | import java.nio.file.Path;
12 | import java.nio.file.Paths;
13 | import java.util.ArrayList;
14 | import java.util.List;
15 |
16 | import org.opencv.core.Mat;
17 | import org.opencv.core.Point;
18 | import org.opencv.core.Scalar;
19 | import org.opencv.imgproc.Imgproc;
20 |
21 | import ai.djl.modality.cv.Image;
22 | import ai.djl.modality.cv.ImageFactory;
23 | import ai.djl.modality.cv.output.DetectedObjects;
24 | import ai.djl.ndarray.NDArray;
25 |
26 | /**
27 | * 图像工具类
28 | */
29 | public class ImageUtils {
30 |
31 | /**
32 | * 保存BufferedImage图片
33 | *
34 | * @param img
35 | * @param name
36 | * @param path
37 | */
38 | public static void saveImage(BufferedImage img, String name, String path) {
39 | Image djlImg = ImageFactory.getInstance().fromImage(img); // 支持多种图片格式,自动适配
40 | Path outputDir = Paths.get(path);
41 | Path imagePath = outputDir.resolve(name);
42 | // OpenJDK 不能保存 jpg 图片的 alpha channel
43 | try {
44 | djlImg.save(Files.newOutputStream(imagePath), "png");
45 | } catch (IOException e) {
46 | e.printStackTrace();
47 | }
48 | }
49 |
50 | /**
51 | * 保存DJL图片
52 | *
53 | * @param img
54 | * @param name
55 | * @param path
56 | */
57 | public static void saveImage(Image img, String name, String path) {
58 | Path outputDir = Paths.get(path);
59 | if (!Files.exists(outputDir)) {
60 | try {
61 | Files.createDirectories(outputDir);
62 | } catch (IOException e) {
63 | e.printStackTrace();
64 | }
65 | }
66 | Path imagePath = outputDir.resolve(name);
67 | // OpenJDK 不能保存 jpg 图片的 alpha channel
68 | try {
69 | img.save(Files.newOutputStream(imagePath), "png");
70 | } catch (IOException e) {
71 | e.printStackTrace();
72 | }
73 | }
74 |
75 | /**
76 | * 保存图片,含检测框
77 | *
78 | * @param img
79 | * @param detection
80 | * @param name
81 | * @param path
82 | * @throws IOException
83 | */
84 | public static void saveBoundingBoxImage(Image img, DetectedObjects detection, String name, String path)
85 | throws IOException {
86 | // Make image copy with alpha channel because original image was jpg
87 | img.drawBoundingBoxes(detection);
88 | Path outputDir = Paths.get(path);
89 | Files.createDirectories(outputDir);
90 | Path imagePath = outputDir.resolve(name);
91 | // OpenJDK can't save jpg with alpha channel
92 | img.save(Files.newOutputStream(imagePath), "png");
93 | }
94 |
95 | /**
96 | * 画矩形
97 | *
98 | * @param mat
99 | * @param box
100 | */
101 | public static void drawRect(Mat mat, NDArray box) {
102 |
103 | float[] points = box.toFloatArray();
104 | List list = new ArrayList<>();
105 |
106 | for (int i = 0; i < 4; i++) {
107 | Point point = new Point((int) points[2 * i], (int) points[2 * i + 1]);
108 | list.add(point);
109 | }
110 |
111 | Imgproc.line(mat, list.get(0), list.get(1), new Scalar(0, 255, 0), 1);
112 | Imgproc.line(mat, list.get(1), list.get(2), new Scalar(0, 255, 0), 1);
113 | Imgproc.line(mat, list.get(2), list.get(3), new Scalar(0, 255, 0), 1);
114 | Imgproc.line(mat, list.get(3), list.get(0), new Scalar(0, 255, 0), 1);
115 | }
116 |
117 | /**
118 | * 画矩形
119 | *
120 | * @param mat
121 | * @param box
122 | * @param text
123 | */
124 | public static void drawRectWithText(Mat mat, NDArray box, String text) {
125 |
126 | float[] points = box.toFloatArray();
127 | List list = new ArrayList<>();
128 |
129 | for (int i = 0; i < 4; i++) {
130 | Point point = new Point((int) points[2 * i], (int) points[2 * i + 1]);
131 | list.add(point);
132 | }
133 |
134 | Imgproc.line(mat, list.get(0), list.get(1), new Scalar(0, 255, 0), 1);
135 | Imgproc.line(mat, list.get(1), list.get(2), new Scalar(0, 255, 0), 1);
136 | Imgproc.line(mat, list.get(2), list.get(3), new Scalar(0, 255, 0), 1);
137 | Imgproc.line(mat, list.get(3), list.get(0), new Scalar(0, 255, 0), 1);
138 | // 中文乱码
139 | Imgproc.putText(mat, text, list.get(0), Imgproc.FONT_HERSHEY_SCRIPT_SIMPLEX, 1.0, new Scalar(0, 255, 0), 1);
140 | }
141 |
142 | /**
143 | * 画检测框(有倾斜角)
144 | *
145 | * @param image
146 | * @param box
147 | */
148 | public static void drawImageRect(BufferedImage image, NDArray box) {
149 | float[] points = box.toFloatArray();
150 | int[] xPoints = new int[5];
151 | int[] yPoints = new int[5];
152 |
153 | for (int i = 0; i < 4; i++) {
154 | xPoints[i] = (int) points[2 * i];
155 | yPoints[i] = (int) points[2 * i + 1];
156 | }
157 | xPoints[4] = xPoints[0];
158 | yPoints[4] = yPoints[0];
159 |
160 | // 将绘制图像转换为Graphics2D
161 | Graphics2D g = (Graphics2D) image.getGraphics();
162 | try {
163 | g.setColor(new Color(0, 255, 0));
164 | // 声明画笔属性 :粗 细(单位像素)末端无修饰 折线处呈尖角
165 | BasicStroke bStroke = new BasicStroke(4, BasicStroke.CAP_BUTT, BasicStroke.JOIN_MITER);
166 | g.setStroke(bStroke);
167 | g.drawPolyline(xPoints, yPoints, 5); // xPoints, yPoints, nPoints
168 | } finally {
169 | g.dispose();
170 | }
171 | }
172 |
173 | /**
174 | * 画检测框(有倾斜角)和文本
175 | *
176 | * @param image
177 | * @param box
178 | * @param text
179 | */
180 | public static void drawImageRectWithText(BufferedImage image, NDArray box, String text) {
181 | float[] points = box.toFloatArray();
182 | int[] xPoints = new int[5];
183 | int[] yPoints = new int[5];
184 |
185 | for (int i = 0; i < 4; i++) {
186 | xPoints[i] = (int) points[2 * i];
187 | yPoints[i] = (int) points[2 * i + 1];
188 | }
189 | xPoints[4] = xPoints[0];
190 | yPoints[4] = yPoints[0];
191 |
192 | // 将绘制图像转换为Graphics2D
193 | Graphics2D g = (Graphics2D) image.getGraphics();
194 | try {
195 | int fontSize = 32;
196 | Font font = new Font("楷体", Font.PLAIN, fontSize);
197 | g.setFont(font);
198 | g.setColor(new Color(0, 0, 255));
199 | // 声明画笔属性 :粗 细(单位像素)末端无修饰 折线处呈尖角
200 | BasicStroke bStroke = new BasicStroke(2, BasicStroke.CAP_BUTT, BasicStroke.JOIN_MITER);
201 | g.setStroke(bStroke);
202 | g.drawPolyline(xPoints, yPoints, 5); // xPoints, yPoints, nPoints
203 | g.drawString(text, xPoints[0], yPoints[0]);
204 | } finally {
205 | g.dispose();
206 | }
207 | }
208 |
209 | /**
210 | * 画检测框
211 | *
212 | * @param image
213 | * @param x
214 | * @param y
215 | * @param width
216 | * @param height
217 | */
218 | public static void drawImageRect(BufferedImage image, int x, int y, int width, int height) {
219 | // 将绘制图像转换为Graphics2D
220 | Graphics2D g = (Graphics2D) image.getGraphics();
221 | try {
222 | g.setColor(new Color(0, 255, 0));
223 | // 声明画笔属性 :粗 细(单位像素)末端无修饰 折线处呈尖角
224 | BasicStroke bStroke = new BasicStroke(2, BasicStroke.CAP_BUTT, BasicStroke.JOIN_MITER);
225 | g.setStroke(bStroke);
226 | g.drawRect(x, y, width, height);
227 | } finally {
228 | g.dispose();
229 | }
230 | }
231 |
232 | /**
233 | * 显示文字
234 | *
235 | * @param image
236 | * @param text
237 | * @param x
238 | * @param y
239 | */
240 | public static void drawImageText(BufferedImage image, String text, int x, int y) {
241 | Graphics graphics = image.getGraphics();
242 | int fontSize = 32;
243 | Font font = new Font("楷体", Font.PLAIN, fontSize);
244 | try {
245 | graphics.setFont(font);
246 | graphics.setColor(new Color(0, 0, 255));
247 | //int strWidth = graphics.getFontMetrics().stringWidth(text);
248 | graphics.drawString(text, x, y);
249 | } finally {
250 | graphics.dispose();
251 | }
252 | }
253 | }
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/common/RotatedBox.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.djl.paddle.ocr.v4.common;
2 |
3 | import ai.djl.ndarray.NDArray;
4 |
5 | /**
6 | * 旋转检测框
7 | */
8 | public class RotatedBox implements Comparable {
9 | private NDArray box;
10 | private String text;
11 |
12 | public RotatedBox(NDArray box, String text) {
13 | this.box = box;
14 | this.text = text;
15 | }
16 |
17 | /**
18 | * 将左上角 Y 坐标升序排序
19 | *
20 | * @param o
21 | * @return
22 | */
23 | @Override
24 | public int compareTo(RotatedBox o) {
25 | NDArray lowBox = this.getBox();
26 | NDArray highBox = o.getBox();
27 | float lowY = lowBox.toFloatArray()[1];
28 | float highY = highBox.toFloatArray()[1];
29 | return (lowY < highY) ? -1 : 1;
30 | }
31 |
32 | public NDArray getBox() {
33 | return box;
34 | }
35 |
36 | public void setBox(NDArray box) {
37 | this.box = box;
38 | }
39 |
40 | public String getText() {
41 | return text;
42 | }
43 |
44 | public void setText(String text) {
45 | this.text = text;
46 | }
47 | }
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/common/RotatedBoxCompX.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.djl.paddle.ocr.v4.common;
2 |
3 | import ai.djl.ndarray.NDArray;
4 |
5 | /**
6 | */
7 | public class RotatedBoxCompX implements Comparable {
8 | private NDArray box;
9 | private String text;
10 |
11 | public RotatedBoxCompX(NDArray box, String text) {
12 | this.box = box;
13 | this.text = text;
14 | }
15 |
16 | /**
17 | * 将左上角 X 坐标升序排序
18 | *
19 | * @param o
20 | * @return
21 | */
22 | @Override
23 | public int compareTo(RotatedBoxCompX o) {
24 | NDArray leftBox = this.getBox();
25 | NDArray rightBox = o.getBox();
26 | float leftX = leftBox.toFloatArray()[0];
27 | float rightX = rightBox.toFloatArray()[0];
28 | return (leftX < rightX) ? -1 : 1;
29 | }
30 |
31 | public NDArray getBox() {
32 | return box;
33 | }
34 |
35 | public void setBox(NDArray box) {
36 | this.box = box;
37 | }
38 |
39 | public String getText() {
40 | return text;
41 | }
42 |
43 | public void setText(String text) {
44 | this.text = text;
45 | }
46 | }
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/detection/OCRDetectionTranslator.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.djl.paddle.ocr.v4.detection;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 | import java.util.Map;
6 |
7 | import org.opencv.core.Core;
8 | import org.opencv.core.CvType;
9 | import org.opencv.core.Mat;
10 | import org.opencv.core.MatOfPoint;
11 | import org.opencv.core.MatOfPoint2f;
12 | import org.opencv.core.RotatedRect;
13 | import org.opencv.core.Scalar;
14 | import org.opencv.imgproc.Imgproc;
15 |
16 | import com.litongjava.ai.djl.paddle.ocr.v4.opencv.NDArrayUtils;
17 |
18 | import ai.djl.modality.cv.Image;
19 | import ai.djl.modality.cv.util.NDImageUtils;
20 | import ai.djl.ndarray.NDArray;
21 | import ai.djl.ndarray.NDArrays;
22 | import ai.djl.ndarray.NDList;
23 | import ai.djl.ndarray.NDManager;
24 | import ai.djl.ndarray.index.NDIndex;
25 | import ai.djl.ndarray.types.DataType;
26 | import ai.djl.ndarray.types.Shape;
27 | import ai.djl.translate.Batchifier;
28 | import ai.djl.translate.Translator;
29 | import ai.djl.translate.TranslatorContext;
30 |
31 | /**
32 | * 文字检测前后处理
33 | */
34 | public class OCRDetectionTranslator implements Translator {
// det_algorithm == "DB"
// Probability cut-off used to binarize the prediction map in processOutput.
private final float thresh = 0.3f;
// When true, processOutput dilates the binarized map with a 2x2 kernel before contour search.
private final boolean use_dilation = false;
// NOTE(review): score_mode and box_type are not read in the visible part of this class;
// presumably kept to mirror the reference configuration - confirm.
private final String score_mode = "fast";
private final String box_type = "quad";

// Constructor argument "limit_side_len" (default 960); its use is outside the visible code.
private final int limit_side_len;
// Upper bound on the number of contours examined in boxes_from_bitmap (default 1000).
private final int max_candidates;
// Candidates whose get_mini_boxes result is below this value are dropped (default 3).
private final int min_size;
// Candidates whose box_score_fast score is below this value are dropped (default 0.6).
private final float box_thresh;
// Constructor argument "unclip_ratio" (default 1.6); its use is outside the visible code.
private final float unclip_ratio;
// processOutput divides box y / x coordinates by these ratios; they are assigned elsewhere.
private float ratio_h;
private float ratio_w;
// clip_det_res clamps box coordinates to [0, img_width - 1] x [0, img_height - 1];
// assigned elsewhere.
private int img_height;
private int img_width;
50 |
/**
 * Creates the translator from optional string-convertible arguments. Missing keys, null values
 * and a null map all fall back to the defaults.
 *
 * @param arguments optional settings: {@code limit_side_len} (default 960),
 *                  {@code max_candidates} (1000), {@code min_size} (3), {@code box_thresh} (0.6),
 *                  {@code unclip_ratio} (1.6)
 * @throws NumberFormatException if a supplied value cannot be parsed as a number
 */
public OCRDetectionTranslator(Map<String, ?> arguments) {
  limit_side_len = parseIntArgument(arguments, "limit_side_len", 960);
  max_candidates = parseIntArgument(arguments, "max_candidates", 1000);
  min_size = parseIntArgument(arguments, "min_size", 3);
  box_thresh = parseFloatArgument(arguments, "box_thresh", 0.6f); // 0.5f
  unclip_ratio = parseFloatArgument(arguments, "unclip_ratio", 1.6f);
}

/** Returns the integer stored under {@code key}, or {@code defaultValue} when it is absent or null. */
private static int parseIntArgument(Map<String, ?> arguments, String key, int defaultValue) {
  Object value = arguments == null ? null : arguments.get(key);
  return value == null ? defaultValue : Integer.parseInt(value.toString().trim());
}

/** Returns the float stored under {@code key}, or {@code defaultValue} when it is absent or null. */
private static float parseFloatArgument(Map<String, ?> arguments, String key, float defaultValue) {
  Object value = arguments == null ? null : arguments.get(key);
  return value == null ? defaultValue : Float.parseFloat(value.toString().trim());
}
73 |
74 | @Override
75 | public NDList processOutput(TranslatorContext ctx, NDList list) {
76 | NDManager manager = ctx.getNDManager();
77 | NDArray pred = list.get(0);
78 | pred = pred.squeeze();
79 | NDArray segmentation = pred.gt(thresh); // thresh=0.3 .mul(255f)
80 |
81 | segmentation = segmentation.toType(DataType.UINT8, true);
82 | Shape shape = segmentation.getShape();
83 | int rows = (int) shape.get(0);
84 | int cols = (int) shape.get(1);
85 |
86 | Mat newMask = new Mat();
87 | if (this.use_dilation) {
88 | Mat mask = new Mat();
89 | //convert from NDArray to Mat
90 | Mat srcMat = NDArrayUtils.uint8NDArrayToMat(segmentation);
91 | // size 越小,腐蚀的单位越小,图片越接近原图
92 | // Mat dilation_kernel = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(2, 2));
93 | Mat dilation_kernel = NDArrayUtils.uint8ArrayToMat(new byte[][]{{1, 1}, {1, 1}});
94 | /**
95 | * 膨胀说明: 图像的一部分区域与指定的核进行卷积, 求核的最`大`值并赋值给指定区域。 膨胀可以理解为图像中`高亮区域`的'领域扩大'。
96 | * 意思是高亮部分会侵蚀不是高亮的部分,使高亮部分越来越多。
97 | */
98 | Imgproc.dilate(srcMat, mask, dilation_kernel);
99 | //destination Matrix
100 | Scalar scalar = new Scalar(255);
101 | Core.multiply(mask, scalar, newMask);
102 | // release Mat
103 | mask.release();
104 | srcMat.release();
105 | dilation_kernel.release();
106 | } else {
107 | Mat srcMat = NDArrayUtils.uint8NDArrayToMat(segmentation);
108 | //destination Matrix
109 | Scalar scalar = new Scalar(255);
110 | Core.multiply(srcMat, scalar, newMask);
111 | // release Mat
112 | srcMat.release();
113 | }
114 |
115 | NDList dt_boxes = null;
116 | NDArray boxes = boxes_from_bitmap(manager, pred, newMask);
117 | if (boxes != null) {
118 | //boxes[:, :, 0] = boxes[:, :, 0] / ratio_w
119 | NDArray boxes1 = boxes.get(":, :, 0").div(ratio_w);
120 | boxes.set(new NDIndex(":, :, 0"), boxes1);
121 | //boxes[:, :, 1] = boxes[:, :, 1] / ratio_h
122 | NDArray boxes2 = boxes.get(":, :, 1").div(ratio_h);
123 | boxes.set(new NDIndex(":, :, 1"), boxes2);
124 |
125 | dt_boxes = this.filter_tag_det_res(boxes);
126 |
127 | dt_boxes.detach();
128 | }
129 |
130 | // release Mat
131 | newMask.release();
132 |
133 | return dt_boxes;
134 | }
135 |
136 |
137 | private NDList filter_tag_det_res(NDArray dt_boxes) {
138 | NDList boxesList = new NDList();
139 |
140 | int num = (int) dt_boxes.getShape().get(0);
141 | for (int i = 0; i < num; i++) {
142 | NDArray box = dt_boxes.get(i);
143 | box = order_points_clockwise(box);
144 | box = clip_det_res(box);
145 | float[] box0 = box.get(0).toFloatArray();
146 | float[] box1 = box.get(1).toFloatArray();
147 | float[] box3 = box.get(3).toFloatArray();
148 | int rect_width = (int) Math.sqrt(Math.pow(box1[0] - box0[0], 2) + Math.pow(box1[1] - box0[1], 2));
149 | int rect_height = (int) Math.sqrt(Math.pow(box3[0] - box0[0], 2) + Math.pow(box3[1] - box0[1], 2));
150 | if (rect_width <= 3 || rect_height <= 3)
151 | continue;
152 | boxesList.add(box);
153 | }
154 |
155 | return boxesList;
156 | }
157 |
158 | private NDArray clip_det_res(NDArray points) {
159 | for (int i = 0; i < points.getShape().get(0); i++) {
160 | int value = Math.max((int) points.get(i, 0).toFloatArray()[0], 0);
161 | value = Math.min(value, img_width - 1);
162 | points.set(new NDIndex(i + ",0"), value);
163 | value = Math.max((int) points.get(i, 1).toFloatArray()[0], 0);
164 | value = Math.min(value, img_height - 1);
165 | points.set(new NDIndex(i + ",1"), value);
166 | }
167 |
168 | return points;
169 | }
170 |
171 | /**
172 | * sort the points based on their x-coordinates
173 | * 顺时针
174 | *
175 | * @param pts
176 | * @return
177 | */
178 |
179 | private NDArray order_points_clockwise(NDArray pts) {
180 | NDList list = new NDList();
181 | long[] indexes = pts.get(":, 0").argSort().toLongArray();
182 |
183 | // grab the left-most and right-most points from the sorted
184 | // x-roodinate points
185 | Shape s1 = pts.getShape();
186 | NDArray leftMost1 = pts.get(indexes[0] + ",:");
187 | NDArray leftMost2 = pts.get(indexes[1] + ",:");
188 | NDArray leftMost = leftMost1.concat(leftMost2).reshape(2, 2);
189 | NDArray rightMost1 = pts.get(indexes[2] + ",:");
190 | NDArray rightMost2 = pts.get(indexes[3] + ",:");
191 | NDArray rightMost = rightMost1.concat(rightMost2).reshape(2, 2);
192 |
193 | // now, sort the left-most coordinates according to their
194 | // y-coordinates so we can grab the top-left and bottom-left
195 | // points, respectively
196 | indexes = leftMost.get(":, 1").argSort().toLongArray();
197 | NDArray lt = leftMost.get(indexes[0] + ",:");
198 | NDArray lb = leftMost.get(indexes[1] + ",:");
199 | indexes = rightMost.get(":, 1").argSort().toLongArray();
200 | NDArray rt = rightMost.get(indexes[0] + ",:");
201 | NDArray rb = rightMost.get(indexes[1] + ",:");
202 |
203 | list.add(lt);
204 | list.add(rt);
205 | list.add(rb);
206 | list.add(lb);
207 |
208 | NDArray rect = NDArrays.concat(list).reshape(4, 2);
209 | return rect;
210 | }
211 |
212 | /**
213 | * Get boxes from the binarized image predicted by DB
214 | *
215 | * @param manager
216 | * @param pred the binarized image predicted by DB.
217 | * @param bitmap new 'pred' after threshold filtering.
218 | */
219 | private NDArray boxes_from_bitmap(NDManager manager, NDArray pred, Mat bitmap) {
220 | int dest_height = (int) pred.getShape().get(0);
221 | int dest_width = (int) pred.getShape().get(1);
222 | int height = bitmap.rows();
223 | int width = bitmap.cols();
224 |
225 | List contours = new ArrayList<>();
226 | Mat hierarchy = new Mat();
227 | // 寻找轮廓
228 | Imgproc.findContours(
229 | bitmap,
230 | contours,
231 | hierarchy,
232 | Imgproc.RETR_LIST,
233 | Imgproc.CHAIN_APPROX_SIMPLE);
234 |
235 | int num_contours = Math.min(contours.size(), max_candidates);
236 | NDList boxList = new NDList();
237 | float[] scores = new float[num_contours];
238 |
239 | for (int index = 0; index < num_contours; index++) {
240 | MatOfPoint contour = contours.get(index);
241 | MatOfPoint2f newContour = new MatOfPoint2f(contour.toArray());
242 | float[][] pointsArr = new float[4][2];
243 | int sside = get_mini_boxes(newContour, pointsArr);
244 | if (sside < this.min_size)
245 | continue;
246 | NDArray points = manager.create(pointsArr);
247 | float score = box_score_fast(manager, pred, points);
248 | if (score < this.box_thresh)
249 | continue;
250 |
251 | NDArray box = unclip(manager, points); // TODO get_mini_boxes(box)
252 |
253 | // box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
254 | NDArray boxes1 = box.get(":,0").div(width).mul(dest_width).round().clip(0, dest_width);
255 | box.set(new NDIndex(":, 0"), boxes1);
256 | // box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0, dest_height)
257 | NDArray boxes2 = box.get(":,1").div(height).mul(dest_height).round().clip(0, dest_height);
258 | box.set(new NDIndex(":, 1"), boxes2);
259 |
260 | boxList.add(box);
261 | scores[index] = score;
262 |
263 | // release memory
264 | contour.release();
265 | newContour.release();
266 | }
267 |
268 | // release
269 | hierarchy.release();
270 |
271 | NDArray boxes = null;
272 | if (boxList.size() > 0) {
273 | boxes = NDArrays.stack(boxList);
274 | return boxes;
275 | }
276 |
277 | return boxes;
278 |
279 |
280 | }
281 |
282 | /**
283 | * Shrink or expand the boxaccording to 'unclip_ratio'
284 | *
285 | * @param points The predicted box.
286 | * @return uncliped box
287 | */
288 | private NDArray unclip(NDManager manager, NDArray points) {
289 | points = order_points_clockwise(points);
290 | float[] pointsArr = points.toFloatArray();
291 | float[] lt = java.util.Arrays.copyOfRange(pointsArr, 0, 2);
292 | float[] lb = java.util.Arrays.copyOfRange(pointsArr, 6, 8);
293 |
294 | float[] rt = java.util.Arrays.copyOfRange(pointsArr, 2, 4);
295 | float[] rb = java.util.Arrays.copyOfRange(pointsArr, 4, 6);
296 |
297 | float width = distance(lt, rt);
298 | float height = distance(lt, lb);
299 |
300 | if (width > height) {
301 | float k = (lt[1] - rt[1]) / (lt[0] - rt[0]); // y = k * x + b
302 |
303 | float delta_dis = height;
304 | float delta_x = (float) Math.sqrt((delta_dis * delta_dis) / (k * k + 1));
305 | float delta_y = Math.abs(k * delta_x);
306 |
307 | if (k > 0) {
308 | pointsArr[0] = lt[0] - delta_x + delta_y;
309 | pointsArr[1] = lt[1] - delta_y - delta_x;
310 | pointsArr[2] = rt[0] + delta_x + delta_y;
311 | pointsArr[3] = rt[1] + delta_y - delta_x;
312 |
313 | pointsArr[4] = rb[0] + delta_x - delta_y;
314 | pointsArr[5] = rb[1] + delta_y + delta_x;
315 | pointsArr[6] = lb[0] - delta_x - delta_y;
316 | pointsArr[7] = lb[1] - delta_y + delta_x;
317 | } else {
318 | pointsArr[0] = lt[0] - delta_x - delta_y;
319 | pointsArr[1] = lt[1] + delta_y - delta_x;
320 | pointsArr[2] = rt[0] + delta_x - delta_y;
321 | pointsArr[3] = rt[1] - delta_y - delta_x;
322 |
323 | pointsArr[4] = rb[0] + delta_x + delta_y;
324 | pointsArr[5] = rb[1] - delta_y + delta_x;
325 | pointsArr[6] = lb[0] - delta_x + delta_y;
326 | pointsArr[7] = lb[1] + delta_y + delta_x;
327 | }
328 | } else {
329 | float k = (lt[1] - rt[1]) / (lt[0] - rt[0]); // y = k * x + b
330 |
331 | float delta_dis = width;
332 | float delta_y = (float) Math.sqrt((delta_dis * delta_dis) / (k * k + 1));
333 | float delta_x = Math.abs(k * delta_y);
334 |
335 | if (k > 0) {
336 | pointsArr[0] = lt[0] + delta_x - delta_y;
337 | pointsArr[1] = lt[1] - delta_y - delta_x;
338 | pointsArr[2] = rt[0] + delta_x + delta_y;
339 | pointsArr[3] = rt[1] - delta_y + delta_x;
340 |
341 | pointsArr[4] = rb[0] - delta_x + delta_y;
342 | pointsArr[5] = rb[1] + delta_y + delta_x;
343 | pointsArr[6] = lb[0] - delta_x - delta_y;
344 | pointsArr[7] = lb[1] + delta_y - delta_x;
345 | } else {
346 | pointsArr[0] = lt[0] - delta_x - delta_y;
347 | pointsArr[1] = lt[1] - delta_y + delta_x;
348 | pointsArr[2] = rt[0] - delta_x + delta_y;
349 | pointsArr[3] = rt[1] - delta_y - delta_x;
350 |
351 | pointsArr[4] = rb[0] + delta_x + delta_y;
352 | pointsArr[5] = rb[1] + delta_y - delta_x;
353 | pointsArr[6] = lb[0] + delta_x - delta_y;
354 | pointsArr[7] = lb[1] + delta_y + delta_x;
355 | }
356 | }
357 | points = manager.create(pointsArr).reshape(4, 2);
358 |
359 | return points;
360 | }
361 |
362 | private float distance(float[] point1, float[] point2) {
363 | float disX = point1[0] - point2[0];
364 | float disY = point1[1] - point2[1];
365 | float dis = (float) Math.sqrt(disX * disX + disY * disY);
366 | return dis;
367 | }
368 |
369 | /**
370 | * Get boxes from the contour or box.
371 | *
372 | * @param contour The predicted contour.
373 | * @param pointsArr The predicted box.
374 | * @return smaller side of box
375 | */
376 | private int get_mini_boxes(MatOfPoint2f contour, float[][] pointsArr) {
377 | // https://blog.csdn.net/qq_37385726/article/details/82313558
378 | // bounding_box[1] - rect 返回矩形的长和宽
379 | RotatedRect rect = Imgproc.minAreaRect(contour);
380 | Mat points = new Mat();
381 | Imgproc.boxPoints(rect, points);
382 |
383 | float[][] fourPoints = new float[4][2];
384 | for (int row = 0; row < 4; row++) {
385 | fourPoints[row][0] = (float) points.get(row, 0)[0];
386 | fourPoints[row][1] = (float) points.get(row, 1)[0];
387 | }
388 |
389 | float[] tmpPoint = new float[2];
390 | for (int i = 0; i < 4; i++) {
391 | for (int j = i + 1; j < 4; j++) {
392 | if (fourPoints[j][0] < fourPoints[i][0]) {
393 | tmpPoint[0] = fourPoints[i][0];
394 | tmpPoint[1] = fourPoints[i][1];
395 | fourPoints[i][0] = fourPoints[j][0];
396 | fourPoints[i][1] = fourPoints[j][1];
397 | fourPoints[j][0] = tmpPoint[0];
398 | fourPoints[j][1] = tmpPoint[1];
399 | }
400 | }
401 | }
402 |
403 | int index_1 = 0;
404 | int index_2 = 1;
405 | int index_3 = 2;
406 | int index_4 = 3;
407 |
408 | if (fourPoints[1][1] > fourPoints[0][1]) {
409 | index_1 = 0;
410 | index_4 = 1;
411 | } else {
412 | index_1 = 1;
413 | index_4 = 0;
414 | }
415 |
416 | if (fourPoints[3][1] > fourPoints[2][1]) {
417 | index_2 = 2;
418 | index_3 = 3;
419 | } else {
420 | index_2 = 3;
421 | index_3 = 2;
422 | }
423 |
424 | pointsArr[0] = fourPoints[index_1];
425 | pointsArr[1] = fourPoints[index_2];
426 | pointsArr[2] = fourPoints[index_3];
427 | pointsArr[3] = fourPoints[index_4];
428 |
429 | int height = rect.boundingRect().height;
430 | int width = rect.boundingRect().width;
431 | int sside = Math.min(height, width);
432 |
433 | // release
434 | points.release();
435 |
436 | return sside;
437 | }
438 |
439 | /**
440 | * Calculate the score of box.
441 | *
442 | * @param bitmap The binarized image predicted by DB.
443 | * @param points The predicted box
444 | * @return
445 | */
446 | private float box_score_fast(NDManager manager, NDArray bitmap, NDArray points) {
447 | NDArray box = points.get(":");
448 | long h = bitmap.getShape().get(0);
449 | long w = bitmap.getShape().get(1);
450 | // xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1)
451 | int xmin = box.get(":, 0").min().floor().clip(0, w - 1).toType(DataType.INT32, true).toIntArray()[0];
452 | int xmax = box.get(":, 0").max().ceil().clip(0, w - 1).toType(DataType.INT32, true).toIntArray()[0];
453 | int ymin = box.get(":, 1").min().floor().clip(0, h - 1).toType(DataType.INT32, true).toIntArray()[0];
454 | int ymax = box.get(":, 1").max().ceil().clip(0, h - 1).toType(DataType.INT32, true).toIntArray()[0];
455 |
456 | NDArray mask = manager.zeros(new Shape(ymax - ymin + 1, xmax - xmin + 1), DataType.UINT8);
457 |
458 | box.set(new NDIndex(":, 0"), box.get(":, 0").sub(xmin));
459 | box.set(new NDIndex(":, 1"), box.get(":, 1").sub(ymin));
460 |
461 | //mask - convert from NDArray to Mat
462 | Mat maskMat = NDArrayUtils.uint8NDArrayToMat(mask);
463 |
464 | //mask - convert from NDArray to Mat - 4 rows, 2 cols
465 | Mat boxMat = NDArrayUtils.floatNDArrayToMat(box, CvType.CV_32S);
466 |
467 | // boxMat.reshape(1, new int[]{1, 4, 2});
468 | List pts = new ArrayList<>();
469 | MatOfPoint matOfPoint = NDArrayUtils.matToMatOfPoint(boxMat); // new MatOfPoint(boxMat);
470 | pts.add(matOfPoint);
471 | Imgproc.fillPoly(maskMat, pts, new Scalar(1));
472 |
473 |
474 | NDArray subBitMap = bitmap.get(ymin + ":" + (ymax + 1) + "," + xmin + ":" + (xmax + 1));
475 | Mat bitMapMat = NDArrayUtils.floatNDArrayToMat(subBitMap);
476 |
477 | Scalar score = Core.mean(bitMapMat, maskMat);
478 | float scoreValue = (float) score.val[0];
479 | // release
480 | maskMat.release();
481 | boxMat.release();
482 | bitMapMat.release();
483 |
484 | return scoreValue;
485 | }
486 |
487 | @Override
488 | public NDList processInput(TranslatorContext ctx, Image input) {
489 | NDArray img = input.toNDArray(ctx.getNDManager());
490 | int h = input.getHeight();
491 | int w = input.getWidth();
492 | img_height = h;
493 | img_width = w;
494 |
495 | // limit the max side
496 | float ratio = 1.0f;
497 | if (Math.max(h, w) > limit_side_len) {
498 | if (h > w) {
499 | ratio = (float) limit_side_len / (float) h;
500 | } else {
501 | ratio = (float) limit_side_len / (float) w;
502 | }
503 | }
504 |
505 | int resize_h = (int) (h * ratio);
506 | int resize_w = (int) (w * ratio);
507 |
508 | resize_h = Math.round((float) resize_h / 32f) * 32;
509 | resize_w = Math.round((float) resize_w / 32f) * 32;
510 |
511 | ratio_h = resize_h / (float) h;
512 | ratio_w = resize_w / (float) w;
513 |
514 | img = NDImageUtils.resize(img, resize_w, resize_h);
515 |
516 | img = NDImageUtils.toTensor(img);
517 |
518 | img =
519 | NDImageUtils.normalize(
520 | img,
521 | new float[]{0.485f, 0.456f, 0.406f},
522 | new float[]{0.229f, 0.224f, 0.225f});
523 |
524 | img = img.expandDims(0);
525 |
526 | return new NDList(img);
527 | }
528 |
529 | @Override
530 | public Batchifier getBatchifier() {
531 | return null;
532 | }
533 | }
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/detection/OcrV4Detection.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.djl.paddle.ocr.v4.detection;
2 |
3 | import java.net.URL;
4 | import java.nio.file.Path;
5 | import java.nio.file.Paths;
6 | import java.util.concurrent.ConcurrentHashMap;
7 |
8 | import ai.djl.Device;
9 | import ai.djl.modality.cv.Image;
10 | import ai.djl.ndarray.NDList;
11 | import ai.djl.repository.zoo.Criteria;
12 | import ai.djl.repository.zoo.Criteria.Builder;
13 | import ai.djl.training.util.ProgressBar;
14 | import cn.hutool.core.io.resource.ResourceUtil;
15 |
16 | /**
17 | * 文字检测
18 | */
19 | public final class OcrV4Detection {
20 | /**
21 | * 中文文本检测
22 | *
23 | * @return
24 | */
25 | public Criteria chDetCriteria() {
26 | URL resource = ResourceUtil.getResource("models/ch_PP-OCRv4_det_infer.zip");
27 | System.out.println("resource:" + resource);
28 | Path modelPath = null;
29 | try {
30 | modelPath = Paths.get(resource.toURI());
31 | } catch (Exception e) {
32 | System.err.println(e.getMessage());
33 | }
34 |
35 | Device device = Device.gpu();
36 | Builder builder = Criteria.builder()
37 | //engine
38 | .optEngine("OnnxRuntime")
39 | //.optEngine("PyTorch")
40 |
41 | // .optModelName("inference")
42 | .setTypes(Image.class, NDList.class)
43 | .optDevice(device)
44 | .optTranslator(new OCRDetectionTranslator(new ConcurrentHashMap()))
45 | .optProgress(new ProgressBar());
46 |
47 | if (modelPath != null) {
48 | System.out.println("load from file");
49 | builder.optModelPath(modelPath).optModelName("ch_PP-OCRv4_det_infer");
50 | } else {
51 | System.out.println("load from jar");
52 | builder.optModelUrls("jar:///models/ch_PP-OCRv4_det_infer.zip");
53 | }
54 | Criteria criteria = builder.build();
55 | return criteria;
56 | }
57 |
58 | }
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/opencv/NDArrayUtils.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.djl.paddle.ocr.v4.opencv;
2 |
3 | import ai.djl.ndarray.NDArray;
4 | import org.opencv.core.CvType;
5 | import org.opencv.core.Mat;
6 | import org.opencv.core.MatOfPoint;
7 | import org.opencv.core.Point;
8 |
9 | import java.util.ArrayList;
10 | import java.util.List;
11 |
12 | public class NDArrayUtils {
13 | /**
14 | * Mat To MatOfPoint
15 | *
16 | * @param mat
17 | * @return
18 | */
19 | public static MatOfPoint matToMatOfPoint(Mat mat) {
20 | int rows = mat.rows();
21 | MatOfPoint matOfPoint = new MatOfPoint();
22 |
23 | List list = new ArrayList<>();
24 | for (int i = 0; i < rows; i++) {
25 | Point point = new Point((float) mat.get(i, 0)[0], (float) mat.get(i, 1)[0]);
26 | list.add(point);
27 | }
28 | matOfPoint.fromList(list);
29 |
30 | return matOfPoint;
31 | }
32 |
33 | /**
34 | * float NDArray To float[][] Array
35 | *
36 | * @param ndArray
37 | * @return
38 | */
39 | public static float[][] floatNDArrayToArray(NDArray ndArray) {
40 | int rows = (int) (ndArray.getShape().get(0));
41 | int cols = (int) (ndArray.getShape().get(1));
42 | float[][] arr = new float[rows][cols];
43 |
44 | float[] arrs = ndArray.toFloatArray();
45 | for (int i = 0; i < rows; i++) {
46 | for (int j = 0; j < cols; j++) {
47 | arr[i][j] = arrs[i * cols + j];
48 | }
49 | }
50 | return arr;
51 | }
52 |
53 | /**
54 | * Mat To double[][] Array
55 | *
56 | * @param mat
57 | * @return
58 | */
59 | public static double[][] matToDoubleArray(Mat mat) {
60 | int rows = mat.rows();
61 | int cols = mat.cols();
62 |
63 | double[][] doubles = new double[rows][cols];
64 |
65 | for (int i = 0; i < rows; i++) {
66 | for (int j = 0; j < cols; j++) {
67 | doubles[i][j] = mat.get(i, j)[0];
68 | }
69 | }
70 |
71 | return doubles;
72 | }
73 |
74 | /**
75 | * Mat To float[][] Array
76 | *
77 | * @param mat
78 | * @return
79 | */
80 | public static float[][] matToFloatArray(Mat mat) {
81 | int rows = mat.rows();
82 | int cols = mat.cols();
83 |
84 | float[][] floats = new float[rows][cols];
85 |
86 | for (int i = 0; i < rows; i++) {
87 | for (int j = 0; j < cols; j++) {
88 | floats[i][j] = (float) mat.get(i, j)[0];
89 | }
90 | }
91 |
92 | return floats;
93 | }
94 |
95 | /**
96 | * Mat To byte[][] Array
97 | *
98 | * @param mat
99 | * @return
100 | */
101 | public static byte[][] matToUint8Array(Mat mat) {
102 | int rows = mat.rows();
103 | int cols = mat.cols();
104 |
105 | byte[][] bytes = new byte[rows][cols];
106 |
107 | for (int i = 0; i < rows; i++) {
108 | for (int j = 0; j < cols; j++) {
109 | bytes[i][j] = (byte) mat.get(i, j)[0];
110 | }
111 | }
112 |
113 | return bytes;
114 | }
115 |
116 | /**
117 | * float NDArray To float[][] Array
118 | *
119 | * @param ndArray
120 | * @param cvType
121 | * @return
122 | */
123 | public static Mat floatNDArrayToMat(NDArray ndArray, int cvType) {
124 | int rows = (int) (ndArray.getShape().get(0));
125 | int cols = (int) (ndArray.getShape().get(1));
126 | Mat mat = new Mat(rows, cols, cvType);
127 |
128 | float[] arrs = ndArray.toFloatArray();
129 | for (int i = 0; i < rows; i++) {
130 | for (int j = 0; j < cols; j++) {
131 | mat.put(i, j, arrs[i * cols + j]);
132 | }
133 | }
134 | return mat;
135 | }
136 |
137 | /**
138 | * float NDArray To Mat
139 | *
140 | * @param ndArray
141 | * @return
142 | */
143 | public static Mat floatNDArrayToMat(NDArray ndArray) {
144 | int rows = (int) (ndArray.getShape().get(0));
145 | int cols = (int) (ndArray.getShape().get(1));
146 | Mat mat = new Mat(rows, cols, CvType.CV_32F);
147 |
148 | float[] arrs = ndArray.toFloatArray();
149 | for (int i = 0; i < rows; i++) {
150 | for (int j = 0; j < cols; j++) {
151 | mat.put(i, j, arrs[i * cols + j]);
152 | }
153 | }
154 |
155 | return mat;
156 |
157 | }
158 |
159 | /**
160 | * uint8 NDArray To Mat
161 | *
162 | * @param ndArray
163 | * @return
164 | */
165 | public static Mat uint8NDArrayToMat(NDArray ndArray) {
166 | int rows = (int) (ndArray.getShape().get(0));
167 | int cols = (int) (ndArray.getShape().get(1));
168 | Mat mat = new Mat(rows, cols, CvType.CV_8U);
169 |
170 | byte[] arrs = ndArray.toByteArray();
171 |
172 | for (int i = 0; i < rows; i++) {
173 | for (int j = 0; j < cols; j++) {
174 | mat.put(i, j, arrs[i * cols + j]);
175 | }
176 | }
177 | return mat;
178 | }
179 |
180 | /**
181 | * float[][] Array To Mat
182 | *
183 | * @param arr
184 | * @return
185 | */
186 | public static Mat floatArrayToMat(float[][] arr) {
187 | int rows = arr.length;
188 | int cols = arr[0].length;
189 | Mat mat = new Mat(rows, cols, CvType.CV_32F);
190 |
191 | for (int i = 0; i < rows; i++) {
192 | for (int j = 0; j < cols; j++) {
193 | mat.put(i, j, arr[i][j]);
194 | }
195 | }
196 |
197 | return mat;
198 | }
199 |
200 | /**
201 | * byte[][] Array To Mat
202 | *
203 | * @param arr
204 | * @return
205 | */
206 | public static Mat uint8ArrayToMat(byte[][] arr) {
207 | int rows = arr.length;
208 | int cols = arr[0].length;
209 | Mat mat = new Mat(rows, cols, CvType.CV_8U);
210 |
211 | for (int i = 0; i < rows; i++) {
212 | for (int j = 0; j < cols; j++) {
213 | mat.put(i, j, arr[i][j]);
214 | }
215 | }
216 |
217 | return mat;
218 | }
219 |
220 | /**
221 | * List To Mat
222 | *
223 | * @param points
224 | * @return
225 | */
226 | public static Mat toMat(List points) {
227 | Mat mat = new Mat(points.size(), 2, CvType.CV_32F);
228 | for (int i = 0; i < points.size(); i++) {
229 | ai.djl.modality.cv.output.Point point = points.get(i);
230 | mat.put(i, 0, (float) point.getX());
231 | mat.put(i, 1, (float) point.getY());
232 | }
233 |
234 | return mat;
235 | }
236 | }
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/opencv/OpenCVUtils.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.djl.paddle.ocr.v4.opencv;
2 |
3 | import org.opencv.core.CvType;
4 | import org.opencv.core.Mat;
5 | import org.opencv.imgproc.Imgproc;
6 |
7 | import java.awt.image.BufferedImage;
8 | import java.awt.image.DataBufferByte;
9 |
10 | public class OpenCVUtils {
11 |
12 | /**
13 | * 透视变换
14 | *
15 | * @param src
16 | * @param srcPoints
17 | * @param dstPoints
18 | * @return
19 | */
20 | public static Mat perspectiveTransform(Mat src, Mat srcPoints, Mat dstPoints) {
21 | Mat dst = src.clone();
22 | Mat warp_mat = Imgproc.getPerspectiveTransform(srcPoints, dstPoints);
23 | Imgproc.warpPerspective(src, dst, warp_mat, dst.size());
24 | warp_mat.release();
25 |
26 | return dst;
27 | }
28 |
29 | /**
30 | * Mat to BufferedImage
31 | *
32 | * @param mat
33 | * @return
34 | */
35 | public static BufferedImage mat2Image(Mat mat) {
36 | int width = mat.width();
37 | int height = mat.height();
38 | byte[] data = new byte[width * height * (int) mat.elemSize()];
39 | Imgproc.cvtColor(mat, mat, 4);
40 | mat.get(0, 0, data);
41 | BufferedImage ret = new BufferedImage(width, height, 5);
42 | ret.getRaster().setDataElements(0, 0, width, height, data);
43 | return ret;
44 | }
45 |
46 | /**
47 | * BufferedImage to Mat
48 | *
49 | * @param img
50 | * @return
51 | */
52 | public static Mat image2Mat(BufferedImage img) {
53 | int width = img.getWidth();
54 | int height = img.getHeight();
55 | byte[] data = ((DataBufferByte) img.getRaster().getDataBuffer()).getData();
56 | Mat mat = new Mat(height, width, CvType.CV_8UC3);
57 | mat.put(0, 0, data);
58 | return mat;
59 | }
60 | }
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/recognition/OcrV4Recognition.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.djl.paddle.ocr.v4.recognition;
2 |
3 | import java.awt.image.BufferedImage;
4 | import java.net.URL;
5 | import java.nio.file.Path;
6 | import java.nio.file.Paths;
7 | import java.util.ArrayList;
8 | import java.util.List;
9 | import java.util.concurrent.ConcurrentHashMap;
10 |
11 | import org.opencv.core.Mat;
12 |
13 | import com.litongjava.ai.djl.paddle.ocr.v4.common.RotatedBox;
14 | import com.litongjava.ai.djl.paddle.ocr.v4.opencv.NDArrayUtils;
15 | import com.litongjava.ai.djl.paddle.ocr.v4.opencv.OpenCVUtils;
16 |
17 | import ai.djl.Device;
18 | import ai.djl.inference.Predictor;
19 | import ai.djl.modality.cv.Image;
20 | import ai.djl.modality.cv.ImageFactory;
21 | import ai.djl.modality.cv.output.Point;
22 | import ai.djl.modality.cv.util.NDImageUtils;
23 | import ai.djl.ndarray.NDArray;
24 | import ai.djl.ndarray.NDList;
25 | import ai.djl.ndarray.NDManager;
26 | import ai.djl.opencv.OpenCVImageFactory;
27 | import ai.djl.repository.zoo.Criteria;
28 | import ai.djl.repository.zoo.Criteria.Builder;
29 | import ai.djl.training.util.ProgressBar;
30 | import ai.djl.translate.TranslateException;
31 | import cn.hutool.core.io.resource.ResourceUtil;
32 |
33 | /**
34 | * 文字识别
35 | */
36 | public final class OcrV4Recognition {
37 |
38 | /**
39 | * 中文简体
40 | *
41 | * @return
42 | */
43 | public Criteria chRecCriteria() {
44 | URL resource = ResourceUtil.getResource("models/ch_PP-OCRv4_rec_infer.zip");
45 | System.out.println("resource:" + resource);
46 | Path modelPath = null;
47 | try {
48 | modelPath = Paths.get(resource.toURI());
49 | } catch (Exception e) {
50 | System.err.println(e.getMessage());
51 | }
52 |
53 | Device device = Device.gpu();
54 | Builder builder = Criteria.builder()
55 | // engine
56 | .optEngine("OnnxRuntime")
57 | // .optEngine("PyTorch")
58 | // .optModelName("inference")
59 | // devices
60 | .optDevice(device)
61 | // type
62 | .setTypes(Image.class, String.class).optProgress(new ProgressBar())
63 | .optTranslator(new PpWordRecTranslator(new ConcurrentHashMap()));
64 |
65 | if (modelPath != null) {
66 | System.out.println("load from file");
67 | builder.optModelPath(modelPath).optModelName("ch_PP-OCRv4_det_infer");
68 | } else {
69 | System.out.println("load from jar");
70 | builder.optModelUrls("jar:///models/ch_PP-OCRv4_rec_infer.zip");
71 | }
72 | return builder.build();
73 | }
74 |
75 | /**
76 | * 图像推理
77 | *
78 | * @param manager
79 | * @param image
80 | * @param detector
81 | * @param recognizer
82 | * @return
83 | * @throws TranslateException
84 | */
85 | public List predict(NDManager manager, Image image, Predictor detector,
86 | Predictor recognizer) throws TranslateException {
87 | NDList boxes = detector.predict(image);
88 | if (boxes == null) {
89 | return null;
90 | }
91 | // 交给 NDManager自动管理内存
92 | // attach to manager for automatic memory management
93 | boxes.attach(manager);
94 |
95 | List result = new ArrayList<>();
96 |
97 | Mat mat = (Mat) image.getWrappedImage();
98 |
99 | for (int i = 0; i < boxes.size(); i++) {
100 | NDArray box = boxes.get(i);
101 |
102 | float[] pointsArr = box.toFloatArray();
103 | float[] lt = java.util.Arrays.copyOfRange(pointsArr, 0, 2);
104 | float[] rt = java.util.Arrays.copyOfRange(pointsArr, 2, 4);
105 | float[] rb = java.util.Arrays.copyOfRange(pointsArr, 4, 6);
106 | float[] lb = java.util.Arrays.copyOfRange(pointsArr, 6, 8);
107 | int img_crop_width = (int) Math.max(distance(lt, rt), distance(rb, lb));
108 | int img_crop_height = (int) Math.max(distance(lt, lb), distance(rt, rb));
109 | List srcPoints = new ArrayList<>();
110 | srcPoints.add(new Point(lt[0], lt[1]));
111 | srcPoints.add(new Point(rt[0], rt[1]));
112 | srcPoints.add(new Point(rb[0], rb[1]));
113 | srcPoints.add(new Point(lb[0], lb[1]));
114 | List dstPoints = new ArrayList<>();
115 | dstPoints.add(new Point(0, 0));
116 | dstPoints.add(new Point(img_crop_width, 0));
117 | dstPoints.add(new Point(img_crop_width, img_crop_height));
118 | dstPoints.add(new Point(0, img_crop_height));
119 |
120 | Mat srcPoint2f = NDArrayUtils.toMat(srcPoints);
121 | Mat dstPoint2f = NDArrayUtils.toMat(dstPoints);
122 |
123 | Mat cvMat = OpenCVUtils.perspectiveTransform(mat, srcPoint2f, dstPoint2f);
124 |
125 | Image subImg = OpenCVImageFactory.getInstance().fromImage(cvMat);
126 | // ImageUtils.saveImage(subImg, i + ".png", "build/output");
127 |
128 | subImg = subImg.getSubImage(0, 0, img_crop_width, img_crop_height);
129 | if (subImg.getHeight() * 1.0 / subImg.getWidth() > 1.5) {
130 | subImg = rotateImg(manager, subImg);
131 | }
132 |
133 | String name = recognizer.predict(subImg);
134 | RotatedBox rotatedBox = new RotatedBox(box, name);
135 | result.add(rotatedBox);
136 |
137 | cvMat.release();
138 | srcPoint2f.release();
139 | dstPoint2f.release();
140 |
141 | }
142 |
143 | return result;
144 | }
145 |
146 | private BufferedImage get_rotate_crop_image(Image image, NDArray box) {
147 | return null;
148 | }
149 |
150 | /**
151 | * 欧式距离计算
152 | *
153 | * @param point1
154 | * @param point2
155 | * @return
156 | */
157 | private float distance(float[] point1, float[] point2) {
158 | float disX = point1[0] - point2[0];
159 | float disY = point1[1] - point2[1];
160 | float dis = (float) Math.sqrt(disX * disX + disY * disY);
161 | return dis;
162 | }
163 |
164 | /**
165 | * 图片旋转
166 | *
167 | * @param manager
168 | * @param image
169 | * @return
170 | */
171 | private Image rotateImg(NDManager manager, Image image) {
172 | NDArray rotated = NDImageUtils.rotate90(image.toNDArray(manager), 1);
173 | return ImageFactory.getInstance().fromNDArray(rotated);
174 | }
175 | }
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/src/main/java/com/litongjava/ai/djl/paddle/ocr/v4/recognition/PpWordRecTranslator.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.djl.paddle.ocr.v4.recognition;
2 |
3 | import java.io.IOException;
4 | import java.io.InputStream;
5 | import java.util.Arrays;
6 | import java.util.List;
7 | import java.util.Map;
8 |
9 | import ai.djl.Model;
10 | import ai.djl.modality.cv.Image;
11 | import ai.djl.modality.cv.util.NDImageUtils;
12 | import ai.djl.ndarray.NDArray;
13 | import ai.djl.ndarray.NDList;
14 | import ai.djl.ndarray.index.NDIndex;
15 | import ai.djl.ndarray.types.DataType;
16 | import ai.djl.ndarray.types.Shape;
17 | import ai.djl.translate.Batchifier;
18 | import ai.djl.translate.Translator;
19 | import ai.djl.translate.TranslatorContext;
20 | import ai.djl.util.Utils;
21 |
22 | /**
23 | * 文字识别前后处理
24 | */
25 | public class PpWordRecTranslator implements Translator {
26 | private List table;
27 | private final boolean use_space_char;
28 |
29 | public PpWordRecTranslator(Map arguments) {
30 | use_space_char =
31 | arguments.containsKey("use_space_char")
32 | ? Boolean.parseBoolean(arguments.get("use_space_char").toString())
33 | : true;
34 | }
35 |
36 | @Override
37 | public void prepare(TranslatorContext ctx) throws IOException {
38 | Model model = ctx.getModel();
39 | try (InputStream is = model.getArtifact("dict.txt").openStream()) {
40 | table = Utils.readLines(is, true);
41 | table.add(0, "blank");
42 | if (use_space_char) {
43 | table.add(" ");
44 | table.add(" ");
45 | } else {
46 | table.add("");
47 | table.add("");
48 | }
49 |
50 | }
51 | }
52 |
53 | @Override
54 | public String processOutput(TranslatorContext ctx, NDList list) throws IOException {
55 | StringBuilder sb = new StringBuilder();
56 | NDArray tokens = list.singletonOrThrow();
57 |
58 | long[] indices = tokens.get(0).argMax(1).toLongArray();
59 | boolean[] selection = new boolean[indices.length];
60 | Arrays.fill(selection, true);
61 | for (int i = 1; i < indices.length; i++) {
62 | if (indices[i] == indices[i - 1]) {
63 | selection[i] = false;
64 | }
65 | }
66 |
67 | // 字符置信度
68 | // float[] probs = new float[indices.length];
69 | // for (int row = 0; row < indices.length; row++) {
70 | // NDArray value = tokens.get(0).get(new NDIndex(""+ row +":" + (row + 1) +"," + indices[row] +":" + ( indices[row] + 1)));
71 | // probs[row] = value.toFloatArray()[0];
72 | // }
73 |
74 | int lastIdx = 0;
75 | for (int i = 0; i < indices.length; i++) {
76 | if (selection[i] == true && indices[i] > 0 && !(i > 0 && indices[i] == lastIdx)) {
77 | sb.append(table.get((int) indices[i]));
78 | }
79 | }
80 | return sb.toString();
81 | }
82 |
83 | @Override
84 | public NDList processInput(TranslatorContext ctx, Image input) {
85 | NDArray img = input.toNDArray(ctx.getNDManager(), Image.Flag.COLOR);
86 | int imgC = 3;
87 | int imgH = 48;
88 | int imgW = 320;
89 |
90 | float max_wh_ratio = (float) imgW / (float) imgH;
91 |
92 | int h = input.getHeight();
93 | int w = input.getWidth();
94 | float wh_ratio = (float) w / (float) h;
95 |
96 | max_wh_ratio = Math.max(max_wh_ratio, wh_ratio);
97 | imgW = (int) (imgH * max_wh_ratio);
98 |
99 | int resized_w;
100 | if (Math.ceil(imgH * wh_ratio) > imgW) {
101 | resized_w = imgW;
102 | } else {
103 | resized_w = (int) (Math.ceil(imgH * wh_ratio));
104 | }
105 | NDArray resized_image = NDImageUtils.resize(img, resized_w, imgH);
106 | resized_image = resized_image.transpose(2, 0, 1).toType(DataType.FLOAT32, false);
107 | resized_image.divi(255f).subi(0.5f).divi(0.5f);
108 | NDArray padding_im = ctx.getNDManager().zeros(new Shape(imgC, imgH, imgW), DataType.FLOAT32);
109 | padding_im.set(new NDIndex(":,:,0:" + resized_w), resized_image);
110 |
111 | padding_im = padding_im.flip(0);
112 | padding_im = padding_im.expandDims(0);
113 | return new NDList(padding_im);
114 | }
115 |
116 | @Override
117 | public Batchifier getBatchifier() {
118 | return null;
119 | }
120 |
121 | }
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 | ${CONSOLE_LOG_PATTERN}
12 |
13 |
14 |
15 |
16 |
17 |
18 | ${CONSOLE_LOG_PATTERN}
19 |
20 |
21 |
22 | ${LOG_HOME}/project-name-%d{yyyy-MM-dd}.log
23 |
24 | 180
25 |
26 |
27 |
28 | 10MB
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/src/main/resources/models/readme.md:
--------------------------------------------------------------------------------
1 | models path
2 | ```shell
3 | wget https://github.com/litongjava/tools-ocr/releases/download/model-ppocr-v4/ch_PP-OCRv4_det_infer-onnx.zip
4 | wget https://github.com/litongjava/tools-ocr/releases/download/model-ppocr-v4/ch_PP-OCRv4_rec_infer-onnx.zip
5 | mkdir -p models/ch_PP-OCRv4_det_infer
6 | mkdir -p models/ch_PP-OCRv4_rec_infer
7 | unzip ch_PP-OCRv4_det_infer-onnx.zip -d models/ch_PP-OCRv4_det_infer
8 | unzip ch_PP-OCRv4_rec_infer-onnx.zip -d models/ch_PP-OCRv4_rec_infer
9 | ```
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/src/test/java/com/litongjava/ai/djl/paddle/ocr/v4/PaddlePaddleOCRV4Test.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.djl.paddle.ocr.v4;
2 |
3 | import org.junit.Test;
4 |
5 | import lombok.extern.slf4j.Slf4j;
6 |
7 | @Slf4j
8 | public class PaddlePaddleOCRV4Test {
9 |
10 | @Test
11 | public void test() {
12 | String url = "https://resources.djl.ai/images/flight_ticket.jpg";
13 | String text = null;
14 | for (int i = 0; i < 100; i++) {
15 | try {
16 | text = PaddlePaddleOCRV4.INSTANCE.ocr(url);
17 | } catch (Exception e) {
18 | // TODO Auto-generated catch block
19 | e.printStackTrace();
20 | }
21 | log.info("text:{}", text);
22 | }
23 | }
24 |
25 | }
26 |
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/src/test/java/com/litongjava/ai/djl/paddle/ocr/v4/gpu/GPUStudy.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.djl.paddle.ocr.v4.gpu;
2 |
3 | import org.junit.Test;
4 |
5 | import ai.djl.Device;
6 | import ai.djl.engine.Engine;
7 | import ai.djl.ndarray.NDArray;
8 | import ai.djl.ndarray.NDManager;
9 |
10 | public class GPUStudy {
11 | /* Return the i'th GPU if it exists, otherwise return the CPU */
12 | public Device tryGpu(int i) {
13 | return Engine.getInstance().getGpuCount() > i ? Device.gpu(i) : Device.cpu();
14 | }
15 |
16 | /* Return all available GPUs or the [CPU] if no GPU exists */
17 | public Device[] tryAllGpus() {
18 | int gpuCount = Engine.getInstance().getGpuCount();
19 | if (gpuCount > 0) {
20 | Device[] devices = new Device[gpuCount];
21 | for (int i = 0; i < gpuCount; i++) {
22 | devices[i] = Device.gpu(i);
23 | }
24 | return devices;
25 | }
26 | return new Device[] { Device.cpu() };
27 | }
28 |
29 | public static void main(String[] args) {
30 | System.out.println(Device.cpu());
31 | System.out.println(Device.gpu());
32 | System.out.println(Device.gpu(1));
33 |
34 | System.out.println("GPU count: " + Engine.getInstance().getGpuCount());
35 | }
36 |
37 | @Test
38 | public void getDevices() {
39 | NDManager manager = NDManager.newBaseManager();
40 | NDArray x = manager.create(new int[] { 1, 2, 3 });
41 | Device device = x.getDevice();
42 | System.out.println(device);
43 | }
44 |
45 | }
46 |
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/src/test/java/com/litongjava/ai/djl/paddle/ocr/v4/recognition/OcrV4RecognitionTest.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.djl.paddle.ocr.v4.recognition;
2 |
3 | import java.net.URISyntaxException;
4 | import java.net.URL;
5 | import java.nio.file.Path;
6 | import java.nio.file.Paths;
7 |
8 | import org.junit.Test;
9 |
10 | import cn.hutool.core.io.resource.ResourceUtil;
11 | import lombok.extern.slf4j.Slf4j;
12 |
13 | @Slf4j
14 | public class OcrV4RecognitionTest {
15 |
16 | @Test
17 | public void testGetModelPath() {
18 | // URL resource = ClassUtil.getClassLoader().getResource();
19 | URL resource = ResourceUtil.getResource("models/ch_PP-OCRv4_rec_infer/inference.onnx");
20 | log.info("resource:{}", resource);
21 | Path modelPath = null;
22 | try {
23 | modelPath = Paths.get(resource.toURI());
24 | } catch (URISyntaxException e) {
25 | e.printStackTrace();
26 | }
27 | log.info("modelPath:{}", modelPath);
28 |
29 | }
30 |
31 | }
32 |
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/src/test/resources/03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/litongjava/ai-server/3d740e12cf2d193590a48a735c515e369da12174/paddle-ocr/paddle-ocr-service/src/test/resources/03.png
--------------------------------------------------------------------------------
/paddle-ocr/paddle-ocr-service/src/test/resources/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/litongjava/ai-server/3d740e12cf2d193590a48a735c515e369da12174/paddle-ocr/paddle-ocr-service/src/test/resources/2.jpg
--------------------------------------------------------------------------------
/paddle-ocr/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 |
4 | com.litongjava
5 | ai-server
6 | 1.0.0
7 |
8 | paddle-ocr
9 | pom
10 |
11 | paddle-ocr-server
12 | paddle-ocr-service
13 | paddle-ocr-native-server
14 |
15 |
--------------------------------------------------------------------------------
/paddle-ocr/readme.md:
--------------------------------------------------------------------------------
1 | # paddle-ocr-server
2 |
3 |
4 | ## 简介
5 |
6 | 基于java语言的开箱即用的ocr识别服务,用到的框架和技术
7 | - tio-boot
8 | - djl
9 | - opencv
10 | - pytorch
11 | - onnx
12 | - paddle-ocr
13 | ## require
14 | glibc==2.28 or CentOS 8.4
15 |
16 | ## How to build
17 | ```
18 | git clone https://github.com/litongjava/ai-server.git
19 | or
20 | #git clone https://gitee.com/ppnt/ai-server.git
21 | #set JAVA_HOME=D:\java\jdk1.8.0_121
22 | mvn clean package -pl paddle-ocr-server -DskipTests -Pproduction
23 | ```
24 | run
25 | ```
26 | java -jar paddle-ocr-server/target/paddle-ocr-server-1.0.4.jar
27 | ```
28 |
29 | ## 库文件存储路径
30 | 启动后默认会下载pytorch,djl-pytorch库文件存储路径
31 | ```
32 | $HOME/.djl.ai/pytorch
33 | ```
34 | 如果下载太慢可以到下面的地址下载pytorch库
35 | ```
36 | https://github.com/litongjava/djl-libs/releases/tag/pytorch
37 | ```
38 |
39 | ## 接口文档
40 |
41 | ### 在线文档地址
42 | https://apifox.com/apidoc/shared-98cc5675-f1a3-4250-a940-cfe060854ef4
43 |
44 | #### 测试接口
45 | 测试接口不需要上传文件,使用程序自带的文件进行识别,用于测试环境安装是否成功
46 | 访问地址:http://localhost/paddle/ocr/test
47 | 返回数据:
48 | ```
49 | {"data":"www.997788 + 登机牌 BOARDING PASS \n航班 FLIGHT 日期 DATE 舱位 CLASS 序号SERIALNO. 座位号SEATNO \nMU 2379 03DEC W 035 12F \n目的地TO 始发地 FROM 一 登机口 GATE 登机时间BDT \n福州 TAIYUAN G11 \nFUZHOU 身份识别IDNO. \n姓名NAME \nZHANGQIWEI 票号TKTNO. \n张祺伟 \n票价FARE ETKT7813699238489/1 \n登机口于起飞前10分钟关闭 GATES CLOSE 10MINUTES BEFORE DEPARTURE TIME + \n","ok":true}
50 | ```
51 | ### 识别接口
52 | ```
53 | curl --location --request POST 'http://localhost/paddle/ocr/rec' \
54 | --header 'User-Agent: Apifox/1.0.0 (https://apifox.com)' \
55 | --form 'file=@""'
56 | ```
57 | 返回数据格式
58 | ```
59 | {
60 | "data": "text数据",
61 | "ok": true
62 | }
63 | ```
64 | ## Docker
65 | ### build
66 | ```
67 | mvn package -DskipTests -Pproduction
68 | docker build -t litongjava/paddle-ocr-server:1.0.1 .
69 | docker tag litongjava/paddle-ocr-server:1.0.1 litongjava/paddle-ocr-server
70 | ```
71 | ### run
72 | ```
73 | docker run --name ocr_server -dit -p 8080:80 litongjava/paddle-ocr-server
74 | ```
75 | ### 部署到aliyun fc
76 |
77 | #### 推送镜像到阿里云镜像仓库
78 | ##### 在 阿里云镜像仓库中 新建仓库
79 | - 访问https://cr.console.aliyun.com/cn-hangzhou/instances/repositories
80 | - 点击个人版
81 | - 点击镜像仓库
82 | - 点击创建镜像仓库 输入名称paddle-ocr-server
83 |
84 | ##### 推送镜像到 阿里云镜像仓库
85 | - 登录查看用户名https://cr.console.aliyun.com/cn-hangzhou/instance/credentials
86 | - 进入linux命令行
87 | - 登录 docker login registry.cn-hangzhou.aliyuncs.com
88 | - 拉取 litongjava/paddle-ocr-server
89 | - tag: docker tag litongjava/paddle-ocr-server registry.cn-hangzhou.aliyuncs.com/litongjava/paddle-ocr-server
90 | - push: docker push registry.cn-hangzhou.aliyuncs.com/litongjava/paddle-ocr-server
91 |
92 | #### 部署到 aliyun fc
93 | - 登录 阿里云
94 | - 选择"阿里云函数"计算
95 | - 选择"服务及函数"
96 | - 创建"服务"
97 | - 名称:paddle-ocr-server 确定创建
98 | - 创建函数-->使用容器镜像创建
99 | - 函数名称 ocr
100 | - web server模式 是
101 | - 请求处理程序类型 处理 Http请求
102 | - 镜像 选择刚才推送的镜像 registry-vpc.cn-hangzhou.aliyuncs.com/litongjava/paddle-ocr-server:latest
103 | - 监听端口 80
104 | - 是否使用GPU 不使用GPU
105 | - 规格方案 默认即可
106 | - 禁用公网访问 URL 否
107 | - 点击部署
108 | - 部署成功过后获取地址访问测试即可
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 | com.litongjava
4 | ai-server
5 | 1.0.0
6 | pom
7 | ai-server
8 | ai development toolkit service and server based on java language
9 | https://github.com/litongjava/ai-server
10 |
11 | paddle-ocr
12 | whisper-asr
13 |
14 |
15 |
16 | The Apache Software License, Version 2.0
17 | http://apache.org/licenses/LICENSE-2.0.txt
18 |
19 |
20 |
21 |
22 |
23 | litongjava
24 | Ping E Lee
25 | litongjava@qq.com
26 | https://github.com/litongjava
27 |
28 |
29 |
30 |
31 | scm:git:git@github.com:litongjava/ai-server.git
32 | scm:git:git@github.com:litongjava/ai-server.git
33 | git@github.com:litongjava/ai-server.git
34 |
35 |
36 |
37 |
38 | ossrh
39 | https://s01.oss.sonatype.org/content/repositories/snapshots
40 |
41 |
42 | ossrh
43 | https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/
44 |
45 |
46 |
47 |
48 |
49 |
50 | org.apache.maven.plugins
51 | maven-source-plugin
52 | 2.2.1
53 |
54 |
55 | package
56 |
57 | jar-no-fork
58 |
59 |
60 |
61 |
62 |
63 |
64 | org.apache.maven.plugins
65 | maven-javadoc-plugin
66 | 2.9.1
67 |
68 |
69 | -Xdoclint:none
70 |
71 |
72 |
73 | package
74 |
75 | jar
76 |
77 |
78 |
79 |
80 |
81 |
82 | org.apache.maven.plugins
83 | maven-gpg-plugin
84 | 1.5
85 |
86 |
87 | sign-artifacts
88 | verify
89 |
90 | sign
91 |
92 |
93 |
94 |
95 |
96 | org.sonatype.plugins
97 | nexus-staging-maven-plugin
98 | 1.6.7
99 | true
100 |
101 | ossrh
102 | https://s01.oss.sonatype.org/
103 | true
104 |
105 |
106 |
107 |
108 |
--------------------------------------------------------------------------------
/rapid-ocr-server/Dockerfile:
--------------------------------------------------------------------------------
# build litongjava/rapid-ocr-server:1.0.4
# Use litongjava/centos-8-jdk:8u341 as the base image
FROM litongjava/centos-8-jdk:8u341

# Set the working directory in the container
WORKDIR /app

# Copy the jar file into the container.
# The jar name follows this module's pom (artifactId rapid-ocr-server, version 1.0.4).
COPY target/rapid-ocr-server-1.0.4.jar /app/

# download file
# NOTE(review): assumes the server handles --download and then exits; confirm, otherwise this
# build step will not terminate.
RUN java -jar /app/rapid-ocr-server-1.0.4.jar --download

# Command to run the jar file
CMD ["java", "-jar", "rapid-ocr-server-1.0.4.jar", "--mode=prod"]
16 |
--------------------------------------------------------------------------------
/rapid-ocr-server/deploy-win.txt:
--------------------------------------------------------------------------------
1 | set JAVA_HOME=D:\\java\\jdk1.8.0_121
2 | mvn clean package -DskipTests -Dgpg.skip -Pproduction
--------------------------------------------------------------------------------
/rapid-ocr-server/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 | com.litongjava
4 | rapid-ocr-server
5 | 1.0.4
6 |
7 | UTF-8
8 | 1.8
9 | ${java.version}
10 | ${java.version}
11 | 23.1.1
12 | 1.4.3
13 | 1.18.30
14 | 1.2.1
15 | ocr-server
16 | com.litongjava.ai.server.rapid.ocr.RapidOcrServer
17 |
18 |
19 |
20 |
21 |
22 |
23 | io.github.mymonstercat
24 | rapidocr
25 | 0.0.7
26 |
27 |
28 |
29 |
30 |
31 | io.github.mymonstercat
32 | rapidocr-onnx-platform
33 | 0.0.7
34 |
35 |
36 | com.litongjava
37 | tio-boot
38 | ${tio.boot.version}
39 |
40 |
41 | org.projectlombok
42 | lombok
43 | ${lombok-version}
44 | true
45 | provided
46 |
47 |
48 |
49 | com.alibaba.fastjson2
50 | fastjson2
51 | 2.0.12
52 |
53 |
54 |
55 |
56 |
57 | development
58 |
59 | true
60 |
61 |
62 |
63 | ch.qos.logback
64 | logback-classic
65 | 1.2.3
66 |
67 |
68 |
69 |
70 |
71 |
72 | production
73 |
74 |
75 | ch.qos.logback
76 | logback-classic
77 | 1.2.3
78 |
79 |
80 |
81 |
82 |
83 | org.springframework.boot
84 | spring-boot-maven-plugin
85 | 2.7.4
86 |
87 | ${main.class}
88 | org.projectlombok
89 |
90 |
91 |
92 |
93 |
94 | repackage
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 | assembly
105 |
106 |
107 | ch.qos.logback
108 | logback-classic
109 | 1.2.3
110 |
111 |
112 |
113 |
114 |
115 | org.apache.maven.plugins
116 | maven-jar-plugin
117 | 3.2.0
118 |
119 |
120 | org.apache.maven.plugins
121 | maven-assembly-plugin
122 | 3.1.1
123 |
124 |
125 |
126 | ${main.class}
127 |
128 |
129 |
130 | jar-with-dependencies
131 |
132 | false
133 |
134 |
135 |
136 | make-assembly
137 | package
138 |
139 | single
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 | native
149 |
150 |
151 |
152 | org.slf4j
153 | slf4j-jdk14
154 | 1.7.31
155 |
156 |
157 |
158 | org.graalvm.sdk
159 | graal-sdk
160 | ${graalvm.version}
161 | provided
162 |
163 |
164 |
165 | ${final.name}
166 |
167 |
168 | org.graalvm.nativeimage
169 | native-image-maven-plugin
170 | 21.2.0
171 |
172 |
173 |
174 | native-image
175 |
176 | package
177 |
178 |
179 |
180 | false
181 | ${project.build.finalName}
182 | ${main.class}
183 |
184 | -H:+RemoveSaturatedTypeFlows
185 | --allow-incomplete-classpath
186 | --no-fallback
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
--------------------------------------------------------------------------------
/rapid-ocr-server/readme.md:
--------------------------------------------------------------------------------
1 | # rapid-ocr-server
2 |
3 | ## requirements
4 | - java 1.8
5 |
6 | ## build
7 | 克隆并构建 TIO 工具库: 这些是 TIO 项目的辅助工具库。
8 | ```
9 | git clone https://github.com/litongjava/tio-utils.git
10 | cd tio-utils
11 | mvn clean install -DskipTests -Dgpg.skip
12 | ```
13 | 克隆并构建 TIO 核心库: 这是所有基于 TIO 项目所需的核心库。
14 | ```
15 | git clone https://github.com/litongjava/t-io.git
16 | cd t-io
17 | mvn clean install -DskipTests -Dgpg.skip
18 | ```
19 | 克隆并构建 TIO HTTP 组件: 这些组件为 TIO 应用程序启用 HTTP 功能。
20 | ```
21 | git clone https://github.com/litongjava/tio-http.git
22 | cd tio-http/tio-http-common/
23 | mvn clean install -DskipTests -Dgpg.skip
24 | ```
25 | ```
26 | cd ../tio-http-server
27 | mvn clean install -DskipTests -Dgpg.skip
28 | ```
29 | 克隆并构建 TIO Boot: TIO Boot 简化了 TIO 应用程序的引导过程。
30 | ```
31 | git clone https://github.com/litongjava/tio-boot.git
32 | cd tio-boot
33 | mvn clean install -DskipTests -Dgpg.skip
34 | ```
35 | RapidOcr-Java
36 | ```
37 | git clone https://github.com/litongjava/RapidOcr-Java.git
38 | cd RapidOcr-Java
39 | mvn clean install -DskipTests -Dgpg.skip
40 | ```
41 | 构建本项目
42 | ```
43 | mvn clean package -DskipTests -Dgpg.skip -Pproduction
44 | ```
45 | ## test
46 | - http://localhost/rapid/ocr/test
47 | - http://localhost/rapid/ocr/rec
48 |
49 | ## curl
50 |
51 | ```
52 | curl --location --request POST 'http://localhost/rapid/ocr/rec' \
53 | --header 'Accept: */*' \
54 | --header 'Content-Type: multipart/form-data; boundary=--------------------------865945034672416949878658' \
55 | --form 'file=@"flight_ticket.jpg"' \
56 | --form 'responseFormat="text"'
57 | ```
--------------------------------------------------------------------------------
/rapid-ocr-server/src/main/java/com/litongjava/ai/server/rapid/ocr/RapidOcrServer.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.rapid.ocr;
2 |
3 | import com.litongjava.ai.server.rapid.ocr.config.RapidOcrConfig;
4 | import com.litongjava.tio.boot.TioApplication;
5 |
6 | public class RapidOcrServer {
7 | public static void main(String[] args) throws Exception {
8 |
9 | long start = System.currentTimeMillis();
10 | TioApplication.run(RapidOcrServer.class, new RapidOcrConfig(),args);
11 | long end = System.currentTimeMillis();
12 | System.out.println("started:" + (end - start) + "(ms)");
13 | }
14 |
15 | }
--------------------------------------------------------------------------------
/rapid-ocr-server/src/main/java/com/litongjava/ai/server/rapid/ocr/config/RapidOcrConfig.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.rapid.ocr.config;
2 |
3 | import com.litongjava.ai.server.rapid.ocr.controller.IndexHandler;
4 | import com.litongjava.ai.server.rapid.ocr.controller.RapidOcrHandler;
5 | import com.litongjava.ai.server.rapid.ocr.instance.EngineInstance;
6 | import com.litongjava.tio.boot.context.TioBootConfiguration;
7 | import com.litongjava.tio.boot.server.TioBootServer;
8 | import com.litongjava.tio.http.server.router.HttpReqeustSimpleHandlerRoute;
9 |
10 | public class RapidOcrConfig implements TioBootConfiguration {
11 | @Override
12 | public void config() {
13 | registerRouter();
14 | EngineInstance.init();
15 | }
16 |
17 | public void registerRouter() {
18 | HttpReqeustSimpleHandlerRoute r = TioBootServer.me().getHttpReqeustSimpleHandlerRoute();
19 | // 创建handler
20 | IndexHandler indexHandler = new IndexHandler();
21 | RapidOcrHandler ocrHandler = new RapidOcrHandler();
22 |
23 | // 添加action
24 | r.add("/", indexHandler::index);
25 | r.add("/rapid/ocr/rec", ocrHandler::rec);
26 | r.add("/rapid/ocr/test", ocrHandler::test);
27 |
28 | }
29 |
30 | }
31 |
--------------------------------------------------------------------------------
/rapid-ocr-server/src/main/java/com/litongjava/ai/server/rapid/ocr/controller/IndexHandler.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.rapid.ocr.controller;
2 |
3 |
4 | import com.litongjava.tio.http.common.HttpRequest;
5 | import com.litongjava.tio.http.common.HttpResponse;
6 | import com.litongjava.tio.http.server.util.Resps;
7 |
8 | public class IndexHandler {
9 |
10 | public HttpResponse index(HttpRequest httpRequest) {
11 | return Resps.txt(httpRequest, "rapid-ocr-server");
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/rapid-ocr-server/src/main/java/com/litongjava/ai/server/rapid/ocr/controller/RapidOcrHandler.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.rapid.ocr.controller;
2 |
3 | import java.io.File;
4 | import java.net.URL;
5 |
6 | import com.benjaminwan.ocrlibrary.OcrResult;
7 | import com.litongjava.ai.server.rapid.ocr.instance.EngineInstance;
8 | import com.litongjava.tio.boot.http.TioControllerContext;
9 | import com.litongjava.tio.http.common.HttpRequest;
10 | import com.litongjava.tio.http.common.HttpResponse;
11 | import com.litongjava.tio.http.common.UploadFile;
12 | import com.litongjava.tio.http.server.model.HttpCors;
13 | import com.litongjava.tio.http.server.util.HttpServerResponseUtils;
14 | import com.litongjava.tio.http.server.util.Resps;
15 | import com.litongjava.tio.utils.http.HttpUtils;
16 | import com.litongjava.tio.utils.hutool.FileUtil;
17 | import com.litongjava.tio.utils.hutool.ResourceUtil;
18 | import com.litongjava.tio.utils.resp.RespVo;
19 |
20 | public class RapidOcrHandler {
21 |
22 | public HttpResponse rec(HttpRequest httprequest) throws Exception {
23 | HttpResponse httpResponse = TioControllerContext.getResponse();
24 | HttpServerResponseUtils.enableCORS(httpResponse, new HttpCors());
25 |
26 | UploadFile uploadFile = httprequest.getUploadFile("file");
27 | String responseFormat = httprequest.getParam("responseFormat");
28 | OcrResult runOcr = null;
29 | if (uploadFile != null) {
30 | byte[] fileData = uploadFile.getData();
31 | String name = uploadFile.getName();
32 | File file = new File(System.currentTimeMillis() + "_" + name);
33 |
34 | FileUtil.writeBytes(fileData, file);
35 | runOcr = EngineInstance.runOcr(file.getAbsolutePath());
36 | file.delete();
37 |
38 | if (runOcr != null) {
39 | if ("json".equals(responseFormat)) {
40 | httpResponse = Resps.json(httprequest, RespVo.ok(runOcr));
41 | } else {
42 | httpResponse = Resps.json(httprequest, RespVo.ok(runOcr.getStrRes().trim()));
43 | }
44 |
45 | } else {
46 | httpResponse = Resps.json(httprequest, RespVo.fail());
47 | }
48 |
49 | } else {
50 | httpResponse = Resps.json(httprequest, RespVo.fail("please upload file"));
51 | }
52 |
53 | return httpResponse;
54 | }
55 |
56 | public HttpResponse test(HttpRequest httprequest) throws Exception {
57 | URL resource = ResourceUtil.getResource("images/flight_ticket.jpg");
58 | String fileString = resource.getFile();
59 | File file = new File(fileString);
60 | // OcrResult ocrResult = engine.runOcr(resource.toString());
61 | // OcrResult ocrResult = EngineInstance.runOcr(resource.toURI().getPath());
62 | OcrResult ocrResult = EngineInstance.runOcr(file.getAbsolutePath());
63 | return Resps.json(httprequest, ocrResult.getStrRes().trim());
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/rapid-ocr-server/src/main/java/com/litongjava/ai/server/rapid/ocr/instance/EngineInstance.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.rapid.ocr.instance;
2 |
3 | import com.benjaminwan.ocrlibrary.OcrResult;
4 |
5 | import io.github.mymonstercat.Model;
6 | import io.github.mymonstercat.ocr.InferenceEngine;
7 | import io.github.mymonstercat.ocr.config.HardwareConfig;
8 |
9 | public enum EngineInstance {
10 | INSTANCE;
11 |
12 | private static InferenceEngine engine;
13 |
14 | static {
15 | HardwareConfig onnxConfig = HardwareConfig.getOnnxConfig();
16 | engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4, onnxConfig);
17 | }
18 |
19 | public static OcrResult runOcr(String imagePath) {
20 | return engine.runOcr(imagePath);
21 | }
22 |
23 | // just init
24 | public static void init() {
25 |
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/rapid-ocr-server/src/main/resources/app.properties:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/litongjava/ai-server/3d740e12cf2d193590a48a735c515e369da12174/rapid-ocr-server/src/main/resources/app.properties
--------------------------------------------------------------------------------
/rapid-ocr-server/src/main/resources/images/flight_ticket.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/litongjava/ai-server/3d740e12cf2d193590a48a735c515e369da12174/rapid-ocr-server/src/main/resources/images/flight_ticket.jpg
--------------------------------------------------------------------------------
/rapid-ocr-server/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 | ${CONSOLE_LOG_PATTERN}
12 |
13 |
14 |
15 |
16 |
17 |
18 | ${CONSOLE_LOG_PATTERN}
19 |
20 |
21 |
22 | ${LOG_HOME}/project-name-%d{yyyy-MM-dd}.log
23 |
24 | 180
25 |
26 |
27 |
28 | 10MB
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # ai-server
2 |
3 | ## 简介
4 |
5 | ai-server的目的是提供高性能的开箱即用的Api服务,目前提供的API服务有
6 | - paddle-ocr-server
7 | - whisper-asr-server
--------------------------------------------------------------------------------
/s.yaml:
--------------------------------------------------------------------------------
1 | edition: 1.0.0
2 | name: my-framework-app
3 | access: default
4 | services:
5 | framework:
6 | component: fc
7 | props:
8 | region: cn-hangzhou
9 | service:
10 | name: service-ocr
11 | function:
12 | name: func-ocr
13 | description: Initialize
14 | runtime: custom
15 | environmentVariables:
16 | PATH: >-
17 | /opt/java11/bin:/usr/local/bin/apache-maven/bin:/usr/local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ruby/bin:/opt/bin:/code:/code/bin
18 | LD_LIBRARY_PATH: >-
19 | /code:/code/lib:/usr/local/lib:/opt/lib:/opt/php8.1/lib:/opt/php8.0/lib:/opt/php7.2/lib
20 | JAVA_HOME: /opt/java11
21 | layers:
22 | - acs:fc:cn-hangzhou:official:layers/Java11/versions/2
23 | memorySize: 128
24 | cpu: 0.05
25 | timeout: 60
26 | codeUri: ./
27 | diskSize: 512
28 | caPort: 80
29 | customRuntimeConfig:
30 | command:
31 | - java
32 | args:
33 | - '-jar'
34 | - paddle-ocr/paddle-ocr-server/target/paddle-ocr-server-1.0.1.jar
35 | triggers:
36 | - name: httpTrigger
37 | type: http
38 | config:
39 | authType: anonymous
40 | methods:
41 | - GET
42 | - POST
43 | - PUT
44 | - DELETE
45 | - HEAD
46 | - PATCH
--------------------------------------------------------------------------------
/whisper-asr/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 |
4 | com.litongjava
5 | ai-server
6 | 1.0.0
7 |
8 | whisper-asr
9 | pom
10 |
11 | whisper-asr-server
12 | whisper-asr-service
13 |
14 |
--------------------------------------------------------------------------------
/whisper-asr/readme.md:
--------------------------------------------------------------------------------
1 | ## whisper-asr-server
2 | ### 在线文档
3 | https://apifox.com/apidoc/shared-98cc5675-f1a3-4250-a940-cfe060854ef4/api-121475073
4 |
5 | ### 接口
6 | #### 测试接口
7 | 测试接口无需上传文件,使用内部自带文件进行识别
8 | 访问地址:http://localhost/whispser/asr/test
9 | 返回数据
10 | ```
11 | {
12 | "data": [
13 | {
14 | "end": 1088,
15 | "sentence": " And so, my fellow Americans, ask not what your country can do for you, ask what you can do for your country.",
16 | "start": 0
17 | }
18 | ],
19 | "ok": true
20 | }
21 | ```
22 | #### 识别接口
23 | 识别接口需要上传音频文件
24 | 访问地址:http://localhost/whispser/asr/rec
25 | ##### POST /whispser/asr/rec
26 |
27 | > Body Parameters
28 |
29 | ```yaml
30 | file: filedata
31 | inputType: wav
32 | outputType: default
33 |
34 | ```
35 |
36 | ###### Params
37 |
38 | |Name|Location|Type|Required|Description|
39 | |---|---|---|---|---|
40 | |body|body|object| no |none|
41 | |» file|body|string(binary)| yes |上传的音频文件|
42 | |» inputType|body|string| no |上传的音频格式wav和mp3|
43 | |» outputType|body|string| no |返回的文本格式,支持default,irc,vtt,srt|
44 | |» outputFormat|body|string| no |返回的数据格式,输出的格式,默认是json,如果需要字幕数据输出txt|
45 |
46 | > Response Examples
47 |
48 | > 成功
49 |
50 | ```json
51 | {
52 | "data": [
53 | {
54 | "end": 800,
55 | "sentence": " And so my fellow Americans, ask not what your country can do for you.",
56 | "start": 0
57 | },
58 | {
59 | "end": 1100,
60 | "sentence": " Ask what you can do for your country.",
61 | "start": 800
62 | }
63 | ],
64 | "ok": true
65 | }
66 | ```
67 | ### required
68 | /usr/lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.21'
69 | ### build
70 | ```
71 | # Set java version
72 | export JAVA_HOME=/usr/java/jdk-11.0.8
73 | export PATH=$JAVA_HOME/bin:$PATH
74 |
75 | #build jar
76 | mvn clean install -DskipTests -Dgpg.skip -Pdevelopment
77 | mvn clean package -DskipTests -Dgpg.skip -Pproduction -pl whisper-asr-server
78 | ```
79 | ### run
80 | ```
81 | java -jar whisper-asr-server/target/whisper-asr-server-1.0.1.jar
82 | ```
83 | The default loaded model is `/root/.cache/whisper/ggml-base.en.bin`
84 | Download the model from Hugging Face: https://huggingface.co/ggerganov/whisper.cpp
85 |
86 |
87 | specify the model name
88 | ```
89 | java -jar whisper-asr-server/target/whisper-asr-server-1.0.1.jar --model.name=ggml-large.bin
90 | ```
91 |
92 | ### convert file with ffmpeg
93 | Note that the server currently runs only with 16-bit audio files, so make sure to convert your input before running the tool.
94 | For example, you can use `ffmpeg` like this:
95 |
96 | ```
97 | ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
98 | ```
99 |
100 | ```
101 | ffmpeg -i input.mp4 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
102 | ```
103 |
104 | ### 构建Docker镜像
105 | ### 封装镜像
106 |
107 | build
108 |
109 | ```
110 | docker build -f docker/1.0.1 -t litongjava/whisper-asr-server:1.0.1 .
111 | ```
112 |
113 | run
114 |
115 | ```
116 | docker run -dit -p 8080:80 litongjava/whisper-asr-server:1.0.1
117 | ```
118 |
119 | test
120 | ```
121 | curl -v http://localhost:8080/whispser/asr/test
122 | ```
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-server/docker/1.0.0-base.en:
--------------------------------------------------------------------------------
1 | FROM openjdk:11-jdk-slim
2 |
3 | WORKDIR /app
4 |
5 | COPY ./target/whisper-asr-server-1.0.0.jar /app/
6 |
7 | COPY models/ggml-base.en.bin /root/.cache/whisper/ggml-base.en.bin
8 |
9 | CMD ["java", "-jar", "whisper-asr-server-1.0.0.jar"]
10 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-server/docker/1.0.0-large:
--------------------------------------------------------------------------------
1 | # Use litongjava/jdk:8u211 as the base image
2 | FROM litongjava/jdk:8u211
3 |
4 | # Set the working directory in the container
5 | WORKDIR /app
6 |
7 | # Copy the jar file into the container
8 | COPY ./target/whisper-asr-server-1.0.1.jar /app/
9 | COPY models/ggml-large.bin /root/.cache/whisper/ggml-large.bin
10 |
11 | # Command to run the jar file
12 | CMD ["/usr/java/jdk1.8.0_211/bin/java", "-jar", "whisper-asr-server-1.0.1.jar","--model.name=ggml-large.bin"]
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-server/docker/1.0.1:
--------------------------------------------------------------------------------
1 | #FROM litongjava/jdk:8u211
2 | FROM openjdk:8-jdk-slim
3 |
4 | # Set the working directory in the container
5 | WORKDIR /app
6 |
7 |
8 |
9 | # Copy the jar file into the container
10 | COPY ./target/whisper-asr-server-1.0.1.jar /app/
11 | COPY models/ggml-base.en.bin /root/.cache/whisper/ggml-base.en.bin
12 | COPY models/ggml-large.bin /root/.cache/whisper/ggml-large.bin
13 |
14 |
15 | # Command to run the jar file
16 | CMD ["java", "-jar", "whisper-asr-server-1.0.1.jar"]
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-server/docker/readme.md:
--------------------------------------------------------------------------------
1 | ```
2 | docker build -t litongjava/whisper-asr-server:1.0.1 -f docker/1.0.1 .
3 | ```
4 |
5 | run
6 | ```
7 | docker run -p 80:80 --rm litongjava/whisper-asr-server:1.0.1
8 | ```
9 | or
10 | ```
11 | docker run -p 80:80 --rm litongjava/whisper-asr-server:1.0.1 java -jar whisper-asr-server-1.0.1.jar
12 | ```
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-server/models/readme.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/litongjava/ai-server/3d740e12cf2d193590a48a735c515e369da12174/whisper-asr/whisper-asr-server/models/readme.md
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-server/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 |
4 | com.litongjava
5 | whisper-asr
6 | 1.0.0
7 |
8 | whisper-asr-server
9 | 1.0.1
10 |
11 | UTF-8
12 | 1.8
13 | ${java.version}
14 | ${java.version}
15 | 23.1.1
16 | 1.3.3
17 | 1.18.30
18 | 1.1.9
19 | ocr-server
20 | com.litongjava.aio.server.tio.WhisperAsrServer
21 |
22 |
23 |
24 | com.litongjava
25 | whisper-asr-service
26 | 1.0.0
27 |
28 |
29 | com.litongjava
30 | tio-boot
31 | ${tio.boot.version}
32 |
33 |
34 | org.projectlombok
35 | lombok
36 | ${lombok-version}
37 | true
38 | provided
39 |
40 |
41 |
42 |
43 |
44 | development
45 |
46 | true
47 |
48 |
49 |
50 | ch.qos.logback
51 | logback-classic
52 | 1.2.3
53 |
54 |
55 |
56 |
57 |
58 |
59 | production
60 |
61 |
62 | ch.qos.logback
63 | logback-classic
64 | 1.2.3
65 |
66 |
67 |
68 |
69 |
70 | org.apache.maven.plugins
71 | maven-jar-plugin
72 | 3.2.0
73 |
74 |
75 | org.apache.maven.plugins
76 | maven-assembly-plugin
77 | 3.1.1
78 |
79 |
80 |
81 | ${main.class}
82 |
83 |
84 |
85 | jar-with-dependencies
86 |
87 | false
88 |
89 |
90 |
91 | make-assembly
92 | package
93 |
94 | single
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 | native
104 |
105 |
106 |
107 | org.slf4j
108 | slf4j-jdk14
109 | 1.7.31
110 |
111 |
112 |
113 | org.graalvm.sdk
114 | graal-sdk
115 | ${graalvm.version}
116 | provided
117 |
118 |
119 |
120 | ${final.name}
121 |
122 |
123 | org.graalvm.nativeimage
124 | native-image-maven-plugin
125 | 21.2.0
126 |
127 |
128 |
129 | native-image
130 |
131 | package
132 |
133 |
134 |
135 | false
136 | ${project.build.finalName}
137 | ${main.class}
138 |
139 | -H:+RemoveSaturatedTypeFlows
140 | --allow-incomplete-classpath
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-server/src/main/java/com/litongjava/aio/server/tio/WhisperAsrServer.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.aio.server.tio;
2 |
3 | import com.litongjava.jfinal.aop.annotation.AComponentScan;
4 | import com.litongjava.tio.boot.TioApplication;
5 |
6 | @AComponentScan
7 | public class WhisperAsrServer {
8 |
9 | public static void main(String[] args) throws Exception {
10 | long start = System.currentTimeMillis();
11 | // 初始化服务器并启动服务器
12 | TioApplication.run(WhisperAsrServer.class, args);
13 | long end = System.currentTimeMillis();
14 | System.out.println("started:" + (end - start) + "(ms)");
15 | }
16 | }
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-server/src/main/java/com/litongjava/aio/server/tio/config/WhisperAsrConfig.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.aio.server.tio.config;
2 |
3 | import com.litongjava.ai.server.property.WhiserAsrProperties;
4 | import com.litongjava.jfinal.aop.annotation.ABean;
5 | import com.litongjava.jfinal.aop.annotation.AConfiguration;
6 | import com.litongjava.tio.utils.environment.EnvironmentUtils;
7 |
8 | import lombok.extern.slf4j.Slf4j;
9 |
10 | @AConfiguration
11 | @Slf4j
12 | public class WhisperAsrConfig {
13 |
14 | @ABean
15 | public WhiserAsrProperties aiServiceProperties() {
16 | WhiserAsrProperties aiServiceProperties = new WhiserAsrProperties();
17 | String modelName = EnvironmentUtils.get("model.name");
18 | if (modelName != null) {
19 | log.info("modelName:{}", modelName);
20 | aiServiceProperties.setModelName(modelName);
21 | }
22 |
23 | return aiServiceProperties;
24 |
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-server/src/main/java/com/litongjava/aio/server/tio/controller/EnviormentController.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.aio.server.tio.controller;
2 |
3 | import com.litongjava.jfinal.aop.Aop;
4 | import com.litongjava.tio.http.common.HttpRequest;
5 | import com.litongjava.tio.http.common.HttpResponse;
6 | import com.litongjava.tio.http.server.annotation.EnableCORS;
7 | import com.litongjava.tio.http.server.annotation.RequestPath;
8 | import com.litongjava.tio.http.server.util.Resps;
9 | import com.litongjava.tio.utils.environment.EnvironmentUtils;
10 | import com.litongjava.tio.utils.resp.Resp;
11 |
12 | @EnableCORS
13 | @RequestPath("/env")
14 | public class EnviormentController {
15 | @RequestPath("/{key}")
16 | public HttpResponse get(String key, HttpRequest request) {
17 | return Resps.json(request, Resp.ok(EnvironmentUtils.get(key)));
18 | }
19 |
20 | @RequestPath("/beans")
21 | public HttpResponse beans(HttpRequest request) {
22 | String[] beans = Aop.beans();
23 | return Resps.json(request, Resp.ok(beans));
24 |
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-server/src/main/java/com/litongjava/aio/server/tio/controller/IndexController.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.aio.server.tio.controller;
2 |
3 | import com.litongjava.tio.http.server.annotation.EnableCORS;
4 | import com.litongjava.tio.http.server.annotation.RequestPath;
5 |
6 | @EnableCORS
7 | @RequestPath(value = "/")
8 | public class IndexController {
9 | @RequestPath()
10 | public String respText() {
11 | return "whisper-asr-server";
12 | }
13 | }
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-server/src/main/java/com/litongjava/aio/server/tio/controller/SystemController.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.aio.server.tio.controller;
2 |
3 | import java.util.Properties;
4 |
5 | import com.litongjava.tio.http.common.HttpRequest;
6 | import com.litongjava.tio.http.common.HttpResponse;
7 | import com.litongjava.tio.http.server.annotation.EnableCORS;
8 | import com.litongjava.tio.http.server.annotation.RequestPath;
9 | import com.litongjava.tio.http.server.util.Resps;
10 | import com.litongjava.tio.utils.resp.Resp;
11 |
12 | import cn.hutool.core.util.SystemPropsUtil;
13 | import cn.hutool.system.RuntimeInfo;
14 | import cn.hutool.system.SystemUtil;
15 |
16 | @EnableCORS
17 | @RequestPath("/system")
18 | public class SystemController {
19 |
20 | @RequestPath("/props")
21 | public HttpResponse props(HttpRequest request) {
22 | Properties props = SystemPropsUtil.getProps();
23 | return Resps.json(request, Resp.ok(props));
24 | }
25 |
26 | @RequestPath("/runtimeInfo")
27 | public HttpResponse runtimeInfo(HttpRequest request) {
28 | RuntimeInfo runtimeInfo = SystemUtil.getRuntimeInfo();
29 | return Resps.json(request, Resp.ok(runtimeInfo));
30 | }
31 |
32 | @RequestPath("/availableProcessors")
33 | public HttpResponse availableProcessors(HttpRequest request) {
34 | int availableProcessors = Runtime.getRuntime().availableProcessors();
35 | return Resps.json(request, Resp.ok(availableProcessors));
36 | }
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-server/src/main/java/com/litongjava/aio/server/tio/controller/WhisperAsrController.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.aio.server.tio.controller;
2 |
3 | import java.net.URL;
4 | import java.util.List;
5 |
6 | import com.litongjava.ai.server.model.WhisperSegment;
7 | import com.litongjava.ai.server.service.WhisperCppBaseService;
8 | import com.litongjava.ai.server.service.WhisperCppLargeService;
9 | import com.litongjava.ai.server.service.WhisperCppService;
10 | import com.litongjava.jfinal.aop.Aop;
11 | import com.litongjava.tio.http.common.HttpRequest;
12 | import com.litongjava.tio.http.common.HttpResponse;
13 | import com.litongjava.tio.http.common.UploadFile;
14 | import com.litongjava.tio.http.server.annotation.EnableCORS;
15 | import com.litongjava.tio.http.server.annotation.RequestPath;
16 | import com.litongjava.tio.http.server.util.Resps;
17 | import com.litongjava.tio.utils.resp.Resp;
18 |
19 | import cn.hutool.core.util.ClassUtil;
20 | import io.github.givimad.whisperjni.WhisperFullParams;
21 |
22 | @EnableCORS
23 | @RequestPath("/whispser/asr")
24 | public class WhisperAsrController {
25 | private WhisperCppService whisperCppService = Aop.get(WhisperCppService.class);
26 | private WhisperCppBaseService whisperCppBaseService = Aop.get(WhisperCppBaseService.class);
27 | private WhisperCppLargeService whisperCppLargeService = Aop.get(WhisperCppLargeService.class);
28 |
29 | @RequestPath(value = "/rec")
30 | public HttpResponse index(HttpRequest request, UploadFile file, String inputType, String outputType,
31 | String outputFormat, WhisperFullParams params) throws Exception {
32 | if (file != null) {
33 | Object data = whisperCppService.index(file.getData(), inputType, outputType, params);
34 | if ("txt".equals(outputFormat)) {
35 | if (data instanceof String) {
36 | return Resps.txt(request, (String) data);
37 | }
38 |
39 | } else {
40 | return Resps.json(request, Resp.ok(data));
41 | }
42 |
43 | } else {
44 | return Resps.json(request, Resp.fail("uplod file can't be null"));
45 | }
46 | return Resps.json(request, Resp.fail("unknow error"));
47 | }
48 |
49 | @RequestPath(value = "/rec/base")
50 | public HttpResponse recBase(HttpRequest request, UploadFile file, String inputType, String outputType,
51 | String outputFormat, WhisperFullParams params) throws Exception {
52 | if (file != null) {
53 | Object data = whisperCppBaseService.index(file.getData(), inputType, outputType, params);
54 | if ("txt".equals(outputFormat)) {
55 | if (data instanceof String) {
56 | return Resps.txt(request, (String) data);
57 | }
58 |
59 | } else {
60 | return Resps.json(request, Resp.ok(data));
61 | }
62 |
63 | } else {
64 | return Resps.json(request, Resp.fail("uplod file can't be null"));
65 | }
66 | return Resps.json(request, Resp.fail("unknow error"));
67 | }
68 |
69 | @RequestPath(value = "/rec/large")
70 | public HttpResponse recLarge(UploadFile file, String inputType, String outputType, String outputFormat,
71 | HttpRequest request, WhisperFullParams params) throws Exception {
72 | if (file != null) {
73 | Object data = whisperCppLargeService.index(file.getData(), inputType, outputType, params);
74 | if ("txt".equals(outputFormat)) {
75 | if (data instanceof String) {
76 | return Resps.txt(request, (String) data);
77 | }
78 |
79 | } else {
80 | return Resps.json(request, Resp.ok(data));
81 | }
82 |
83 | } else {
84 | return Resps.json(request, Resp.fail("uplod file can't be null"));
85 | }
86 | return Resps.json(request, Resp.fail("unknow error"));
87 | }
88 |
89 | @RequestPath("/test")
90 | public HttpResponse test(HttpRequest request, WhisperFullParams params) {
91 | // String urlStr = "https://raw.githubusercontent.com/litongjava/whisper.cpp/master/samples/jfk.wav";
92 | URL resource = ClassUtil.getClassLoader().getResource("audios/jfk.wav");
93 | if (resource != null) {
94 | List list = whisperCppService.index(resource, params);
95 | return Resps.json(request, Resp.ok(list));
96 | }
97 | return null;
98 | }
99 |
100 | @RequestPath("/test/base")
101 | public HttpResponse testBase(HttpRequest request, WhisperFullParams params) {
102 | // String urlStr = "https://raw.githubusercontent.com/litongjava/whisper.cpp/master/samples/jfk.wav";
103 | URL resource = ClassUtil.getClassLoader().getResource("audios/jfk.wav");
104 | if (resource != null) {
105 | List list = whisperCppBaseService.index(resource, params);
106 | return Resps.json(request, Resp.ok(list));
107 | }
108 | return null;
109 | }
110 |
111 | @RequestPath("/test/large")
112 | public HttpResponse testLarge(HttpRequest request, WhisperFullParams params) {
113 | // String urlStr = "https://raw.githubusercontent.com/litongjava/whisper.cpp/master/samples/jfk.wav";
114 | URL resource = ClassUtil.getClassLoader().getResource("audios/jfk.wav");
115 | if (resource != null) {
116 | List list = whisperCppLargeService.index(resource, params);
117 | return Resps.json(request, Resp.ok(list));
118 | }
119 | return null;
120 | }
121 | }
122 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-server/src/main/resources/app.properties:
--------------------------------------------------------------------------------
1 | # Maximum request size (all files combined), in bytes. NOTE: 73741824 is about 70 MiB; 1 GiB would be 1073741824.
2 | http.multipart.max-request-size=73741824
3 | # Maximum size of a single uploaded file, in bytes. NOTE: 73741824 is about 70 MiB; 1 GiB would be 1073741824.
4 | http.multipart.max-file-size=73741824
5 |
6 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 |
4 | com.litongjava
5 | whisper-asr
6 | 1.0.0
7 |
8 | whisper-asr-service
9 |
10 | UTF-8
11 | 1.8
12 | ${java.version}
13 | ${java.version}
14 | 1.18.30
15 | 1.2.3
16 | 5.8.11
17 | 1.5.1-java8
18 | 1.9.5.4
19 | 1.1.8
20 |
21 |
22 |
23 | ch.qos.logback
24 | logback-classic
25 | ${logback.version}
26 |
27 |
28 | org.projectlombok
29 | lombok
30 | ${lombok.version}
31 | provided
32 |
33 |
34 |
35 | cn.hutool
36 | hutool-all
37 | ${hutool.version}
38 |
39 |
40 |
41 |
42 | io.github.givimad
43 | whisper-jni
44 | ${whisper.jni.version}
45 |
46 |
47 |
48 | com.googlecode.soundlibs
49 | mp3spi
50 | ${mp3spi.version}
51 |
52 |
53 |
54 | com.litongjava
55 | jfinal-aop
56 | ${jfinal.aop.version}
57 |
58 |
59 |
60 | junit
61 | junit
62 | 4.12
63 | test
64 |
65 |
66 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/enumeration/AudioType.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.enumeration;
2 |
3 | import cn.hutool.core.util.StrUtil;
4 |
/**
 * Audio container formats accepted as transcription input.
 */
public enum AudioType {
  WAV("wav"), MP3("mp3");

  /** Lower-case name used to match request parameters. */
  private final String type;

  AudioType(String type) {
    this.type = type;
  }

  /**
   * @return the lower-case name of this audio type
   */
  public String getType() {
    return type;
  }

  /**
   * Resolves a format name to its constant, ignoring case and surrounding whitespace.
   *
   * @param text the format name; {@code null}, empty or whitespace-only selects {@link #WAV}
   * @return the matching constant
   * @throws IllegalArgumentException if {@code text} names no known audio type
   */
  public static AudioType fromString(String text) {
    // Trim first so that padded request parameters such as " mp3 " still resolve.
    String normalized = text == null ? "" : text.trim();
    if (normalized.isEmpty()) {
      return AudioType.WAV;
    }

    for (AudioType audioType : AudioType.values()) {
      if (audioType.type.equalsIgnoreCase(normalized)) {
        return audioType;
      }
    }
    throw new IllegalArgumentException("No enum constant for text " + text);
  }
}
31 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/enumeration/TextType.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.enumeration;
2 |
3 | import cn.hutool.core.util.StrUtil;
4 |
/**
 * Transcript output formats.
 */
public enum TextType {
  DEFAULT("default"), LRC("lrc"), VTT("vtt"), SRT("srt");

  /** Lower-case name used to match request parameters. */
  private final String type;

  TextType(String type) {
    this.type = type;
  }

  /**
   * @return the lower-case name of this text type
   */
  public String getType() {
    return type;
  }

  /**
   * Finds the constant for a format name, ignoring case and surrounding whitespace.
   *
   * @param text the format name; {@code null}, empty or whitespace-only selects {@link #DEFAULT}
   * @return the matching constant
   * @throws IllegalArgumentException if {@code text} names no known text type
   */
  public static TextType fromString(String text) {
    // Trim first so that padded request parameters such as " srt " still resolve.
    String normalized = text == null ? "" : text.trim();
    if (normalized.isEmpty()) {
      return TextType.DEFAULT;
    }
    for (TextType textType : TextType.values()) {
      if (textType.type.equalsIgnoreCase(normalized)) {
        return textType;
      }
    }
    throw new IllegalArgumentException("No enum constant for text " + text);
  }
}
31 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/model/WhisperSegment.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.model;
2 |
/**
 * One transcribed segment: a start timestamp, an end timestamp and the recognized text.
 *
 * Created by litonglinux@qq.com on 10/21/2023_7:48 AM
 */
public class WhisperSegment {
  private long start;
  private long end;
  private String sentence;

  /** Creates a segment with zero timestamps and no text. */
  public WhisperSegment() {
  }

  /**
   * Creates a fully populated segment.
   *
   * @param start    start timestamp
   * @param end      end timestamp
   * @param sentence recognized text
   */
  public WhisperSegment(long start, long end, String sentence) {
    this.start = start;
    this.end = end;
    this.sentence = sentence;
  }

  public long getStart() {
    return this.start;
  }

  public void setStart(long start) {
    this.start = start;
  }

  public long getEnd() {
    return this.end;
  }

  public void setEnd(long end) {
    this.end = end;
  }

  public String getSentence() {
    return this.sentence;
  }

  public void setSentence(String sentence) {
    this.sentence = sentence;
  }

  /**
   * @return the segment rendered as {@code [start --> end]:sentence}
   */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder();
    text.append('[').append(start).append(" --> ").append(end).append("]:").append(sentence);
    return text.toString();
  }
}
48 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/property/WhiserAsrProperties.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.property;
2 |
3 | import lombok.Data;
4 | import lombok.NoArgsConstructor;
5 |
/**
 * Settings holder for the Whisper ASR service.
 * Accessors and the no-argument constructor are generated by the Lombok annotations below.
 */
@Data
@NoArgsConstructor
public class WhiserAsrProperties {
  // Model file name; starts as "ggml-base.en.bin" and can be replaced through the generated setter.
  private String modelName = "ggml-base.en.bin";
}
11 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/service/TextService.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.service;
2 |
3 | import java.io.IOException;
4 | import java.util.List;
5 |
6 | import com.litongjava.ai.server.model.WhisperSegment;
7 |
8 | public class TextService {
9 | public StringBuffer generateSrt(List segments) throws IOException {
10 | StringBuffer stringBuffer = new StringBuffer();
11 | int index = 1;
12 | for (WhisperSegment segment : segments) {
13 | String startTime = convertToSRTTime(segment.getStart() * 10);
14 | String endTime = convertToSRTTime(segment.getEnd() * 10);
15 | stringBuffer.append(index + "\n");
16 | stringBuffer.append(startTime + " --> " + endTime + "\n");
17 | stringBuffer.append(segment.getSentence() + "\n\n");
18 | index++;
19 | }
20 | return stringBuffer;
21 | }
22 |
23 | public String convertToSRTTime(long milliseconds) {
24 | int hours = (int) (milliseconds / (1000 * 60 * 60));
25 | int minutes = (int) ((milliseconds % (1000 * 60 * 60)) / (1000 * 60));
26 | int seconds = (int) ((milliseconds % (1000 * 60)) / 1000);
27 | int millis = (int) (milliseconds % 1000);
28 | return String.format("%02d:%02d:%02d,%03d", hours, minutes, seconds, millis);
29 | }
30 |
31 | public StringBuffer generateVtt(List segments) throws IOException {
32 | StringBuffer stringBuffer = new StringBuffer();
33 |
34 | // Write the WebVTT header
35 | stringBuffer.append("WEBVTT\n\n");
36 |
37 | int counter = 1;
38 | for (WhisperSegment segment : segments) {
39 | // Convert the start and end times from milliseconds to hh:mm:ss.sss format
40 | String startTime = millisecondsToVttTime(segment.getStart() * 10);
41 | String endTime = millisecondsToVttTime(segment.getEnd() * 10);
42 |
43 | // Write the timestamp and the sentence to the file
44 | stringBuffer.append(counter++ + "\n");
45 | stringBuffer.append(startTime + " --> " + endTime + "\n");
46 | stringBuffer.append(segment.getSentence() + "\n\n");
47 | }
48 | return stringBuffer;
49 | }
50 |
51 | private String millisecondsToVttTime(long milliseconds) {
52 | long hours = milliseconds / 3600000;
53 | long minutes = (milliseconds % 3600000) / 60000;
54 | long seconds = (milliseconds % 60000) / 1000;
55 | long millis = milliseconds % 1000;
56 | return String.format("%02d:%02d:%02d.%03d", hours, minutes, seconds, millis);
57 | }
58 |
59 | public StringBuffer generateLrc(List segments) throws IOException {
60 | StringBuffer stringBuffer = new StringBuffer();
61 | for (WhisperSegment segment : segments) {
62 | String timestamp = millisecondsToLrcTime(segment.getStart() * 10);
63 | stringBuffer.append("[" + timestamp + "]" + segment.getSentence() + "\n");
64 | }
65 | return stringBuffer;
66 | }
67 |
68 | private String millisecondsToLrcTime(long milliseconds) {
69 | long totalSeconds = milliseconds / 1000;
70 | long minutes = totalSeconds / 60;
71 | long seconds = totalSeconds % 60;
72 | long millis = milliseconds % 1000;
73 | return String.format("%02d:%02d.%02d", minutes, seconds, millis / 10);
74 | }
75 |
76 | }
77 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/service/WhisperCppBaseService.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.service;
2 |
3 | import java.io.IOException;
4 | import java.net.URL;
5 | import java.util.List;
6 |
7 | import javax.sound.sampled.UnsupportedAudioFileException;
8 |
9 | import com.litongjava.ai.server.enumeration.AudioType;
10 | import com.litongjava.ai.server.enumeration.TextType;
11 | import com.litongjava.ai.server.model.WhisperSegment;
12 | import com.litongjava.ai.server.single.LocalBaseWhisper;
13 | import com.litongjava.ai.server.utils.Mp3Util;
14 | import com.litongjava.ai.server.utils.WhisperAudioUtils;
15 | import com.litongjava.jfinal.aop.Aop;
16 |
17 | import io.github.givimad.whisperjni.WhisperFullParams;
18 | import lombok.extern.slf4j.Slf4j;
19 |
20 | /**
21 | * 使用Base模型
22 | * @author Tong Li
23 | *
24 | */
25 | @Slf4j
26 | public class WhisperCppBaseService {
27 | private TextService textService = Aop.get(TextService.class);
28 |
29 | public List index(URL url, WhisperFullParams params) {
30 |
31 | try {
32 | float[] floats = WhisperAudioUtils.toAudioData(url);
33 | log.info("floats size:{}", floats.length);
34 |
35 | List segments = LocalBaseWhisper.INSTANCE.fullTranscribeWithTime(floats, floats.length, params);
36 | log.info("size:{}", segments.size());
37 | return segments;
38 | } catch (UnsupportedAudioFileException | IOException e) {
39 | e.printStackTrace();
40 | }
41 |
42 | return null;
43 |
44 | }
45 |
46 | public List index(byte[] data, WhisperFullParams params) {
47 | float[] floats = WhisperAudioUtils.toFloat(data);
48 | return LocalBaseWhisper.INSTANCE.fullTranscribeWithTime(floats, params);
49 | }
50 |
51 | public StringBuffer outputSrt(URL url, WhisperFullParams params) throws IOException {
52 | List segments = this.index(url, params);
53 | return textService.generateSrt(segments);
54 | }
55 |
56 | public StringBuffer outputVtt(URL url, WhisperFullParams params) throws IOException {
57 | List segments = this.index(url, params);
58 | return textService.generateVtt(segments);
59 | }
60 |
61 | public StringBuffer outputLrc(URL url, WhisperFullParams params) throws IOException {
62 | List segments = this.index(url, params);
63 | return textService.generateLrc(segments);
64 | }
65 |
66 | public Object index(byte[] data, String inputType, String outputType)
67 | throws IOException, UnsupportedAudioFileException {
68 | return index(data, inputType, outputType, null);
69 | }
70 |
71 | public Object index(byte[] data, String inputType, String outputType, WhisperFullParams params)
72 | throws IOException, UnsupportedAudioFileException {
73 | log.info("intputType:{},outputType:{}", inputType, outputType);
74 | AudioType audioType = AudioType.fromString(inputType);
75 | TextType textType = TextType.fromString(outputType);
76 | if (audioType == AudioType.MP3) {
77 | // 进行格式转换
78 | log.info("进行格式转换:{}", "mp3");
79 | data = Aop.get(Mp3Util.class).convertToWav(data, 16000, 1);
80 | }
81 | List segments = index(data, params);
82 | if (textType == TextType.SRT) {
83 | return textService.generateSrt(segments).toString();
84 | } else if (textType == TextType.VTT) {
85 | return textService.generateVtt(segments).toString();
86 | } else if (textType == TextType.LRC) {
87 | return textService.generateLrc(segments).toString();
88 | }
89 | return segments;
90 | }
91 | }
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/service/WhisperCppJni.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.service;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Path;
5 | import java.util.ArrayList;
6 | import java.util.List;
7 |
8 | import com.litongjava.ai.server.model.WhisperSegment;
9 |
10 | import io.github.givimad.whisperjni.WhisperContext;
11 | import io.github.givimad.whisperjni.WhisperFullParams;
12 | import io.github.givimad.whisperjni.WhisperJNI;
13 |
14 | public class WhisperCppJni {
15 |
16 | private WhisperJNI whisper = null;
17 | private WhisperContext ctx = null;
18 |
19 | public void initContext(Path path) throws IOException {
20 | whisper = new WhisperJNI();
21 | ctx = whisper.init(path);
22 | }
23 |
24 | public List fullTranscribeWithTime(WhisperFullParams params, float[] samples, int numSamples) {
25 | int result = whisper.full(ctx, params, samples, numSamples);
26 | if (result != 0) {
27 | throw new RuntimeException("Transcription failed with code " + result);
28 | }
29 | int numSegments = whisper.fullNSegments(ctx);
30 | ArrayList segments = new ArrayList(numSegments);
31 |
32 | for (int i = 0; i < numSegments; i++) {
33 | String text = whisper.fullGetSegmentText(ctx, i);
34 | long start = whisper.fullGetSegmentTimestamp0(ctx, i);
35 | long end = whisper.fullGetSegmentTimestamp1(ctx, i);
36 | segments.add(new WhisperSegment(start, end, text));
37 | }
38 | return segments;
39 | }
40 |
41 | public void close() {
42 | ctx.close();
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/service/WhisperCppLargeService.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.service;
2 |
3 | import java.io.IOException;
4 | import java.net.URL;
5 | import java.util.List;
6 |
7 | import javax.sound.sampled.UnsupportedAudioFileException;
8 |
9 | import com.litongjava.ai.server.enumeration.AudioType;
10 | import com.litongjava.ai.server.enumeration.TextType;
11 | import com.litongjava.ai.server.model.WhisperSegment;
12 | import com.litongjava.ai.server.single.LocalLargeWhisper;
13 | import com.litongjava.ai.server.utils.Mp3Util;
14 | import com.litongjava.ai.server.utils.WhisperAudioUtils;
15 | import com.litongjava.jfinal.aop.Aop;
16 |
17 | import io.github.givimad.whisperjni.WhisperFullParams;
18 | import lombok.extern.slf4j.Slf4j;
19 |
20 | /**
21 | * 使用Base模型
22 | * @author Tong Li
23 | *
24 | */
25 | @Slf4j
26 | public class WhisperCppLargeService {
27 | private TextService textService = Aop.get(TextService.class);
28 |
29 | public List index(URL url,WhisperFullParams params) {
30 |
31 | try {
32 | float[] floats = WhisperAudioUtils.toAudioData(url);
33 | log.info("floats size:{}", floats.length);
34 |
35 | List segments = LocalLargeWhisper.INSTANCE.fullTranscribeWithTime(floats, floats.length,params);
36 | log.info("size:{}", segments.size());
37 | return segments;
38 | } catch (UnsupportedAudioFileException | IOException e) {
39 | e.printStackTrace();
40 | }
41 |
42 | return null;
43 |
44 | }
45 |
46 | public List index(byte[] data, WhisperFullParams params) {
47 | float[] floats = WhisperAudioUtils.toFloat(data);
48 | return LocalLargeWhisper.INSTANCE.fullTranscribeWithTime(floats, params);
49 | }
50 |
51 | public StringBuffer outputSrt(URL url,WhisperFullParams params) throws IOException {
52 | List segments = this.index(url,params);
53 | return textService.generateSrt(segments);
54 | }
55 |
56 | public StringBuffer outputVtt(URL url,WhisperFullParams params) throws IOException {
57 | List segments = this.index(url,params);
58 | return textService.generateVtt(segments);
59 | }
60 |
61 | public StringBuffer outputLrc(URL url,WhisperFullParams params) throws IOException {
62 | List segments = this.index(url,params);
63 | return textService.generateLrc(segments);
64 | }
65 |
66 | public Object index(byte[] data, String inputType, String outputType)
67 | throws IOException, UnsupportedAudioFileException {
68 | return index(data, inputType, outputType, null);
69 | }
70 |
71 | public Object index(byte[] data, String inputType, String outputType, WhisperFullParams params)
72 | throws IOException, UnsupportedAudioFileException {
73 | log.info("intputType:{},outputType:{}", inputType, outputType);
74 | AudioType audioType = AudioType.fromString(inputType);
75 | TextType textType = TextType.fromString(outputType);
76 | if (audioType == AudioType.MP3) {
77 | // 进行格式转换
78 | log.info("进行格式转换:{}", "mp3");
79 | data = Aop.get(Mp3Util.class).convertToWav(data, 16000, 1);
80 | }
81 | List segments = index(data, params);
82 | if (textType == TextType.SRT) {
83 | return textService.generateSrt(segments).toString();
84 | } else if (textType == TextType.VTT) {
85 | return textService.generateVtt(segments).toString();
86 | } else if (textType == TextType.LRC) {
87 | return textService.generateLrc(segments).toString();
88 | }
89 | return segments;
90 | }
91 | }
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/service/WhisperCppService.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.service;
2 |
3 | import java.io.IOException;
4 | import java.net.URL;
5 | import java.util.List;
6 |
7 | import javax.sound.sampled.UnsupportedAudioFileException;
8 |
9 | import com.litongjava.ai.server.enumeration.AudioType;
10 | import com.litongjava.ai.server.enumeration.TextType;
11 | import com.litongjava.ai.server.model.WhisperSegment;
12 | import com.litongjava.ai.server.single.LocalLargeWhisper;
13 | import com.litongjava.ai.server.single.LocalWhisper;
14 | import com.litongjava.ai.server.utils.Mp3Util;
15 | import com.litongjava.ai.server.utils.WhisperAudioUtils;
16 | import com.litongjava.jfinal.aop.Aop;
17 |
18 | import io.github.givimad.whisperjni.WhisperFullParams;
19 | import lombok.extern.slf4j.Slf4j;
20 |
21 | /**
22 | * 加载自定义配置的模型
23 | * @author Tong Li
24 | *
25 | */
26 | @Slf4j
27 | public class WhisperCppService {
28 | private TextService textService = Aop.get(TextService.class);
29 |
30 | public List index(URL url,WhisperFullParams params) {
31 |
32 | try {
33 | float[] floats = WhisperAudioUtils.toAudioData(url);
34 | log.info("floats size:{}", floats.length);
35 |
36 | List segments = LocalWhisper.INSTANCE.fullTranscribeWithTime(floats, floats.length, params);
37 | log.info("size:{}", segments.size());
38 | return segments;
39 | } catch (UnsupportedAudioFileException | IOException e) {
40 | e.printStackTrace();
41 | }
42 |
43 | return null;
44 |
45 | }
46 |
47 | public List index(byte[] data, WhisperFullParams params) {
48 | float[] floats = WhisperAudioUtils.toFloat(data);
49 | return LocalLargeWhisper.INSTANCE.fullTranscribeWithTime(floats, params);
50 | }
51 |
52 | public StringBuffer outputSrt(URL url,WhisperFullParams params) throws IOException {
53 | List segments = this.index(url,params);
54 | return textService.generateSrt(segments);
55 | }
56 |
57 | public StringBuffer outputVtt(URL url,WhisperFullParams params) throws IOException {
58 | List segments = this.index(url,params);
59 | return textService.generateVtt(segments);
60 | }
61 |
62 | public StringBuffer outputLrc(URL url,WhisperFullParams params) throws IOException {
63 | List segments = this.index(url,params);
64 | return textService.generateLrc(segments);
65 | }
66 |
67 | public Object index(byte[] data, String inputType, String outputType)
68 | throws IOException, UnsupportedAudioFileException {
69 | return index(data, inputType, outputType, null);
70 | }
71 |
72 | public Object index(byte[] data, String inputType, String outputType, WhisperFullParams params)
73 | throws IOException, UnsupportedAudioFileException {
74 | log.info("intputType:{},outputType:{}", inputType, outputType);
75 | AudioType audioType = AudioType.fromString(inputType);
76 | TextType textType = TextType.fromString(outputType);
77 | if (audioType == AudioType.MP3) {
78 | // 进行格式转换
79 | log.info("进行格式转换:{}", "mp3");
80 | data = Aop.get(Mp3Util.class).convertToWav(data, 16000, 1);
81 | }
82 | List segments = index(data, params);
83 | if (textType == TextType.SRT) {
84 | return textService.generateSrt(segments).toString();
85 | } else if (textType == TextType.VTT) {
86 | return textService.generateVtt(segments).toString();
87 | } else if (textType == TextType.LRC) {
88 | return textService.generateLrc(segments).toString();
89 | }
90 | return segments;
91 | }
92 | }
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/single/LocalBaseWhisper.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.single;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Path;
5 | import java.nio.file.Paths;
6 | import java.util.List;
7 | import java.util.concurrent.Callable;
8 | import java.util.concurrent.ExecutionException;
9 | import java.util.concurrent.ExecutorService;
10 | import java.util.concurrent.Executors;
11 |
12 | import com.litongjava.ai.server.model.WhisperSegment;
13 | import com.litongjava.ai.server.service.WhisperCppJni;
14 |
15 | import io.github.givimad.whisperjni.WhisperFullParams;
16 | import io.github.givimad.whisperjni.WhisperJNI;
17 | import lombok.extern.slf4j.Slf4j;
18 |
19 | @Slf4j
20 | public enum LocalBaseWhisper {
21 | INSTANCE;
22 |
23 | private ExecutorService executorService;
24 | private ThreadLocal threadLocalWhisper;
25 | private WhisperFullParams defaultPararams = new WhisperFullParams();
26 |
27 | LocalBaseWhisper() {
28 | try {
29 | WhisperJNI.loadLibrary();
30 | } catch (IOException e1) {
31 | e1.printStackTrace();
32 | }
33 | // C:\Users\Administrator\.cache\whisper
34 | String userHome = System.getProperty("user.home");
35 | String modelName = "ggml-base.en.bin";
36 | Path path = Paths.get(userHome, ".cache", "whisper", modelName);
37 |
38 | this.executorService = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() - 1);
39 | threadLocalWhisper = ThreadLocal.withInitial(() -> {
40 | WhisperCppJni whisper = new WhisperCppJni();
41 | try {
42 | whisper.initContext(path);
43 | } catch (IOException e) {
44 | e.printStackTrace();
45 | }
46 | return whisper;
47 | });
48 | defaultPararams.printProgress = false;
49 | }
50 |
51 | public List fullTranscribeWithTime(float[] audioData, int numSamples, WhisperFullParams params) {
52 | Callable> task = () -> {
53 | WhisperCppJni whisper = null;
54 | whisper = threadLocalWhisper.get();
55 | if (params != null) {
56 | log.info("params:{}", params);
57 | return whisper.fullTranscribeWithTime(params, audioData, numSamples);
58 | } else {
59 | return whisper.fullTranscribeWithTime(defaultPararams, audioData, numSamples);
60 | }
61 |
62 | };
63 |
64 | try {
65 | return executorService.submit(task).get();
66 | } catch (InterruptedException | ExecutionException e) {
67 | e.printStackTrace();
68 | }
69 | return null;
70 | }
71 |
72 | public List fullTranscribeWithTime(float[] floats, WhisperFullParams params) {
73 | return fullTranscribeWithTime(floats, floats.length, params);
74 | }
75 | }
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/single/LocalLargeWhisper.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.single;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Path;
5 | import java.nio.file.Paths;
6 | import java.util.List;
7 | import java.util.concurrent.Callable;
8 | import java.util.concurrent.ExecutionException;
9 | import java.util.concurrent.ExecutorService;
10 | import java.util.concurrent.Executors;
11 |
12 | import com.litongjava.ai.server.model.WhisperSegment;
13 | import com.litongjava.ai.server.service.WhisperCppJni;
14 |
15 | import io.github.givimad.whisperjni.WhisperFullParams;
16 | import io.github.givimad.whisperjni.WhisperJNI;
17 | import lombok.extern.slf4j.Slf4j;
18 |
19 | @Slf4j
20 | public enum LocalLargeWhisper {
21 | INSTANCE;
22 |
23 | private ExecutorService executorService;
24 | private ThreadLocal threadLocalWhisper;
25 | private WhisperFullParams defaultPararams = new WhisperFullParams();
26 |
27 | LocalLargeWhisper() {
28 | try {
29 | WhisperJNI.loadLibrary();
30 | } catch (IOException e1) {
31 | e1.printStackTrace();
32 | }
33 | // C:\Users\Administrator\.cache\whisper
34 | String userHome = System.getProperty("user.home");
35 | String modelName = "ggml-large.bin";
36 | Path path = Paths.get(userHome, ".cache", "whisper", modelName);
37 |
38 | this.executorService = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() - 1);
39 | threadLocalWhisper = ThreadLocal.withInitial(() -> {
40 | WhisperCppJni whisper = new WhisperCppJni();
41 | try {
42 | whisper.initContext(path);
43 | } catch (IOException e) {
44 | e.printStackTrace();
45 | }
46 | return whisper;
47 | });
48 | defaultPararams.printProgress = false;
49 | }
50 |
51 | public List fullTranscribeWithTime(float[] audioData, int numSamples, WhisperFullParams params) {
52 | Callable> task = () -> {
53 | WhisperCppJni whisper = null;
54 | whisper = threadLocalWhisper.get();
55 | if (params != null) {
56 | log.info("params:{}", params);
57 | return whisper.fullTranscribeWithTime(params, audioData, numSamples);
58 | } else {
59 | return whisper.fullTranscribeWithTime(defaultPararams, audioData, numSamples);
60 | }
61 |
62 | };
63 |
64 | try {
65 | return executorService.submit(task).get();
66 | } catch (InterruptedException | ExecutionException e) {
67 | e.printStackTrace();
68 | }
69 | return null;
70 | }
71 |
72 | public List fullTranscribeWithTime(float[] floats, WhisperFullParams params) {
73 | return fullTranscribeWithTime(floats, floats.length, params);
74 | }
75 | }
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/single/LocalWhisper.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.single;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Path;
5 | import java.nio.file.Paths;
6 | import java.util.List;
7 | import java.util.concurrent.Callable;
8 | import java.util.concurrent.ExecutionException;
9 |
10 | import com.litongjava.ai.server.model.WhisperSegment;
11 | import com.litongjava.ai.server.property.WhiserAsrProperties;
12 | import com.litongjava.ai.server.service.WhisperCppJni;
13 | import com.litongjava.ai.server.utils.WhisperExecutorServiceUtils;
14 | import com.litongjava.jfinal.aop.Aop;
15 |
16 | import io.github.givimad.whisperjni.WhisperFullParams;
17 | import io.github.givimad.whisperjni.WhisperJNI;
18 | import lombok.extern.slf4j.Slf4j;
19 |
20 | @Slf4j
21 | public enum LocalWhisper {
22 | INSTANCE;
23 |
24 | private ThreadLocal threadLocalWhisper;
25 | private WhisperFullParams defaultPararams = new WhisperFullParams();
26 |
27 | LocalWhisper() {
28 | try {
29 | WhisperJNI.loadLibrary();
30 | } catch (IOException e1) {
31 | e1.printStackTrace();
32 | }
33 | // C:\Users\Administrator\.cache\whisper
34 | String userHome = System.getProperty("user.home");
35 | String modelName = Aop.get(WhiserAsrProperties.class).getModelName();
36 | Path path = Paths.get(userHome, ".cache", "whisper", modelName);
37 |
38 | threadLocalWhisper = ThreadLocal.withInitial(() -> {
39 | WhisperCppJni whisper = new WhisperCppJni();
40 | try {
41 | whisper.initContext(path);
42 | } catch (IOException e) {
43 | e.printStackTrace();
44 | }
45 | return whisper;
46 | });
47 | defaultPararams.printProgress = false;
48 | }
49 |
50 | public List fullTranscribeWithTime(float[] audioData, int numSamples, WhisperFullParams params) {
51 | Callable> task = () -> {
52 | WhisperCppJni whisper = null;
53 | whisper = threadLocalWhisper.get();
54 | if (params != null) {
55 | log.info("params:{}", params);
56 | return whisper.fullTranscribeWithTime(params, audioData, numSamples);
57 | } else {
58 | return whisper.fullTranscribeWithTime(defaultPararams, audioData, numSamples);
59 | }
60 |
61 | };
62 |
63 | try {
64 | return WhisperExecutorServiceUtils.submit(task).get();
65 | } catch (InterruptedException | ExecutionException e) {
66 | e.printStackTrace();
67 | }
68 | return null;
69 | }
70 |
71 | public List fullTranscribeWithTime(float[] floats, WhisperFullParams params) {
72 | return fullTranscribeWithTime(floats, floats.length, params);
73 | }
74 | }
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/utils/JFramUtils.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.utils;
2 |
3 | import java.awt.image.BufferedImage;
4 |
5 | import javax.swing.JFrame;
6 | import javax.swing.WindowConstants;
7 |
8 | public class JFramUtils {
9 |
10 | public static void showBufferedImage(String title, BufferedImage image) {
11 | MatPanel panel = new MatPanel();
12 | panel.setBufferedImage(image);
13 | // repaint自动调用paint
14 | panel.repaint();
15 |
16 | JFrame frame = new JFrame(title);
17 | frame.setSize(image.getWidth(), image.getHeight());
18 | frame.setDefaultCloseOperation(WindowConstants.DISPOSE_ON_CLOSE);
19 | frame.setContentPane(panel);
20 | frame.setVisible(true);
21 | }
22 |
23 | public static void showBufferedImage(String title, Object dst) {
24 | showBufferedImage(title, (BufferedImage) dst);
25 | }
26 |
27 | }
28 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/utils/MatPanel.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.utils;
2 |
3 | import java.awt.Graphics;
4 | import java.awt.image.BufferedImage;
5 |
6 | import javax.swing.JPanel;
7 |
/**
 * Panel that draws a single {@link BufferedImage} at its natural size in the top-left corner.
 */
@SuppressWarnings("serial")
public class MatPanel extends JPanel {
  private BufferedImage bufferImage;

  /**
   * Clears the background and then draws the current image, if any.
   *
   * <p>Custom painting lives in {@code paintComponent} rather than {@code paint} so that the
   * inherited background, border and child painting still run.
   */
  @Override
  protected void paintComponent(Graphics g) {
    super.paintComponent(g);
    if (bufferImage != null) {
      g.drawImage(bufferImage, 0, 0, bufferImage.getWidth(), bufferImage.getHeight(), this);
    }
  }

  /**
   * Replaces the displayed image and schedules a repaint.
   *
   * @param src image to draw, or {@code null} to draw nothing
   */
  public void setBufferedImage(BufferedImage src) {
    this.bufferImage = src;
    repaint();
  }
}
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/utils/Mp3Util.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.utils;
2 |
3 | import java.io.ByteArrayInputStream;
4 | import java.io.ByteArrayOutputStream;
5 | import java.io.File;
6 | import java.io.IOException;
7 |
8 | import javax.sound.sampled.AudioFormat;
9 | import javax.sound.sampled.AudioInputStream;
10 | import javax.sound.sampled.AudioSystem;
11 | import javax.sound.sampled.UnsupportedAudioFileException;
12 |
/**
 * Decodes audio (typically MP3) to signed PCM and converts it to a requested sample rate and
 * channel count.
 *
 * <p>The returned arrays hold the raw sample bytes read from the converted stream; no
 * WAV/RIFF header is written here.
 */
public class Mp3Util {

  /**
   * Decodes in-memory audio and converts it to the requested rate and channel count.
   *
   * @param mp3Data encoded audio file contents
   * @param ar      target sample rate in Hz
   * @param ac      target channel count
   * @return the converted PCM sample bytes
   * @throws UnsupportedAudioFileException if no installed reader recognizes the data
   * @throws IOException                   if reading the audio fails
   */
  public byte[] convertToWav(byte[] mp3Data, int ar, int ac) throws UnsupportedAudioFileException, IOException {
    try (AudioInputStream ais = AudioSystem.getAudioInputStream(new ByteArrayInputStream(mp3Data))) {
      return decodeAndConvert(ais, ar, ac);
    }
  }

  /**
   * Decodes an audio file and converts it to the requested rate and channel count.
   *
   * @param mp3File encoded audio file
   * @param ar      target sample rate in Hz
   * @param ac      target channel count
   * @return the converted PCM sample bytes
   * @throws IOException                   if reading the file fails
   * @throws UnsupportedAudioFileException if no installed reader recognizes the file
   */
  public byte[] convertToWav(File mp3File, int ar, int ac) throws IOException, UnsupportedAudioFileException {
    try (AudioInputStream ais = AudioSystem.getAudioInputStream(mp3File)) {
      return decodeAndConvert(ais, ar, ac);
    }
  }

  /**
   * Converts {@code srcAis} to {@code ar} Hz / {@code ac} channels, keeping the source encoding,
   * sample size and byte order, and returns all converted bytes.
   *
   * <p>The stream is read to its end but not closed; the caller keeps ownership of it.
   *
   * @param srcAis    stream to convert
   * @param srcFormat format of {@code srcAis}
   * @param ar        target sample rate in Hz
   * @param ac        target channel count
   * @return the converted sample bytes
   * @throws IOException if reading the stream fails
   */
  public byte[] convert(AudioInputStream srcAis, AudioFormat srcFormat, int ar, int ac) throws IOException {
    // Frame size follows the actual sample width instead of assuming 16-bit samples.
    int bytesPerSample = (srcFormat.getSampleSizeInBits() + 7) / 8;
    AudioFormat desiredFormat = new AudioFormat(srcFormat.getEncoding(), ar, srcFormat.getSampleSizeInBits(), ac,
        ac * bytesPerSample, ar, srcFormat.isBigEndian());
    AudioInputStream desiredAis = AudioSystem.getAudioInputStream(desiredFormat, srcAis);

    ByteArrayOutputStream collected = new ByteArrayOutputStream();
    byte[] buffer = new byte[4096];
    int bytesRead;
    while ((bytesRead = desiredAis.read(buffer)) != -1) {
      collected.write(buffer, 0, bytesRead);
    }
    return collected.toByteArray();
  }

  /**
   * Shared body of both {@code convertToWav} overloads: decodes to 16-bit little-endian signed
   * PCM when the source is not already signed PCM, then converts to the target rate/channels.
   */
  private byte[] decodeAndConvert(AudioInputStream ais, int ar, int ac) throws IOException {
    AudioFormat format = ais.getFormat();
    if (AudioFormat.Encoding.PCM_SIGNED.equals(format.getEncoding())) {
      // Already signed PCM: convert directly instead of giving up with null.
      return convert(ais, format, ar, ac);
    }
    AudioFormat pcmFormat = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, format.getSampleRate(), 16,
        format.getChannels(), format.getChannels() * 2, format.getSampleRate(), false);
    try (AudioInputStream pcmAis = AudioSystem.getAudioInputStream(pcmFormat, ais)) {
      return convert(pcmAis, pcmFormat, ar, ac);
    }
  }
}
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/utils/WhisperAudioUtils.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.utils;
2 |
3 | import java.io.IOException;
4 | import java.net.URL;
5 |
6 | import javax.sound.sampled.AudioInputStream;
7 | import javax.sound.sampled.AudioSystem;
8 | import javax.sound.sampled.UnsupportedAudioFileException;
9 |
/**
 * Converts audio into the normalized {@code float[]} sample form passed to whisper.
 */
public class WhisperAudioUtils {

  /**
   * Reads every byte of the audio at {@code url} and converts it with {@link #toFloat(byte[])}.
   *
   * <p>NOTE(review): the stream's format is not inspected; the data is assumed to be 16-bit
   * little-endian PCM. Confirm against callers.
   *
   * @param url location of the audio file
   * @return normalized samples
   * @throws UnsupportedAudioFileException if no installed reader recognizes the audio
   * @throws IOException                   if reading fails
   */
  public static float[] toAudioData(URL url) throws UnsupportedAudioFileException, IOException {
    try (AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(url);
        java.io.ByteArrayOutputStream pcm = new java.io.ByteArrayOutputStream()) {
      // available() is only an estimate and one read() may return fewer bytes than requested,
      // so drain the stream in a loop rather than sizing a single buffer up front.
      byte[] buffer = new byte[8192];
      int read;
      while ((read = audioInputStream.read(buffer)) != -1) {
        pcm.write(buffer, 0, read);
      }
      return toFloat(pcm.toByteArray());
    }
  }

  /**
   * Interprets {@code b} as consecutive 16-bit little-endian signed samples and scales each to
   * the range {@code [-1.0, 1.0]}. A trailing unpaired byte is ignored.
   *
   * @param b raw sample bytes
   * @return one float per complete 16-bit sample
   */
  public static float[] toFloat(byte[] b) {
    int sampleCount = b.length / 2;
    float[] floats = new float[sampleCount];
    for (int j = 0; j < sampleCount; j++) {
      int low = b[2 * j] & 0xFF; // low byte is unsigned
      int high = b[2 * j + 1]; // high byte carries the sign
      int intSample = (high << 8) | low;
      // -32768 / 32767 falls just below -1, so clamp the lower bound.
      floats[j] = Math.max(-1.0f, intSample / 32767.0f);
    }
    return floats;
  }

}
33 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/java/com/litongjava/ai/server/utils/WhisperExecutorServiceUtils.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.utils;
2 |
3 | import java.util.concurrent.Callable;
4 | import java.util.concurrent.ExecutorService;
5 | import java.util.concurrent.Executors;
6 | import java.util.concurrent.Future;
7 |
/**
 * Shared thread pool for whisper transcription tasks.
 *
 * @author Tong Li
 */
public class WhisperExecutorServiceUtils {
  /**
   * Fixed pool with one thread fewer than the available processors, but at least one:
   * {@code newFixedThreadPool(0)} throws {@link IllegalArgumentException} on a single-core host.
   */
  public static ExecutorService executorService = Executors
      .newFixedThreadPool(Math.max(1, Runtime.getRuntime().availableProcessors() - 1));

  /**
   * Submits {@code task} to the shared pool.
   *
   * @param task work to run
   * @param <T>  result type of the task
   * @return a future for the task's result
   */
  public static <T> Future<T> submit(Callable<T> task) {
    return executorService.submit(task);
  }

}
22 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 | ${CONSOLE_LOG_PATTERN}
12 |
13 |
14 |
15 |
16 |
17 |
18 | ${CONSOLE_LOG_PATTERN}
19 |
20 |
21 |
22 | ${LOG_HOME}/project-name-%d{yyyy-MM-dd}.log
23 |
24 | 180
25 |
26 |
27 |
28 | 10MB
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/test/java/com/litongjava/ai/server/service/WhisperCppServiceMultiThreadTest.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.service;
2 |
3 | import java.io.File;
4 | import java.net.MalformedURLException;
5 | import java.net.URL;
6 |
7 | import lombok.extern.slf4j.Slf4j;
8 |
9 | @Slf4j
10 | public class WhisperCppServiceMultiThreadTest {
11 |
12 | public static void main(String[] args) throws MalformedURLException {
13 | WhisperCppService whisperCppService = new WhisperCppService();
14 | File file = new File("E:\\code\\cpp\\project-ping\\whisper.cpp\\samples\\jfk.wav");
15 | URL url = file.toURI().toURL();
16 | int availableProcessors = Runtime.getRuntime().availableProcessors();
17 | log.info("availableProcessors:{}", availableProcessors);
18 | for (int i = 0; i < availableProcessors * 2; i++) {
19 | whisperCppService.index(url, null);
20 | }
21 |
22 | }
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/test/java/com/litongjava/ai/server/single/LocalLargeWhisperTest.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.single;
2 |
3 | import java.nio.file.Files;
4 | import java.nio.file.Path;
5 | import java.nio.file.Paths;
6 |
7 | import org.junit.Test;
8 |
9 | public class LocalLargeWhisperTest {
10 |
11 | @Test
12 | public void test() {
13 | String userHome = System.getProperty("user.home");
14 | Path path = Paths.get(userHome,".cache","whisper","ggml-base.en.bin");
15 | boolean exists = Files.exists(path);
16 | System.out.println(exists);
17 | }
18 |
19 | }
20 |
--------------------------------------------------------------------------------
/whisper-asr/whisper-asr-service/src/test/java/com/litongjava/ai/server/utils/Mp3UtilTest.java:
--------------------------------------------------------------------------------
1 | package com.litongjava.ai.server.utils;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.net.URISyntaxException;
6 | import java.net.URL;
7 |
8 | import javax.sound.sampled.LineUnavailableException;
9 | import javax.sound.sampled.UnsupportedAudioFileException;
10 |
11 | import org.junit.Test;
12 |
13 | import com.litongjava.ai.server.service.WhisperCppService;
14 |
15 | import cn.hutool.core.io.FileUtil;
16 | import cn.hutool.core.util.ClassUtil;
17 | import javazoom.jl.decoder.JavaLayerException;
18 |
19 | public class Mp3UtilTest {
20 |
21 | @Test
22 | public void test() throws URISyntaxException, IOException, UnsupportedAudioFileException, JavaLayerException,
23 | LineUnavailableException {
24 | URL resource = ClassUtil.getClassLoader().getResource("audios/test.mp3");
25 | if (resource == null) {
26 | return;
27 | }
28 | File file = new File(resource.toURI());
29 | byte[] mp3Data = FileUtil.readBytes(file);
30 | // Save or use wavData as needed
31 | Mp3Util mp3Util = new Mp3Util();
32 | byte[] wavData = mp3Util.convertToWav(mp3Data, 16000, 1);
33 | WhisperCppService whisperCppService = new WhisperCppService();
34 | Object index = whisperCppService.index(wavData, null);
35 | System.out.println(index);
36 |
37 | }
38 |
39 | @Test
40 | public void test2() throws URISyntaxException, IOException, UnsupportedAudioFileException {
41 | URL resource = ClassUtil.getClassLoader().getResource("audios/test.mp3");
42 | if (resource == null) {
43 | return;
44 | }
45 | File file = new File(resource.toURI());
46 | // byte[] convertToWav = Aop.get(Mp3Util.class).convertToWav(file, 16000, 1);
47 |
48 | }
49 | }
50 |
--------------------------------------------------------------------------------