├── .gitattributes ├── .gitignore ├── README.md ├── build.bat ├── build.gradle ├── clean.bat ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── img └── ocr.png ├── refresh.bat ├── settings.gradle └── src ├── main ├── java │ └── com │ │ └── bestvike │ │ └── ocr │ │ ├── OcrApplication.java │ │ ├── aliyun │ │ ├── AliyunApi.java │ │ ├── AliyunConfiguration.java │ │ ├── AliyunUtils.java │ │ └── entity │ │ │ ├── AliyunOcrPoint.java │ │ │ ├── AliyunOcrRequest.java │ │ │ ├── AliyunOcrResponse.java │ │ │ ├── AliyunOcrWordInfo.java │ │ │ └── AliyunResponse.java │ │ ├── collection │ │ ├── CharSequenceIterable.java │ │ └── CharSequenceIterator.java │ │ ├── controller │ │ └── OcrController.java │ │ ├── reflect │ │ ├── GenericType.java │ │ └── GenericTypeReference.java │ │ └── util │ │ ├── Convert.java │ │ ├── GridImage.java │ │ ├── HtmlUtils.java │ │ └── RestTemplateUtils.java └── resources │ ├── application.properties │ └── static │ ├── img │ ├── demo.jpg │ ├── demo_table.jpg │ └── demo_table2.jpg │ └── index.htm └── test └── java └── com └── bestvike └── ocr └── OcrApplicationTests.java /.gitattributes: -------------------------------------------------------------------------------- 1 | # Handle line endings automatically for files detected as text 2 | # and leave all files detected as binary untouched. 
3 | * text=auto 4 | 5 | # 6 | # The above will handle all files NOT found below 7 | # 8 | # These files are text and should be normalized (Convert crlf => lf) 9 | *.css text 10 | *.df text 11 | *.htm text 12 | *.html text 13 | *.java text 14 | *.js text 15 | *.json text 16 | *.jsp text 17 | *.jspf text 18 | *.properties text 19 | *.sh text 20 | *.tld text 21 | *.txt text 22 | *.xml text 23 | 24 | # These files are binary and should be left untouched 25 | # (binary is a macro for -text -diff) 26 | *.class binary 27 | *.dll binary 28 | *.ear binary 29 | *.gif binary 30 | *.ico binary 31 | *.jar binary 32 | *.jpg binary 33 | *.jpeg binary 34 | *.png binary 35 | *.so binary 36 | *.war binary 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 2 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 3 | 4 | # IntelliJ 5 | *.iml 6 | .idea 7 | .gradle 8 | CI_* 9 | out/ 10 | build/ 11 | classes/ 12 | target/ 13 | log/ 14 | lib/ 15 | 16 | # mpeltonen/sbt-idea plugin 17 | .idea_modules/ 18 | 19 | # JIRA plugin 20 | atlassian-ide-plugin.xml 21 | 22 | # Crashlytics plugin (for Android Studio and IntelliJ) 23 | com_crashlytics_export_strings.xml 24 | crashlytics.properties 25 | crashlytics-build.properties 26 | fabric.properties 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # 表格 OCR 3 | 识别印刷表格内容 4 | 5 | ## 编译 6 | - 集成开发环境为 `IDEA`, 建议升级为最新版本, 低版本可能无法编译启动 7 | - 如果 `IDEA` 没有自动设置 `jdk`, 需要自己手动设置 `File/Project Structure/Project/Project SDK` 选择 `1.8` 8 | - 在 `IDEA` 中安装 `lombok` 插件, `File/Settings/Plugins` 9 | - 启用 `lombok` 插件, `File/Settings/Build, Execution, 
Deployment/Compiler/Annotation Processors` 勾选 `Enable annotation processing` 10 | - 设置 `gradle`, `File/Settings/Build, Execution, Deployment/Build Tools/Gradle` 选中 `Use default gradle wrapper (recommended)` 11 | 12 | ## 辅助线要求 13 | - 颜色: `RGB(255,0,0)`(红色) 14 | - 宽度: `[1px-3px]` 15 | - PhotoShop: 不能开启抗锯齿(打开抗锯齿无法识别) 16 | - 辅助线可以不横平竖直,但是必须画到边 17 | 18 | ## 代码运行 19 | - 运行程序 20 | - 访问 [传送门](http://localhost:8080/) 上传图片识别 21 | - 访问 [传送门](http://localhost:8080/demo) 查看内置示例 22 | 23 | ## 前置条件 24 | - 原图识别会出现部分列连在一起无法分割的情况 25 | - 拍摄内容非横平竖直不易区分行列 26 | - 字过大识别率较低,可能与阿里识别服务未使用较大字体训练有关 27 | 28 | ## 实现思路 29 | - 调整图片大小保证字体大小适中 30 | - 画辅助线,根据辅助线将图片分成大小不等的矩形 31 | - 调用阿里云文本识别服务,返回文字和所在矩形(**阿里云账号信息为个人账号,请勿用于生产**) 32 | - 将识别结果派分到划分的矩形中 33 | - 由于英文逗号会被识别为中文逗号,分派时将中文逗号替换为英文逗号 34 | - 为了剔除干扰识别结果,分派数据时只分派了面积最大的文本矩形,会导致一个单元格多行的只能识别最长的行 35 | 36 | ## 阿里云文本识别服务购买地址 37 | 38 | 39 | ## 示例图片识别结果 40 | ![ocr](img/ocr.png "识别结果") 41 | -------------------------------------------------------------------------------- /build.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | gradlew clean build --refresh-dependencies %* 3 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | buildscript { 2 | ext { 3 | springBootVersion = '2.0.6.RELEASE' 4 | } 5 | repositories { 6 | mavenCentral() 7 | } 8 | dependencies { 9 | classpath("org.springframework.boot:spring-boot-gradle-plugin:${springBootVersion}") 10 | } 11 | } 12 | 13 | apply plugin: 'java' 14 | apply plugin: 'org.springframework.boot' 15 | apply plugin: 'io.spring.dependency-management' 16 | 17 | group = 'com.bestvike' 18 | version = '0.0.1-SNAPSHOT' 19 | sourceCompatibility = 1.8 20 | 21 | repositories { 22 | mavenCentral() 23 | } 24 | 25 | dependencies { 26 | //依赖 27 | compile 'org.springframework.boot:spring-boot-starter-web' 28 | compile 'org.apache.commons:commons-lang3:3.7' 
29 | compile 'commons-collections:commons-collections:3.2.2' 30 | compile 'commons-io:commons-io:2.4' 31 | compile 'org.apache.poi:poi:4.0.0' 32 | compile 'com.bestvike:linq:2.0.0' 33 | //编译时 34 | compileOnly 'org.projectlombok:lombok' 35 | //运行时 36 | runtimeOnly 'org.springframework.boot:spring-boot-devtools' 37 | //测试 38 | testImplementation 'org.springframework.boot:spring-boot-starter-test' 39 | } 40 | -------------------------------------------------------------------------------- /clean.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | FOR /D /R %%i IN (out build log) DO ( 3 | IF EXIST %%i ( 4 | RD /S /Q %%i 5 | @ECHO deleted %%i 6 | ) 7 | ) 8 | @echo finished 9 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timandy/table_ocr/989a1c300a50744d52d48f1e971e813d461d44e8/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-4.10-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ############################################################################## 4 | ## 5 | ## Gradle start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Attempt to set APP_HOME 10 | # Resolve links: $0 may be a link 11 | PRG="$0" 12 | # 
Need this for relative symlinks. 13 | while [ -h "$PRG" ] ; do 14 | ls=`ls -ld "$PRG"` 15 | link=`expr "$ls" : '.*-> \(.*\)$'` 16 | if expr "$link" : '/.*' > /dev/null; then 17 | PRG="$link" 18 | else 19 | PRG=`dirname "$PRG"`"/$link" 20 | fi 21 | done 22 | SAVED="`pwd`" 23 | cd "`dirname \"$PRG\"`/" >/dev/null 24 | APP_HOME="`pwd -P`" 25 | cd "$SAVED" >/dev/null 26 | 27 | APP_NAME="Gradle" 28 | APP_BASE_NAME=`basename "$0"` 29 | 30 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 31 | DEFAULT_JVM_OPTS="" 32 | 33 | # Use the maximum available, or set MAX_FD != -1 to use that value. 34 | MAX_FD="maximum" 35 | 36 | warn () { 37 | echo "$*" 38 | } 39 | 40 | die () { 41 | echo 42 | echo "$*" 43 | echo 44 | exit 1 45 | } 46 | 47 | # OS specific support (must be 'true' or 'false'). 48 | cygwin=false 49 | msys=false 50 | darwin=false 51 | nonstop=false 52 | case "`uname`" in 53 | CYGWIN* ) 54 | cygwin=true 55 | ;; 56 | Darwin* ) 57 | darwin=true 58 | ;; 59 | MINGW* ) 60 | msys=true 61 | ;; 62 | NONSTOP* ) 63 | nonstop=true 64 | ;; 65 | esac 66 | 67 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 68 | 69 | # Determine the Java command to use to start the JVM. 70 | if [ -n "$JAVA_HOME" ] ; then 71 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 72 | # IBM's JDK on AIX uses strange locations for the executables 73 | JAVACMD="$JAVA_HOME/jre/sh/java" 74 | else 75 | JAVACMD="$JAVA_HOME/bin/java" 76 | fi 77 | if [ ! -x "$JAVACMD" ] ; then 78 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 79 | 80 | Please set the JAVA_HOME variable in your environment to match the 81 | location of your Java installation." 82 | fi 83 | else 84 | JAVACMD="java" 85 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 86 | 87 | Please set the JAVA_HOME variable in your environment to match the 88 | location of your Java installation." 
89 | fi 90 | 91 | # Increase the maximum file descriptors if we can. 92 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 93 | MAX_FD_LIMIT=`ulimit -H -n` 94 | if [ $? -eq 0 ] ; then 95 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 96 | MAX_FD="$MAX_FD_LIMIT" 97 | fi 98 | ulimit -n $MAX_FD 99 | if [ $? -ne 0 ] ; then 100 | warn "Could not set maximum file descriptor limit: $MAX_FD" 101 | fi 102 | else 103 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 104 | fi 105 | fi 106 | 107 | # For Darwin, add options to specify how the application appears in the dock 108 | if $darwin; then 109 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 110 | fi 111 | 112 | # For Cygwin, switch paths to Windows format before running java 113 | if $cygwin ; then 114 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 115 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 116 | JAVACMD=`cygpath --unix "$JAVACMD"` 117 | 118 | # We build the pattern for arguments to be converted via cygpath 119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 120 | SEP="" 121 | for dir in $ROOTDIRSRAW ; do 122 | ROOTDIRS="$ROOTDIRS$SEP$dir" 123 | SEP="|" 124 | done 125 | OURCYGPATTERN="(^($ROOTDIRS))" 126 | # Add a user-defined pattern to the cygpath arguments 127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 129 | fi 130 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 131 | i=0 132 | for arg in "$@" ; do 133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 135 | 136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 138 | else 139 | eval `echo args$i`="\"$arg\"" 140 | fi 141 | i=$((i+1)) 142 | done 143 | case $i in 144 | (0) set -- ;; 145 | (1) 
set -- "$args0" ;; 146 | (2) set -- "$args0" "$args1" ;; 147 | (3) set -- "$args0" "$args1" "$args2" ;; 148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 150 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 151 | (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; 152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 154 | esac 155 | fi 156 | 157 | # Escape application args 158 | save () { 159 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 160 | echo " " 161 | } 162 | APP_ARGS=$(save "$@") 163 | 164 | # Collect all arguments for the java command, following the shell quoting and substitution rules 165 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 166 | 167 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong 168 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then 169 | cd "$(dirname "$0")" 170 | fi 171 | 172 | exec "$JAVACMD" "$@" 173 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | set DIRNAME=%~dp0 12 | if "%DIRNAME%" == "" set DIRNAME=. 
13 | set APP_BASE_NAME=%~n0 14 | set APP_HOME=%DIRNAME% 15 | 16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 17 | set DEFAULT_JVM_OPTS= 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windows variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | 53 | :win9xME_args 54 | @rem Slurp the command line arguments. 55 | set CMD_LINE_ARGS= 56 | set _SKIP=2 57 | 58 | :win9xME_args_slurp 59 | if "x%~1" == "x" goto execute 60 | 61 | set CMD_LINE_ARGS=%* 62 | 63 | :execute 64 | @rem Setup the command line 65 | 66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 67 | 68 | @rem Execute Gradle 69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 70 | 71 | :end 72 | @rem End local scope for the variables with windows NT shell 73 | if "%ERRORLEVEL%"=="0" goto mainEnd 74 | 75 | :fail 76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 77 | rem the _cmd.exe /c_ return code! 
78 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 79 | exit /b 1 80 | 81 | :mainEnd 82 | if "%OS%"=="Windows_NT" endlocal 83 | 84 | :omega 85 | -------------------------------------------------------------------------------- /img/ocr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timandy/table_ocr/989a1c300a50744d52d48f1e971e813d461d44e8/img/ocr.png -------------------------------------------------------------------------------- /refresh.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | gradlew --refresh-dependencies %* 3 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'ocr' 2 | -------------------------------------------------------------------------------- /src/main/java/com/bestvike/ocr/OcrApplication.java: -------------------------------------------------------------------------------- 1 | package com.bestvike.ocr; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | @SpringBootApplication 7 | public class OcrApplication { 8 | public static void main(String[] args) { 9 | SpringApplication.run(OcrApplication.class, args); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/com/bestvike/ocr/aliyun/AliyunApi.java: -------------------------------------------------------------------------------- 1 | package com.bestvike.ocr.aliyun; 2 | 3 | import com.bestvike.ocr.aliyun.entity.AliyunOcrRequest; 4 | import com.bestvike.ocr.aliyun.entity.AliyunOcrResponse; 5 | import com.bestvike.ocr.util.RestTemplateUtils; 6 | import org.apache.commons.codec.binary.Base64; 7 | import org.springframework.beans.factory.annotation.Autowired; 8 | import 
org.springframework.http.HttpHeaders; 9 | import org.springframework.http.MediaType; 10 | import org.springframework.stereotype.Component; 11 | 12 | /** 13 | * Created by 许崇雷 on 2018-10-20. 14 | */ 15 | @Component 16 | public final class AliyunApi { 17 | //服务购买 https://market.aliyun.com/products/57124001/cmapi028554.html?spm=5176.182739.954606.2.69111d8a49wk2V#sku=yuncode2255400000 18 | private static final String DOMAIN = "https://ocrapi-advanced.taobao.com"; 19 | @Autowired 20 | private AliyunConfiguration aliyunConfiguration; 21 | 22 | //阿里云 ocr 23 | public AliyunOcrResponse ocrAdvanced(byte[] imgData) { 24 | String img = Base64.encodeBase64String(imgData); 25 | AliyunOcrRequest request = new AliyunOcrRequest(); 26 | request.setImg(img); 27 | HttpHeaders headers = new HttpHeaders(); 28 | headers.set("Authorization", "APPCODE " + this.aliyunConfiguration.getAppCode()); 29 | headers.setContentType(MediaType.APPLICATION_JSON_UTF8); 30 | AliyunOcrResponse ocrResponse = RestTemplateUtils.post(DOMAIN + "/ocrservice/advanced", request, headers, AliyunOcrResponse.class); 31 | if (!ocrResponse.isSuccess()) 32 | throw new RuntimeException("调用阿里云失败:" + ocrResponse.getError_msg()); 33 | return ocrResponse; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/com/bestvike/ocr/aliyun/AliyunConfiguration.java: -------------------------------------------------------------------------------- 1 | package com.bestvike.ocr.aliyun; 2 | 3 | import lombok.Data; 4 | import org.springframework.boot.context.properties.ConfigurationProperties; 5 | import org.springframework.context.annotation.Configuration; 6 | 7 | /** 8 | * Created by 许崇雷 on 2018-10-21. 
9 | */ 10 | @Data 11 | @Configuration 12 | @ConfigurationProperties(prefix = "app.aliyun") 13 | public class AliyunConfiguration { 14 | private String appCode; 15 | } 16 | -------------------------------------------------------------------------------- /src/main/java/com/bestvike/ocr/aliyun/AliyunUtils.java: -------------------------------------------------------------------------------- 1 | package com.bestvike.ocr.aliyun; 2 | 3 | import com.bestvike.ocr.aliyun.entity.AliyunOcrResponse; 4 | import org.springframework.stereotype.Component; 5 | 6 | /** 7 | * Created by 许崇雷 on 2018-10-21. 8 | */ 9 | @Component 10 | public final class AliyunUtils { 11 | private static AliyunApi aliyunApi; 12 | 13 | public AliyunUtils(AliyunApi aliyunApiInstance) { 14 | aliyunApi = aliyunApiInstance; 15 | } 16 | 17 | //阿里云 ocr 18 | public static AliyunOcrResponse ocrAdvanced(byte[] imgData) { 19 | return aliyunApi.ocrAdvanced(imgData); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/main/java/com/bestvike/ocr/aliyun/entity/AliyunOcrPoint.java: -------------------------------------------------------------------------------- 1 | package com.bestvike.ocr.aliyun.entity; 2 | 3 | import lombok.Data; 4 | 5 | /** 6 | * Created by 许崇雷 on 2018-10-20. 7 | */ 8 | @Data 9 | public class AliyunOcrPoint { 10 | private int x; 11 | private int y; 12 | } 13 | -------------------------------------------------------------------------------- /src/main/java/com/bestvike/ocr/aliyun/entity/AliyunOcrRequest.java: -------------------------------------------------------------------------------- 1 | package com.bestvike.ocr.aliyun.entity; 2 | 3 | import lombok.Data; 4 | 5 | /** 6 | * Created by 许崇雷 on 2018-10-20. 
7 | */ 8 | @Data 9 | public class AliyunOcrRequest { 10 | private String img; 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/com/bestvike/ocr/aliyun/entity/AliyunOcrResponse.java: -------------------------------------------------------------------------------- 1 | package com.bestvike.ocr.aliyun.entity; 2 | 3 | import lombok.Data; 4 | import lombok.EqualsAndHashCode; 5 | 6 | import java.util.List; 7 | 8 | /** 9 | * Created by 许崇雷 on 2018-10-20. 10 | */ 11 | @EqualsAndHashCode(callSuper = true) 12 | @Data 13 | public class AliyunOcrResponse extends AliyunResponse { 14 | private String sid; 15 | private String prism_version; 16 | private int prism_wnum; 17 | private List prism_wordsInfo; 18 | 19 | @Override 20 | public boolean isSuccess() { 21 | return super.isSuccess() && this.prism_wordsInfo != null; 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/com/bestvike/ocr/aliyun/entity/AliyunOcrWordInfo.java: -------------------------------------------------------------------------------- 1 | package com.bestvike.ocr.aliyun.entity; 2 | 3 | import lombok.Data; 4 | 5 | import java.awt.*; 6 | import java.util.List; 7 | 8 | /** 9 | * Created by 许崇雷 on 2018-10-20. 10 | */ 11 | @Data 12 | public class AliyunOcrWordInfo { 13 | private List pos; 14 | private String word; 15 | private Rectangle rectangle; 16 | 17 | public Rectangle getRectangle() { 18 | return this.rectangle == null 19 | ? 
(this.rectangle = new Rectangle(this.pos.get(0).getX(), this.pos.get(0).getY(), this.pos.get(1).getX() - this.pos.get(0).getX(), this.pos.get(2).getY() - this.pos.get(0).getY())) 20 | : this.rectangle; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/com/bestvike/ocr/aliyun/entity/AliyunResponse.java: -------------------------------------------------------------------------------- 1 | package com.bestvike.ocr.aliyun.entity; 2 | 3 | import lombok.Data; 4 | 5 | /** 6 | * Created by 许崇雷 on 2018-10-20. 7 | */ 8 | @Data 9 | public class AliyunResponse { 10 | public int error_code; 11 | public String error_msg; 12 | 13 | public boolean isSuccess() { 14 | return this.error_code == 0; 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/main/java/com/bestvike/ocr/collection/CharSequenceIterable.java: -------------------------------------------------------------------------------- 1 | package com.bestvike.ocr.collection; 2 | 3 | import java.util.Iterator; 4 | 5 | /** 6 | * Created by 许崇雷 on 2016/8/2. 7 | */ 8 | public final class CharSequenceIterable implements Iterable { 9 | /** 10 | * The charSequence to iterate over 11 | */ 12 | private final CharSequence charSequence; 13 | /** 14 | * The start index to loop from 15 | */ 16 | private final int startIndex; 17 | /** 18 | * The end index to loop to 19 | */ 20 | private final int endIndex; 21 | 22 | /** 23 | * Constructs an CharSequenceIterable that will iterate over the values in the 24 | * specified charSequence. 25 | * 26 | * @param charSequence the charSequence to iterate over. 27 | */ 28 | public CharSequenceIterable(final CharSequence charSequence) { 29 | this(charSequence, 0); 30 | } 31 | 32 | /** 33 | * Constructs an CharSequenceIterable that will iterate over the values in the 34 | * specified charSequence from a specific start index. 
35 | * 36 | * @param charSequence the charSequence to iterate over. 37 | * @param startIndex the index to start iterating at. 38 | */ 39 | public CharSequenceIterable(final CharSequence charSequence, final int startIndex) { 40 | this(charSequence, startIndex, charSequence.length()); 41 | } 42 | 43 | /** 44 | * Construct an CharSequenceIterable that will iterate over a range of values 45 | * in the specified charSequence. 46 | * 47 | * @param charSequence the charSequence to iterate over. 48 | * @param startIndex the index to start iterating at. 49 | * @param endIndex the index to finish iterating at. 50 | */ 51 | public CharSequenceIterable(final CharSequence charSequence, final int startIndex, final int endIndex) { 52 | super(); 53 | 54 | this.charSequence = charSequence; 55 | this.startIndex = startIndex; 56 | this.endIndex = endIndex; 57 | } 58 | 59 | /** 60 | * return an Iterator that will iterate over the values in the specified charSequence. 61 | * 62 | * @return the Iterator. 63 | * @throws NullPointerException if charSequence is null 64 | * @throws IndexOutOfBoundsException if either index is invalid 65 | */ 66 | @Override 67 | public Iterator iterator() { 68 | return new CharSequenceIterator(this.charSequence, this.startIndex, this.endIndex); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/main/java/com/bestvike/ocr/collection/CharSequenceIterator.java: -------------------------------------------------------------------------------- 1 | package com.bestvike.ocr.collection; 2 | 3 | import java.lang.reflect.Array; 4 | import java.util.Iterator; 5 | 6 | /** 7 | * Created by 许崇雷 on 2016/8/2. 
/**
 * Iterates over the characters of a {@link CharSequence}, optionally restricted to the
 * half-open index range {@code [startIndex, endIndex)}.
 * Created by 许崇雷 on 2016/8/2.
 */
public final class CharSequenceIterator implements Iterator<Character> {
    /**
     * The charSequence to iterate over
     */
    private final CharSequence charSequence;
    /**
     * The start index to loop from (inclusive)
     */
    private final int startIndex;
    /**
     * The end index to loop to (exclusive)
     */
    private final int endIndex;
    /**
     * The current iterator index
     */
    private int index;

    /**
     * Constructs a CharSequenceIterator that will iterate over all characters in the
     * specified charSequence.
     *
     * @param charSequence the charSequence to iterate over.
     * @throws NullPointerException if charSequence is null
     */
    public CharSequenceIterator(final CharSequence charSequence) {
        this(charSequence, 0);
    }

    /**
     * Constructs a CharSequenceIterator that will iterate over the characters in the
     * specified charSequence from a specific start index to its end.
     *
     * @param charSequence the charSequence to iterate over.
     * @param startIndex   the index to start iterating at.
     * @throws NullPointerException      if charSequence is null
     * @throws IndexOutOfBoundsException if the index is invalid
     */
    public CharSequenceIterator(final CharSequence charSequence, final int startIndex) {
        // A CharSequence is not an array: java.lang.reflect.Array.getLength would reject it
        // with IllegalArgumentException, so ask the sequence for its own length.
        this(charSequence, startIndex, charSequence.length());
    }

    /**
     * Constructs a CharSequenceIterator that will iterate over a range of characters
     * in the specified charSequence.
     *
     * @param charSequence the charSequence to iterate over.
     * @param startIndex   the index to start iterating at (inclusive).
     * @param endIndex     the index to finish iterating at (exclusive).
     * @throws NullPointerException      if charSequence is null
     * @throws IndexOutOfBoundsException if either index is outside {@code [0, length]}
     * @throws IllegalArgumentException  if endIndex is less than startIndex
     */
    public CharSequenceIterator(final CharSequence charSequence, final int startIndex, final int endIndex) {
        super();

        this.charSequence = charSequence;
        this.startIndex = startIndex;
        this.endIndex = endIndex;
        this.index = startIndex;

        final int len = charSequence.length();
        this.checkBound(startIndex, len, "start");
        this.checkBound(endIndex, len, "end");
        if (endIndex < startIndex)
            throw new IllegalArgumentException("End index must not be less than start index.");
    }

    /**
     * Gets the charSequence that this iterator is iterating over.
     *
     * @return the charSequence this iterator iterates over.
     */
    public CharSequence getCharSequence() {
        return this.charSequence;
    }

    /**
     * Gets the start index to loop from.
     *
     * @return the start index
     */
    public int getStartIndex() {
        return this.startIndex;
    }

    /**
     * Gets the end index to loop to.
     *
     * @return the end index
     */
    public int getEndIndex() {
        return this.endIndex;
    }

    /**
     * Checks whether the index is valid or not.
     *
     * @param bound the index to check
     * @param len   the length of the charSequence
     * @param type  the index type (for error messages)
     * @throws IndexOutOfBoundsException if the index is invalid
     */
    protected void checkBound(final int bound, final int len, final String type) {
        if (bound > len)
            throw new ArrayIndexOutOfBoundsException("Attempt to make an ArrayIterator that " + type + "s beyond the end of the charSequence. ");
        if (bound < 0)
            throw new ArrayIndexOutOfBoundsException("Attempt to make an ArrayIterator that " + type + "s before the start of the charSequence. ");
    }

    /**
     * Returns true if there are more characters to return from the configured range.
     *
     * @return true if there is a next element to return
     */
    @Override
    public boolean hasNext() {
        return this.index < this.endIndex;
    }

    /**
     * Returns the next character of the configured range.
     *
     * @return the next character
     * @throws java.util.NoSuchElementException if all characters of the range have
     *                                          already been returned
     */
    @Override
    public Character next() {
        // Honour endIndex: without this guard a restricted range would keep returning
        // characters that lie beyond it.
        if (this.index >= this.endIndex)
            throw new java.util.NoSuchElementException("No more characters in range [" + this.startIndex + ", " + this.endIndex + ")");
        return this.charSequence.charAt(this.index++);
    }

    /**
     * Throws {@link UnsupportedOperationException}.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException("remove() method is not supported");
    }
}
19 | */ 20 | @Controller 21 | public class OcrController { 22 | private byte[] getBytes(MultipartFile file) throws IOException { 23 | if (file == null) 24 | throw new RuntimeException("图片不能为空"); 25 | byte[] bytes = file.getBytes(); 26 | if (bytes.length == 0) 27 | throw new RuntimeException("图片长度不能为 0"); 28 | return bytes; 29 | } 30 | 31 | @GetMapping("/") 32 | public String index() { 33 | return "index.htm"; 34 | } 35 | 36 | @PostMapping("/preview") 37 | public void ocr(@RequestParam("file") MultipartFile file, HttpServletResponse response) throws IOException { 38 | byte[] bytes = this.getBytes(file); 39 | this.preview(bytes, response); 40 | } 41 | 42 | @PostMapping("/excel") 43 | public void excel(@RequestParam MultipartFile file, HttpServletResponse response) throws IOException { 44 | byte[] bytes = this.getBytes(file); 45 | GridImage image = new GridImage(bytes); 46 | response.setHeader("Content-Disposition", "attachment;filename=demo.xls"); 47 | response.setContentType("application/force-download");//应用程序强制下载 48 | try (ServletOutputStream outputStream = response.getOutputStream()) { 49 | image.saveAsExcel(outputStream, "demo"); 50 | } 51 | } 52 | 53 | @GetMapping("/demo") 54 | public void ocrDemo(HttpServletResponse response) throws IOException { 55 | try (InputStream demoStream = new ClassPathResource("static/img/demo_table.jpg").getInputStream()) { 56 | this.preview(IOUtils.toByteArray(demoStream), response); 57 | } 58 | } 59 | 60 | private void preview(byte[] bytes, HttpServletResponse response) throws IOException { 61 | GridImage image = new GridImage(bytes); 62 | String html = image.preview(); 63 | try (ServletOutputStream outputStream = response.getOutputStream()) { 64 | IOUtils.write(html, outputStream, "utf-8"); 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/com/bestvike/ocr/reflect/GenericType.java: 
// File header carried over from the dump, kept as comments so the class below can be spliced in on its own:
// package com.bestvike.ocr.reflect;
// imports: com.fasterxml.jackson.databind.util.ArrayIterator, sun.reflect.generics.reflectiveObjects.ParameterizedTypeImpl,
//          java.lang.reflect.ParameterizedType, java.lang.reflect.Type, java.util.Arrays, java.util.Iterator, java.util.Objects

/**
 * Captures a generic type at runtime by being subclassed anonymously,
 * e.g. {@code new GenericType<List<String>>() {}}.
 * <p>
 * The type argument given to the direct subclass is read in the constructor and exposed through
 * the {@link ParameterizedType} methods. A plain class argument such as {@code GenericType<String>}
 * is reported with that class as raw type and no type arguments.
 * <p>
 * Only the public {@link ParameterizedType} interface is used, not the JDK-internal
 * implementation class, so the type works on any JDK.
 * <p>
 * Created by 许崇雷 on 2017-10-18.
 *
 * @param <T> the type to capture
 */
@SuppressWarnings("unused")
public abstract class GenericType<T> implements ParameterizedType {
    private static final Type[] EMPTY_TYPE_ARRAY = new Type[0];
    private final Type[] actualTypeArguments;
    private final Class<?> rawType;
    private final Type ownerType;

    /**
     * Reads the type argument supplied by the direct subclass.
     *
     * @throws IllegalStateException if the direct subclass does not supply a type argument
     * @throws RuntimeException      if the type argument is neither a class nor a parameterized type
     */
    public GenericType() {
        Type superType = this.getClass().getGenericSuperclass();
        if (!(superType instanceof ParameterizedType))
            throw new IllegalStateException("GenericType must be subclassed directly with a type argument, e.g. new GenericType<List<String>>() {}.");
        Type type = ((ParameterizedType) superType).getActualTypeArguments()[0];
        if (type instanceof Class) {
            this.actualTypeArguments = EMPTY_TYPE_ARRAY;
            this.rawType = (Class<?>) type;
            this.ownerType = null;
        } else if (type instanceof ParameterizedType) {
            ParameterizedType parameterizedType = (ParameterizedType) type;
            this.actualTypeArguments = parameterizedType.getActualTypeArguments();
            this.rawType = (Class<?>) parameterizedType.getRawType();
            this.ownerType = parameterizedType.getOwnerType();
        } else {
            throw new RuntimeException("type must be instance of Class or ParameterizedType.");
        }
    }

    /**
     * @return a copy of the captured type arguments; empty when a plain class was captured
     */
    @Override
    public Type[] getActualTypeArguments() {
        // Hand out a copy so callers cannot modify the captured arguments.
        return this.actualTypeArguments.length == 0 ? EMPTY_TYPE_ARRAY : this.actualTypeArguments.clone();
    }

    /**
     * @return the raw class of the captured type
     */
    @Override
    public Class<?> getRawType() {
        return this.rawType;
    }

    /**
     * @return the owner type of the captured type, or {@code null} if it has none
     */
    @Override
    public Type getOwnerType() {
        return this.ownerType;
    }

    /**
     * Two instances are equal when their type arguments, raw type and owner type are equal.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (!(obj instanceof GenericType))
            return false;
        GenericType<?> that = (GenericType<?>) obj;
        return Arrays.equals(this.actualTypeArguments, that.actualTypeArguments)
                && Objects.equals(this.rawType, that.rawType)
                && Objects.equals(this.ownerType, that.ownerType);
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + Arrays.hashCode(this.actualTypeArguments);
        result = prime * result + Objects.hashCode(this.rawType);
        result = prime * result + Objects.hashCode(this.ownerType);
        return result;
    }

    /**
     * Renders the type as {@code owner.Raw<Arg1, Arg2>}, using fully qualified names.
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        if (this.ownerType != null) {
            if (this.ownerType instanceof Class)
                builder.append(((Class<?>) this.ownerType).getName());
            else
                builder.append(this.ownerType.toString());
            builder.append(".");
            if (this.ownerType instanceof ParameterizedType) {
                // The owner was already printed with its arguments; drop the "Outer$" prefix of the nested name.
                String ownerRawName = ((ParameterizedType) this.ownerType).getRawType().getTypeName();
                builder.append(this.rawType.getName().replace(ownerRawName + "$", ""));
            } else {
                builder.append(this.rawType.getName());
            }
        } else {
            builder.append(this.rawType.getName());
        }

        if (this.actualTypeArguments.length > 0) {
            builder.append("<");
            for (int i = 0; i < this.actualTypeArguments.length; i++) {
                if (i > 0)
                    builder.append(", ");
                builder.append(this.actualTypeArguments[i].getTypeName());
            }
            builder.append(">");
        }

        return builder.toString();
    }
}
// ---- dump marker (next file): /src/main/java/com/bestvike/ocr/reflect/GenericTypeReference.java ----
-------------------------------------------------------------------------------- 1 | package com.bestvike.ocr.reflect; 2 | 3 | import org.springframework.core.ParameterizedTypeReference; 4 | import org.springframework.util.Assert; 5 | 6 | import java.lang.reflect.Type; 7 | 8 | /** 9 | * Created by 许崇雷 on 2017-10-10. 10 | */ 11 | public final class GenericTypeReference extends ParameterizedTypeReference { 12 | private final Type type; 13 | 14 | public GenericTypeReference(Type type) { 15 | super(); 16 | Assert.notNull(type, "type can not be null"); 17 | this.type = type; 18 | } 19 | 20 | @Override 21 | public Type getType() { 22 | return this.type; 23 | } 24 | 25 | @Override 26 | public boolean equals(Object obj) { 27 | return this == obj || obj instanceof org.springframework.core.ParameterizedTypeReference && this.type.equals(((GenericTypeReference) obj).type); 28 | } 29 | 30 | @Override 31 | public int hashCode() { 32 | return this.type.hashCode(); 33 | } 34 | 35 | @Override 36 | public String toString() { 37 | return "GenericTypeReference<" + this.type + ">"; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/com/bestvike/ocr/util/Convert.java: -------------------------------------------------------------------------------- 1 | package com.bestvike.ocr.util; 2 | 3 | import org.apache.commons.lang3.StringUtils; 4 | 5 | import java.math.BigDecimal; 6 | import java.sql.Timestamp; 7 | import java.text.SimpleDateFormat; 8 | import java.util.Date; 9 | 10 | /** 11 | * Created by 许崇雷 on 2016/6/17. 
/**
 * Type conversion helpers, grouped into four families:
 * <ul>
 * <li>{@code asXxx} - checked cast; returns {@code null} when the value is not of that type</li>
 * <li>{@code toXxx} - conversion via the value's string form; throws when it fails</li>
 * <li>{@code nullXxx} - like {@code toXxx}, but returns {@code null} when it fails</li>
 * <li>{@code defaultXxx} - like {@code toXxx}, but returns a default value when it fails</li>
 * </ul>
 * Created by 许崇雷 on 2016/6/17.
 */
@SuppressWarnings({"unused", "WeakerAccess"})
public final class Convert {
    private static final boolean BOOLEAN_DEFAULT = false;
    private static final byte BYTE_DEFAULT = 0;
    private static final short SHORT_DEFAULT = 0;
    private static final int INT_DEFAULT = 0;
    private static final long LONG_DEFAULT = 0L;
    private static final float FLOAT_DEFAULT = 0f;
    private static final double DOUBLE_DEFAULT = 0d;
    private static final BigDecimal DECIMAL_DEFAULT = BigDecimal.ZERO;
    private static final char CHAR_DEFAULT = 0;
    private static final String STRING_DEFAULT = "";

    // SimpleDateFormat is not thread-safe, so every thread gets its own instances.
    private static final ThreadLocal<SimpleDateFormat> FORMAT_DATE = ThreadLocal.withInitial(() -> new SimpleDateFormat("yyyy-MM-dd"));
    private static final ThreadLocal<SimpleDateFormat> FORMAT_DATE_TIME = ThreadLocal.withInitial(() -> new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"));
    private static final ThreadLocal<SimpleDateFormat> FORMAT_DATE_TIME_MS = ThreadLocal.withInitial(() -> new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"));


    //region as: checked cast, returns null when the value is not of the requested type

    public static Boolean asBoolean(Object value) {
        return asType(value, Boolean.class);
    }

    public static Byte asByte(Object value) {
        return asType(value, Byte.class);
    }

    public static Short asShort(Object value) {
        return asType(value, Short.class);
    }

    public static Integer asInteger(Object value) {
        return asType(value, Integer.class);
    }

    public static Long asLong(Object value) {
        return asType(value, Long.class);
    }

    public static Float asFloat(Object value) {
        return asType(value, Float.class);
    }

    public static Double asDouble(Object value) {
        return asType(value, Double.class);
    }

    public static BigDecimal asDecimal(Object value) {
        return asType(value, BigDecimal.class);
    }

    public static Character asCharacter(Object value) {
        return asType(value, Character.class);
    }

    public static String asString(Object value) {
        return asType(value, String.class);
    }

    public static Date asDate(Object value) {
        return asType(value, Date.class);
    }

    public static Timestamp asTimestamp(Object value) {
        return asType(value, Timestamp.class);
    }

    /**
     * Casts {@code value} to {@code clazz} when it is an instance of it.
     *
     * @param value the value to cast, may be {@code null}
     * @param clazz the requested type
     * @param <T>   the requested type
     * @return the value cast to {@code T}, or {@code null} when it is {@code null} or of another type
     * @throws NullPointerException if {@code clazz} is {@code null}
     */
    public static <T> T asType(Object value, Class<T> clazz) {
        if (clazz == null)
            throw new NullPointerException("clazz can not be null");
        return clazz.isInstance(value) ? clazz.cast(value) : null;
    }

    //endregion


    //region to: convert, throws when the conversion fails

    /**
     * Converts a value to {@code boolean}.
     * Accepts (ignoring ASCII case) {@code y, t, on, yes, true} as {@code true}
     * and {@code n, f, no, off, false} as {@code false}.
     *
     * @param value a {@link Boolean} or any object whose string form is one of the words above
     * @return the boolean value
     * @throws ClassCastException if the string form is none of the accepted words
     */
    public static boolean toBoolean(Object value) {
        if (value instanceof Boolean)
            return (Boolean) value;
        String str = toString(value);
        if (equalsAnyIgnoreAsciiCase(str, "y", "t", "on", "yes", "true"))
            return true;
        if (equalsAnyIgnoreAsciiCase(str, "n", "f", "no", "off", "false"))
            return false;
        throw new ClassCastException(String.format("can not cast '%s' to boolean", value));
    }

    /**
     * Tells whether {@code str} equals one of the given lower-case ASCII words, ignoring ASCII case only.
     */
    private static boolean equalsAnyIgnoreAsciiCase(String str, String... lowerCaseWords) {
        for (String word : lowerCaseWords) {
            if (equalsIgnoreAsciiCase(str, word))
                return true;
        }
        return false;
    }

    /**
     * Compares {@code str} with a lower-case ASCII word, folding only 'A'-'Z' to lower case.
     */
    private static boolean equalsIgnoreAsciiCase(String str, String lowerCaseWord) {
        if (str.length() != lowerCaseWord.length())
            return false;
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            if (ch >= 'A' && ch <= 'Z')
                ch += 'a' - 'A';
            if (ch != lowerCaseWord.charAt(i))
                return false;
        }
        return true;
    }

    public static byte toByte(Object value) {
        return value instanceof Byte ? (Byte) value : Byte.parseByte(toString(value));
    }

    public static short toShort(Object value) {
        return value instanceof Short ? (Short) value : Short.parseShort(toString(value));
    }

    public static int toInteger(Object value) {
        return value instanceof Integer ? (Integer) value : Integer.parseInt(toString(value));
    }

    public static long toLong(Object value) {
        return value instanceof Long ? (Long) value : Long.parseLong(toString(value));
    }

    public static float toFloat(Object value) {
        return value instanceof Float ? (Float) value : Float.parseFloat(toString(value));
    }

    public static double toDouble(Object value) {
        return value instanceof Double ? (Double) value : Double.parseDouble(toString(value));
    }

    public static BigDecimal toDecimal(Object value) {
        return value instanceof BigDecimal ? (BigDecimal) value : new BigDecimal(toString(value));
    }

    /**
     * @throws ClassCastException if the string form of {@code value} is not exactly one char long
     */
    public static char toCharacter(Object value) {
        if (value instanceof Character)
            return (Character) value;
        String str = toString(value);
        if (str.length() == 1)
            return str.charAt(0);
        throw new ClassCastException(String.format("can not cast '%s' to char", value));
    }

    /**
     * @return {@code value.toString()}, or the empty string when the value or its string form is {@code null}
     */
    public static String toString(Object value) {
        if (value == null)
            return STRING_DEFAULT;
        String str = value.toString();
        return str == null ? STRING_DEFAULT : str;
    }

    /**
     * Converts a value to {@link Date}.
     * A string form that parses as a {@code long} is taken as epoch milliseconds; otherwise it is
     * parsed as {@code yyyy-MM-dd}, {@code yyyy-MM-dd HH:mm:ss} or {@code yyyy-MM-dd HH:mm:ss.SSS},
     * chosen by its length.
     *
     * @throws ClassCastException if the string form is empty or cannot be parsed
     */
    public static Date toDate(Object value) {
        if (value instanceof Date)
            return (Date) value;
        String str = toString(value);
        if (str.isEmpty())
            throw new ClassCastException(String.format("can not cast '%s' to Date", value));
        Long num = nullLong(str);
        if (num != null)
            return new Date(num);
        try {
            return parseDate(str);
        } catch (java.text.ParseException e) {
            throw convertFailure(value, "Date", e);
        }
    }

    /**
     * Converts a value to {@link Timestamp}, following the same rules as {@link #toDate(Object)}.
     *
     * @throws ClassCastException if the string form is empty or cannot be parsed
     */
    public static Timestamp toTimestamp(Object value) {
        if (value instanceof Timestamp)
            return (Timestamp) value;
        String str = toString(value);
        if (str.isEmpty())
            throw new ClassCastException(String.format("can not cast '%s' to Timestamp", value));
        Long num = nullLong(str);
        if (num != null)
            return new Timestamp(num);
        try {
            return new Timestamp(parseDate(str).getTime());
        } catch (java.text.ParseException e) {
            throw convertFailure(value, "Timestamp", e);
        }
    }

    /**
     * Parses a date string with the pattern that matches its length.
     */
    private static Date parseDate(String str) throws java.text.ParseException {
        if (str.length() < 11)
            return FORMAT_DATE.get().parse(str);
        if (str.length() < 20)
            return FORMAT_DATE_TIME.get().parse(str);
        return FORMAT_DATE_TIME_MS.get().parse(str);
    }

    /**
     * Builds the exception for a failed parse, keeping the original failure as its cause.
     */
    private static ClassCastException convertFailure(Object value, String target, Throwable cause) {
        ClassCastException failure = new ClassCastException(String.format("can not convert '%s' to %s", value, target));
        failure.initCause(cause);
        return failure;
    }

    //endregion


    //region null: convert, returns null when the conversion fails

    public static Boolean nullBoolean(Object value) {
        try {
            return toBoolean(value);
        } catch (Exception ignored) {
            return null;
        }
    }

    public static Byte nullByte(Object value) {
        try {
            return toByte(value);
        } catch (Exception ignored) {
            return null;
        }
    }

    public static Short nullShort(Object value) {
        try {
            return toShort(value);
        } catch (Exception ignored) {
            return null;
        }
    }

    public static Integer nullInteger(Object value) {
        try {
            return toInteger(value);
        } catch (Exception ignored) {
            return null;
        }
    }

    public static Long nullLong(Object value) {
        try {
            return toLong(value);
        } catch (Exception ignored) {
            return null;
        }
    }

    public static Float nullFloat(Object value) {
        try {
            return toFloat(value);
        } catch (Exception ignored) {
            return null;
        }
    }

    public static Double nullDouble(Object value) {
        try {
            return toDouble(value);
        } catch (Exception ignored) {
            return null;
        }
    }

    public static BigDecimal nullDecimal(Object value) {
        try {
            return toDecimal(value);
        } catch (Exception ignored) {
            return null;
        }
    }

    public static Character nullCharacter(Object value) {
        try {
            return toCharacter(value);
        } catch (Exception ignored) {
            return null;
        }
    }

    /**
     * @return the string form of {@code value}, or {@code null} when it is empty or cannot be produced
     */
    public static String nullString(Object value) {
        try {
            String str = toString(value);
            return str.isEmpty() ? null : str;
        } catch (Exception ignored) {
            return null;
        }
    }

    public static Date nullDate(Object value) {
        try {
            return toDate(value);
        } catch (Exception ignored) {
            return null;
        }
    }

    public static Timestamp nullTimestamp(Object value) {
        try {
            return toTimestamp(value);
        } catch (Exception ignored) {
            return null;
        }
    }

    //endregion


    //region default: convert, returns the type's default value when the conversion fails

    public static boolean defaultBoolean(Object value) {
        return defaultBoolean(value, BOOLEAN_DEFAULT);
    }

    public static byte defaultByte(Object value) {
        return defaultByte(value, BYTE_DEFAULT);
    }

    public static short defaultShort(Object value) {
        return defaultShort(value, SHORT_DEFAULT);
    }

    public static int defaultInteger(Object value) {
        return defaultInteger(value, INT_DEFAULT);
    }

    public static long defaultLong(Object value) {
        return defaultLong(value, LONG_DEFAULT);
    }

    public static float defaultFloat(Object value) {
        return defaultFloat(value, FLOAT_DEFAULT);
    }

    public static double defaultDouble(Object value) {
        return defaultDouble(value, DOUBLE_DEFAULT);
    }

    public static BigDecimal defaultDecimal(Object value) {
        return defaultDecimal(value, DECIMAL_DEFAULT);
    }

    public static char defaultCharacter(Object value) {
        return defaultCharacter(value, CHAR_DEFAULT);
    }

    public static String defaultString(Object value) {
        return defaultString(value, STRING_DEFAULT);
    }

    //endregion


    //region default: convert, returns the given value when the conversion fails

    public static boolean defaultBoolean(Object value, boolean defaultValue) {
        try {
            return toBoolean(value);
        } catch (Exception ignored) {
            return defaultValue;
        }
    }

    public static byte defaultByte(Object value, byte defaultValue) {
        try {
            return toByte(value);
        } catch (Exception ignored) {
            return defaultValue;
        }
    }

    public static short defaultShort(Object value, short defaultValue) {
        try {
            return toShort(value);
        } catch (Exception ignored) {
            return defaultValue;
        }
    }

    public static int defaultInteger(Object value, int defaultValue) {
        try {
            return toInteger(value);
        } catch (Exception ignored) {
            return defaultValue;
        }
    }

    public static long defaultLong(Object value, long defaultValue) {
        try {
            return toLong(value);
        } catch (Exception ignored) {
            return defaultValue;
        }
    }

    public static float defaultFloat(Object value, float defaultValue) {
        try {
            return toFloat(value);
        } catch (Exception ignored) {
            return defaultValue;
        }
    }

    public static double defaultDouble(Object value, double defaultValue) {
        try {
            return toDouble(value);
        } catch (Exception ignored) {
            return defaultValue;
        }
    }

    public static BigDecimal defaultDecimal(Object value, BigDecimal defaultValue) {
        try {
            return toDecimal(value);
        } catch (Exception ignored) {
            return defaultValue;
        }
    }

    public static char defaultCharacter(Object value, char defaultValue) {
        try {
            return toCharacter(value);
        } catch (Exception ignored) {
            return defaultValue;
        }
    }

    /**
     * @return the string form of {@code value}, or {@code defaultValue} when it is empty or cannot be produced
     */
    public static String defaultString(Object value, String defaultValue) {
        try {
            String str = toString(value);
            return str.isEmpty() ? defaultValue : str;
        } catch (Exception ignored) {
            return defaultValue;
        }
    }

    public static Date defaultDate(Object value, Date defaultValue) {
        try {
            return toDate(value);
        } catch (Exception ignored) {
            return defaultValue;
        }
    }

    public static Timestamp defaultTimestamp(Object value, Timestamp defaultValue) {
        try {
            return toTimestamp(value);
        } catch (Exception ignored) {
            return defaultValue;
        }
    }

    //endregion
}
// ---- dump marker (next file): /src/main/java/com/bestvike/ocr/util/GridImage.java ----
// package com.bestvike.ocr.util;
// imports: com.bestvike.linq.IEnumerable, com.bestvike.linq.Linq, com.bestvike.ocr.aliyun.AliyunUtils,
//          com.bestvike.ocr.aliyun.entity.AliyunOcrResponse, com.bestvike.ocr.aliyun.entity.AliyunOcrWordInfo,
//          org.apache.commons.codec.binary.Base64, org.apache.commons.codec.binary.Hex,
//          org.apache.commons.lang3.StringUtils, org.apache.poi.hssf.usermodel.HSSFCell/HSSFRow/HSSFSheet/HSSFWorkbook,
//          org.springframework.util.Assert, javax.imageio.ImageIO, java.awt.*, java.awt.image.BufferedImage,
//          java.io.ByteArrayInputStream, java.io.IOException (the import list continues on the next dump line)
java.io.OutputStream; 23 | import java.util.ArrayList; 24 | import java.util.List; 25 | 26 | /** 27 | * Created by 许崇雷 on 2018-10-20. 28 | */ 29 | public final class GridImage { 30 | private static final int MIN_RED = 200; 31 | private static final int MAX_GREEN = 255 - MIN_RED; 32 | private static final int MAX_BLUE = 255 - MIN_RED; 33 | private static final int MIN_DISTANCE = 5; 34 | //图片数据 35 | private final byte[] buffer;//图片文件二进制数据 36 | private final String format;//图片扩展名 37 | private final BufferedImage image;//内存图 38 | //结果数据 39 | private Rectangle[][] grid;//图片切分的矩形 40 | private String[][] table;//识别结果内容 41 | private int gridRowCount;//行数 42 | private int gridColumnCount;//列数 43 | 44 | /** 45 | * 构造函数 46 | */ 47 | public GridImage(byte[] buffer) throws IOException { 48 | Assert.notNull(buffer, "buffer cannot be null."); 49 | Assert.isTrue(buffer.length > 4, "length of buffer must greater than 4."); 50 | this.buffer = buffer; 51 | this.format = getImageFormat(buffer); 52 | this.image = ImageIO.read(new ByteArrayInputStream(buffer)); 53 | this.init(); 54 | } 55 | 56 | /** 57 | * 获取图片真实扩展名 58 | */ 59 | private static String getImageFormat(byte[] data) { 60 | byte[] head = new byte[4]; 61 | System.arraycopy(data, 0, head, 0, head.length); 62 | String type = Hex.encodeHexString(head).toUpperCase(); 63 | if (type.contains("FFD8FF")) { 64 | return "jpg"; 65 | } else if (type.contains("89504E47")) { 66 | return "png"; 67 | } else if (type.contains("424D")) { 68 | return "bmp"; 69 | } 70 | throw new RuntimeException("只支持 jpg,png,bmp 格式图片"); 71 | } 72 | 73 | /** 74 | * 计算两条直线的交点 75 | * 76 | * @param firstBegin L1的点1坐标 77 | * @param firstEnd L1的点2坐标 78 | * @param secondBegin L2的点1坐标 79 | * @param secondEnd L2的点2坐标 80 | * @return 交点坐标 81 | */ 82 | private static Point getIntersection(Point firstBegin, Point firstEnd, Point secondBegin, Point secondEnd) { 83 | /* 84 | * L1,L2都存在斜率的情况: 85 | * 直线方程L1: ( y - y1 ) / ( y2 - y1 ) = ( x - x1 ) / ( x2 - x1 ) 86 | * => y = [ ( 
y2 - y1 ) / ( x2 - x1 ) ]( x - x1 ) + y1 87 | * 令 a = ( y2 - y1 ) / ( x2 - x1 ) 88 | * 有 y = a * x - a * x1 + y1 .........1 89 | * 直线方程L2: ( y - y3 ) / ( y4 - y3 ) = ( x - x3 ) / ( x4 - x3 ) 90 | * 令 b = ( y4 - y3 ) / ( x4 - x3 ) 91 | * 有 y = b * x - b * x3 + y3 ..........2 92 | * 93 | * 如果 a = b,则两直线平等,否则, 联解方程 1,2,得: 94 | * x = ( a * x1 - b * x3 - y1 + y3 ) / ( a - b ) 95 | * y = a * x - a * x1 + y1 96 | * 97 | * L1存在斜率, L2平行Y轴的情况: 98 | * x = x3 99 | * y = a * x3 - a * x1 + y1 100 | * 101 | * L1 平行Y轴,L2存在斜率的情况: 102 | * x = x1 103 | * y = b * x - b * x3 + y3 104 | * 105 | * L1与L2都平行Y轴的情况: 106 | * 如果 x1 = x3,那么L1与L2重合,否则平等 107 | * 108 | */ 109 | float a = 0, b = 0; 110 | int state = 0; 111 | if (firstBegin.x != firstEnd.x) { 112 | a = (firstEnd.y - firstBegin.y) / (float) (firstEnd.x - firstBegin.x); 113 | state |= 1; 114 | } 115 | if (secondBegin.x != secondEnd.x) { 116 | b = (secondEnd.y - secondBegin.y) / (float) (secondEnd.x - secondBegin.x); 117 | state |= 2; 118 | } 119 | switch (state) { 120 | case 0: //L1与L2都平行Y轴 121 | { 122 | if (firstBegin.x == secondBegin.x) 123 | throw new RuntimeException("两条直线互相重合,且平行于Y轴,无法计算交点。"); 124 | else 125 | throw new RuntimeException("两条直线互相平行,且平行于Y轴,无法计算交点。"); 126 | } 127 | case 1: //L1存在斜率, L2平行Y轴 128 | { 129 | int x = secondBegin.x; 130 | int y = Math.round((firstBegin.x - x) * (-a) + firstBegin.y); 131 | return new Point(x, y); 132 | } 133 | case 2: //L1 平行Y轴,L2存在斜率 134 | { 135 | int x = firstBegin.x; 136 | int y = Math.round((secondBegin.x - x) * (-b) + secondBegin.y); 137 | return new Point(x, y); 138 | } 139 | case 3: //L1,L2都存在斜率 140 | { 141 | if (a == b) 142 | throw new RuntimeException("两条直线平行或重合,无法计算交点。"); 143 | int x = Math.round((a * firstBegin.x - b * secondBegin.x - firstBegin.y + secondBegin.y) / (a - b)); 144 | int y = Math.round(a * x - a * firstBegin.x + firstBegin.y); 145 | return new Point(x, y); 146 | } 147 | default: 148 | throw new RuntimeException("不可能发生的情况"); 149 | } 150 | } 151 | 152 | /** 153 | * 
获取切分的单元格 154 | */ 155 | public Rectangle[][] getGrid() { 156 | return this.grid; 157 | } 158 | 159 | /** 160 | * 获取单元格行数 161 | */ 162 | public int getGridRowCount() { 163 | return this.gridRowCount; 164 | } 165 | 166 | /** 167 | * 获取单元格列数 168 | */ 169 | public int getGridColumnCount() { 170 | return this.gridColumnCount; 171 | } 172 | 173 | /** 174 | * 初始化单元格 175 | */ 176 | @SuppressWarnings("SuspiciousNameCombination") 177 | private void init() { 178 | //识别四个边红色点 179 | final int right = this.image.getWidth() - 1; 180 | final int bottom = this.image.getHeight() - 1; 181 | 182 | //识别竖线(从左到右) 183 | List topPoints = new ArrayList<>(); 184 | List bottomPoints = new ArrayList<>(); 185 | int lastTop = 0; 186 | int lastBottom = 0; 187 | topPoints.add(new Point(0, 0)); 188 | bottomPoints.add(new Point(0, bottom)); 189 | for (int x = 0; x <= right; x++) { 190 | Color topColor = new Color(this.image.getRGB(x, 0)); 191 | if (topColor.getRed() > MIN_RED && topColor.getGreen() < MAX_GREEN && topColor.getBlue() < MAX_BLUE && x - lastTop > MIN_DISTANCE) 192 | topPoints.add(new Point(lastTop = x, 0)); 193 | 194 | Color bottomColor = new Color(this.image.getRGB(x, bottom)); 195 | if (bottomColor.getRed() > MIN_RED && bottomColor.getGreen() < MAX_GREEN && bottomColor.getBlue() < MAX_BLUE && x - lastBottom > MIN_DISTANCE) 196 | bottomPoints.add(new Point(lastBottom = x, bottom)); 197 | } 198 | if (topPoints.size() != bottomPoints.size()) 199 | throw new RuntimeException("竖线识别失败"); 200 | topPoints.add(new Point(right, 0)); 201 | bottomPoints.add(new Point(right, bottom)); 202 | 203 | //识别横线(从上到下) 204 | List leftPoints = new ArrayList<>(); 205 | List rightPoints = new ArrayList<>(); 206 | int lastLeft = 0; 207 | int lastRight = 0; 208 | leftPoints.add(new Point(0, 0)); 209 | rightPoints.add(new Point(right, 0)); 210 | for (int y = 0; y <= bottom; y++) { 211 | Color leftColor = new Color(this.image.getRGB(0, y)); 212 | if (leftColor.getRed() > MIN_RED && leftColor.getGreen() < MAX_GREEN 
&& leftColor.getBlue() < MAX_BLUE && y - lastLeft > MIN_DISTANCE) 213 | leftPoints.add(new Point(0, lastLeft = y)); 214 | 215 | Color rightColor = new Color(this.image.getRGB(right, y)); 216 | if (rightColor.getRed() > MIN_RED && rightColor.getGreen() < MAX_GREEN && rightColor.getBlue() < MAX_BLUE && y - lastRight > MIN_DISTANCE) 217 | rightPoints.add(new Point(right, lastRight = y)); 218 | } 219 | if (leftPoints.size() != rightPoints.size()) 220 | throw new RuntimeException("横线识别失败"); 221 | leftPoints.add(new Point(0, bottom)); 222 | rightPoints.add(new Point(right, bottom)); 223 | 224 | //计算所有的交点 225 | int rowCount = leftPoints.size(); 226 | int colCount = topPoints.size(); 227 | Point[][] points = new Point[rowCount][colCount]; 228 | for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) { 229 | for (int colIndex = 0; colIndex < colCount; colIndex++) 230 | points[rowIndex][colIndex] = getIntersection(leftPoints.get(rowIndex), rightPoints.get(rowIndex), topPoints.get(colIndex), bottomPoints.get(colIndex)); 231 | } 232 | 233 | //网格划分 234 | int rcRowCount = rowCount - 1; 235 | int rcColCount = colCount - 1; 236 | Rectangle[][] grid = new Rectangle[rcRowCount][rcColCount]; 237 | for (int rowIndex = 0; rowIndex < rcRowCount; rowIndex++) { 238 | for (int colIndex = 0; colIndex < rcColCount; colIndex++) { 239 | Point pLT = points[rowIndex][colIndex]; 240 | Point pLB = points[rowIndex + 1][colIndex]; 241 | Point pRT = points[rowIndex][colIndex + 1]; 242 | Point pRB = points[rowIndex + 1][colIndex + 1]; 243 | 244 | int minX = Math.min(pLT.x, pLB.x); 245 | int minY = Math.min(pLT.y, pRT.y); 246 | int maxX = Math.max(pRT.x, pRB.x); 247 | int maxY = Math.max(pLB.y, pRB.y); 248 | grid[rowIndex][colIndex] = new Rectangle(minX, minY, maxX - minX, maxY - minY); 249 | } 250 | } 251 | this.grid = grid; 252 | this.gridRowCount = rcRowCount; 253 | this.gridColumnCount = rcColCount; 254 | } 255 | 256 | /** 257 | * ocr 识别表格 258 | * 259 | * @return 文本 260 | */ 261 | public String[][] 
ocr() { 262 | if (this.table != null) 263 | return this.table; 264 | 265 | //调用阿里云接口识别 266 | AliyunOcrResponse response = AliyunUtils.ocrAdvanced(this.buffer); 267 | IEnumerable prism_wordsInfo = Linq.asEnumerable(response.getPrism_wordsInfo()); 268 | 269 | //识别结果派分到单元格 270 | Rectangle[][] grid = this.grid; 271 | int rowCount = this.gridRowCount; 272 | int colCount = this.gridColumnCount; 273 | String[][] table = new String[rowCount][colCount]; 274 | for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) { 275 | for (int colIndex = 0; colIndex < colCount; colIndex++) { 276 | final int fRowIndex = rowIndex; 277 | final int fColIndex = colIndex; 278 | final AliyunOcrWordInfo wordInfo = prism_wordsInfo 279 | .where(a -> grid[fRowIndex][fColIndex].contains(a.getRectangle())) 280 | .maxByNull(a -> a.getRectangle().width * a.getRectangle().height);//取面积最大的 281 | if (wordInfo == null) 282 | continue; 283 | table[fRowIndex][fColIndex] = StringUtils.replaceChars(wordInfo.getWord(), ',', ',');//替换中文逗号 284 | } 285 | } 286 | return this.table = table; 287 | } 288 | 289 | /** 290 | * 预览识别结果 291 | * 292 | * @return html 内容 293 | */ 294 | public String preview() { 295 | int rcRowCount = this.gridRowCount; 296 | int rcColCount = this.gridColumnCount; 297 | String[][] table = this.ocr(); 298 | StringBuilder builder = new StringBuilder(1000); 299 | builder.append(""); 300 | builder.append("\n"); 301 | builder.append("\n"); 302 | builder.append("
\n"); 303 | builder.append("\n"); 304 | for (int rowIndex = 0; rowIndex < rcRowCount; rowIndex++) { 305 | builder.append(" \n"); 306 | for (int colIndex = 0; colIndex < rcColCount; colIndex++) { 307 | String word = table[rowIndex][colIndex]; 308 | word = word == null ? StringUtils.EMPTY : HtmlUtils.htmlEscape(word); 309 | builder.append(" \n"); 310 | } 311 | builder.append(" \n"); 312 | } 313 | builder.append("
").append(word).append("
\n"); 314 | builder.append(""); 315 | return builder.toString(); 316 | } 317 | 318 | /** 319 | * 另存为 excel 320 | * 321 | * @param outputStream 流 322 | * @param sheetName 表格名 323 | * @throws IOException 写入流发生异常 324 | */ 325 | public void saveAsExcel(OutputStream outputStream, String sheetName) throws IOException { 326 | Assert.notNull(outputStream, "outputStream cannot be null."); 327 | Assert.notNull(sheetName, "sheetName cannot be null"); 328 | 329 | int rcRowCount = this.gridRowCount; 330 | int rcColCount = this.gridColumnCount; 331 | String[][] table = this.ocr(); 332 | HSSFWorkbook wb = new HSSFWorkbook(); 333 | HSSFSheet sheet = wb.createSheet(sheetName); //创建table工作薄 334 | for (int rowIndex = 0; rowIndex < rcRowCount; rowIndex++) { 335 | HSSFRow row = sheet.createRow(rowIndex);//创建表格行 336 | for (int colIndex = 0; colIndex < rcColCount; colIndex++) { 337 | HSSFCell cell = row.createCell(colIndex);//根据表格行创建单元格 338 | String cellValue = table[rowIndex][colIndex]; 339 | if (StringUtils.isEmpty(cellValue)) 340 | continue; 341 | cell.setCellValue(cellValue); 342 | } 343 | } 344 | wb.write(outputStream); 345 | } 346 | } 347 | -------------------------------------------------------------------------------- /src/main/java/com/bestvike/ocr/util/HtmlUtils.java: -------------------------------------------------------------------------------- 1 | package com.bestvike.ocr.util; 2 | 3 | import com.bestvike.ocr.collection.CharSequenceIterable; 4 | 5 | /** 6 | * Created by 许崇雷 on 2017-11-21. 
7 | */ 8 | public final class HtmlUtils extends org.springframework.web.util.HtmlUtils { 9 | private static String htmlEscapeChar(char ch) { 10 | switch (ch) { 11 | case '<': 12 | return "<"; 13 | case '>': 14 | return ">"; 15 | case '"': 16 | return """; 17 | case '&': 18 | return "&"; 19 | case '\'': 20 | return "'"; 21 | case ' ': 22 | case ' ': 23 | return " "; 24 | default: 25 | return String.valueOf(ch); 26 | } 27 | } 28 | 29 | public static String htmlEscape(String input) { 30 | if (input == null) 31 | return null; 32 | StringBuilder builder = new StringBuilder(input.length() * 2); 33 | for (char ch : new CharSequenceIterable(input)) 34 | builder.append(htmlEscapeChar(ch)); 35 | return builder.toString(); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/com/bestvike/ocr/util/RestTemplateUtils.java: -------------------------------------------------------------------------------- 1 | package com.bestvike.ocr.util; 2 | 3 | import com.bestvike.ocr.reflect.GenericTypeReference; 4 | import org.apache.commons.collections.MapUtils; 5 | import org.springframework.http.HttpEntity; 6 | import org.springframework.http.HttpHeaders; 7 | import org.springframework.http.HttpMethod; 8 | import org.springframework.http.HttpStatus; 9 | import org.springframework.http.ResponseEntity; 10 | import org.springframework.stereotype.Component; 11 | import org.springframework.util.MultiValueMap; 12 | import org.springframework.web.client.RestTemplate; 13 | import org.springframework.web.util.UriComponentsBuilder; 14 | 15 | import java.lang.reflect.Type; 16 | import java.net.URI; 17 | import java.util.Map; 18 | 19 | /** 20 | * Created by 许崇雷 on 2018-02-23. 
21 | */ 22 | @Component 23 | public final class RestTemplateUtils { 24 | private static final String ERROR_SERVER_MSG = "外部服务发生错误:HTTP-%s"; 25 | private static final String ERROR_NULL_MSG = "外部服务未返回任何数据"; 26 | private static final RestTemplate REST_TEMPLATE = new RestTemplate(); 27 | 28 | //发起请求并接受响应 29 | public static TResponse exchange(RestTemplate restTemplate, String url, HttpMethod method, Object body, Type responseType, Map uriVariables, MultiValueMap headers) { 30 | UriComponentsBuilder uriBuilder = UriComponentsBuilder.fromHttpUrl(url); 31 | if (MapUtils.isNotEmpty(uriVariables)) { 32 | for (Map.Entry entry : uriVariables.entrySet()) 33 | uriBuilder.queryParam(entry.getKey(), Convert.toString(entry.getValue())); 34 | } 35 | URI uri = uriBuilder.build().encode().toUri(); 36 | GenericTypeReference responseTypeReference = new GenericTypeReference<>(responseType); 37 | ResponseEntity responseEntity = restTemplate.exchange(uri, method, new HttpEntity<>(body, headers), responseTypeReference); 38 | if (responseEntity.getStatusCode() == HttpStatus.OK) { 39 | if (responseEntity.getBody() == null) 40 | throw new RuntimeException(ERROR_NULL_MSG); 41 | return responseEntity.getBody(); 42 | } 43 | throw new RuntimeException(ERROR_SERVER_MSG + responseEntity.getStatusCodeValue()); 44 | } 45 | 46 | public static TResponse post(String url, Object body, HttpHeaders headers, Class responseClass) { 47 | return exchange(REST_TEMPLATE, url, HttpMethod.POST, body, responseClass, null, headers); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | spring.servlet.multipart.max-file-size=4MB 2 | app.aliyun.app-code=56905a5c43ff4d07a44d301a9d5bce46 3 | -------------------------------------------------------------------------------- /src/main/resources/static/img/demo.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/timandy/table_ocr/989a1c300a50744d52d48f1e971e813d461d44e8/src/main/resources/static/img/demo.jpg -------------------------------------------------------------------------------- /src/main/resources/static/img/demo_table.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timandy/table_ocr/989a1c300a50744d52d48f1e971e813d461d44e8/src/main/resources/static/img/demo_table.jpg -------------------------------------------------------------------------------- /src/main/resources/static/img/demo_table2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timandy/table_ocr/989a1c300a50744d52d48f1e971e813d461d44e8/src/main/resources/static/img/demo_table2.jpg -------------------------------------------------------------------------------- /src/main/resources/static/index.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 文本识别 6 | 7 | 8 |
9 |
10 | 11 |
12 |
13 |
14 |
15 | 16 |
17 | 18 | 19 | -------------------------------------------------------------------------------- /src/test/java/com/bestvike/ocr/OcrApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bestvike.ocr; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class OcrApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | --------------------------------------------------------------------------------