├── .classpath ├── .gitattributes ├── .gitignore ├── .idea ├── .name ├── compiler.xml ├── copyright │ └── profiles_settings.xml ├── dictionaries │ └── zhanghong.xml ├── encodings.xml ├── libraries │ └── libs.xml ├── misc.xml ├── modules.xml ├── scopes │ └── scope_settings.xml ├── vcs.xml └── workspace.xml ├── .project ├── .settings └── org.eclipse.jdt.core.prefs ├── Java_OCR.iml ├── README.md ├── bin ├── com │ ├── recognition │ │ └── software │ │ │ └── jdeskew │ │ │ ├── ImageDeskew$HoughLine.class │ │ │ ├── ImageDeskew.class │ │ │ └── ImageUtil.class │ └── ricky │ │ └── java │ │ └── ocr │ │ ├── TessAPITest$TessDllAPIImpl.class │ │ ├── TessAPITest.class │ │ ├── Tesseract1Test$Tess1Extension.class │ │ ├── Tesseract1Test$Word.class │ │ └── Tesseract1Test.class ├── gsdll64.dll ├── liblept168.dll ├── libtesseract302.dll ├── net │ └── sourceforge │ │ ├── tess4j │ │ ├── ITesseract.class │ │ ├── TessAPI$ETEXT_DESC.class │ │ ├── TessAPI$TessBaseAPI.class │ │ ├── TessAPI$TessMutableIterator.class │ │ ├── TessAPI$TessOcrEngineMode.class │ │ ├── TessAPI$TessOrientation.class │ │ ├── TessAPI$TessPageIterator.class │ │ ├── TessAPI$TessPageIteratorLevel.class │ │ ├── TessAPI$TessPageSegMode.class │ │ ├── TessAPI$TessPolyBlockType.class │ │ ├── TessAPI$TessResultIterator.class │ │ ├── TessAPI$TessTextlineOrder.class │ │ ├── TessAPI$TessWritingDirection.class │ │ ├── TessAPI.class │ │ ├── TessAPI1$ETEXT_DESC.class │ │ ├── TessAPI1$TessBaseAPI.class │ │ ├── TessAPI1$TessMutableIterator.class │ │ ├── TessAPI1$TessOcrEngineMode.class │ │ ├── TessAPI1$TessOrientation.class │ │ ├── TessAPI1$TessPageIterator.class │ │ ├── TessAPI1$TessPageIteratorLevel.class │ │ ├── TessAPI1$TessPageSegMode.class │ │ ├── TessAPI1$TessPolyBlockType.class │ │ ├── TessAPI1$TessResultIterator.class │ │ ├── TessAPI1$TessTextlineOrder.class │ │ ├── TessAPI1$TessWritingDirection.class │ │ ├── TessAPI1.class │ │ ├── Tesseract.class │ │ ├── Tesseract1.class │ │ ├── TesseractException.class │ │ └── util │ │ │ └── 
Utils.class │ │ └── vietocr │ │ ├── ImageHelper.class │ │ ├── ImageIOHelper.class │ │ ├── PdfUtilities$1.class │ │ ├── PdfUtilities$2.class │ │ └── PdfUtilities.class └── testimg │ ├── OCR_tesseract.class │ ├── TesseractExp.class │ └── TestOCR.class ├── chi.jpg ├── chi1.jpg ├── eurotext.bmp ├── eurotext.gif ├── eurotext.pdf ├── eurotext.png ├── eurotext.tif ├── libs ├── ghost4j-0.5.1.jar ├── hamcrest-core-1.3.jar ├── jai_imageio.jar ├── jna-4.1.0.jar └── log4j-1.2.17.jar ├── src ├── com │ └── recognition │ │ └── software │ │ └── jdeskew │ │ ├── ImageDeskew.java │ │ └── ImageUtil.java ├── gsdll64.dll ├── liblept168.dll ├── libtesseract302.dll └── net │ └── sourceforge │ ├── tess4j │ ├── ITesseract.java │ ├── TessAPI.java │ ├── TessAPI1.java │ ├── Tesseract.java │ ├── Tesseract1.java │ ├── TesseractException.java │ └── util │ │ └── Utils.java │ └── vietocr │ ├── ImageHelper.java │ ├── ImageIOHelper.java │ └── PdfUtilities.java ├── tessdata ├── chi_sim.traineddata ├── chi_tra.traineddata ├── configs │ ├── api_config │ ├── digits │ └── hocr └── eng.traineddata └── test ├── com └── ricky │ └── java │ └── ocr │ ├── TessAPITest.java │ └── Tesseract1Test.java └── testimg ├── OCR_tesseract.java ├── TesseractExp.java └── TestOCR.java /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF 
diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Windows image file caches 2 | Thumbs.db 3 | ehthumbs.db 4 | 5 | # Folder config file 6 | Desktop.ini 7 | 8 | # Recycle Bin used on file shares 9 | $RECYCLE.BIN/ 10 | 11 | # Windows Installer files 12 | *.cab 13 | *.msi 14 | *.msm 15 | *.msp 16 | 17 | # Windows shortcuts 18 | *.lnk 19 | 20 | # ========================= 21 | # Operating System Files 22 | # ========================= 23 | 24 | # OSX 25 | # ========================= 26 | 27 | .DS_Store 28 | .AppleDouble 29 | .LSOverride 30 | 31 | # Thumbnails 32 | ._* 33 | 34 | # Files that might appear in the root of a volume 35 | .DocumentRevisions-V100 36 | .fseventsd 37 | .Spotlight-V100 38 | .TemporaryItems 39 | .Trashes 40 | .VolumeIcon.icns 41 | 42 | # Directories potentially created on remote AFP share 43 | .AppleDB 44 | .AppleDesktop 45 | Network Trash Folder 46 | Temporary Items 47 | .apdisk 48 | -------------------------------------------------------------------------------- /.idea/.name: -------------------------------------------------------------------------------- 1 | Java_OCR -------------------------------------------------------------------------------- /.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /.idea/dictionaries/zhanghong.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /.idea/encodings.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/libraries/libs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | Abstraction issues 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /.idea/scopes/scope_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | Java_OCR 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | org.eclipse.jdt.core.javanature 16 | 17 | 18 | -------------------------------------------------------------------------------- /.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 3 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7 4 | 
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve 5 | org.eclipse.jdt.core.compiler.compliance=1.7 6 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate 7 | org.eclipse.jdt.core.compiler.debug.localVariable=generate 8 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate 9 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error 10 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 11 | org.eclipse.jdt.core.compiler.source=1.7 12 | -------------------------------------------------------------------------------- /Java_OCR.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Java_OCR 2 | tess4j_图片文字识别。 3 | 4 | 源项目来源:http://blog.csdn.net/top_code/article/details/39667299 5 | 6 | 添加中文语言包,识别微博图片长文。 7 | -------------------------------------------------------------------------------- /bin/com/recognition/software/jdeskew/ImageDeskew$HoughLine.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/com/recognition/software/jdeskew/ImageDeskew$HoughLine.class -------------------------------------------------------------------------------- /bin/com/recognition/software/jdeskew/ImageDeskew.class: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/com/recognition/software/jdeskew/ImageDeskew.class -------------------------------------------------------------------------------- /bin/com/recognition/software/jdeskew/ImageUtil.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/com/recognition/software/jdeskew/ImageUtil.class -------------------------------------------------------------------------------- /bin/com/ricky/java/ocr/TessAPITest$TessDllAPIImpl.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/com/ricky/java/ocr/TessAPITest$TessDllAPIImpl.class -------------------------------------------------------------------------------- /bin/com/ricky/java/ocr/TessAPITest.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/com/ricky/java/ocr/TessAPITest.class -------------------------------------------------------------------------------- /bin/com/ricky/java/ocr/Tesseract1Test$Tess1Extension.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/com/ricky/java/ocr/Tesseract1Test$Tess1Extension.class -------------------------------------------------------------------------------- /bin/com/ricky/java/ocr/Tesseract1Test$Word.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/com/ricky/java/ocr/Tesseract1Test$Word.class 
-------------------------------------------------------------------------------- /bin/com/ricky/java/ocr/Tesseract1Test.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/com/ricky/java/ocr/Tesseract1Test.class -------------------------------------------------------------------------------- /bin/gsdll64.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/gsdll64.dll -------------------------------------------------------------------------------- /bin/liblept168.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/liblept168.dll -------------------------------------------------------------------------------- /bin/libtesseract302.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/libtesseract302.dll -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/ITesseract.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/ITesseract.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI$ETEXT_DESC.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI$ETEXT_DESC.class 
-------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI$TessBaseAPI.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI$TessBaseAPI.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI$TessMutableIterator.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI$TessMutableIterator.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI$TessOcrEngineMode.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI$TessOcrEngineMode.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI$TessOrientation.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI$TessOrientation.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI$TessPageIterator.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI$TessPageIterator.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI$TessPageIteratorLevel.class: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI$TessPageIteratorLevel.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI$TessPageSegMode.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI$TessPageSegMode.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI$TessPolyBlockType.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI$TessPolyBlockType.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI$TessResultIterator.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI$TessResultIterator.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI$TessTextlineOrder.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI$TessTextlineOrder.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI$TessWritingDirection.class: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI$TessWritingDirection.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI1$ETEXT_DESC.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI1$ETEXT_DESC.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI1$TessBaseAPI.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI1$TessBaseAPI.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI1$TessMutableIterator.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI1$TessMutableIterator.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI1$TessOcrEngineMode.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI1$TessOcrEngineMode.class 
-------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI1$TessOrientation.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI1$TessOrientation.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI1$TessPageIterator.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI1$TessPageIterator.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI1$TessPageIteratorLevel.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI1$TessPageIteratorLevel.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI1$TessPageSegMode.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI1$TessPageSegMode.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI1$TessPolyBlockType.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI1$TessPolyBlockType.class -------------------------------------------------------------------------------- 
/bin/net/sourceforge/tess4j/TessAPI1$TessResultIterator.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI1$TessResultIterator.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI1$TessTextlineOrder.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI1$TessTextlineOrder.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI1$TessWritingDirection.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI1$TessWritingDirection.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TessAPI1.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TessAPI1.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/Tesseract.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/Tesseract.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/Tesseract1.class: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/Tesseract1.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/TesseractException.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/TesseractException.class -------------------------------------------------------------------------------- /bin/net/sourceforge/tess4j/util/Utils.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/tess4j/util/Utils.class -------------------------------------------------------------------------------- /bin/net/sourceforge/vietocr/ImageHelper.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/vietocr/ImageHelper.class -------------------------------------------------------------------------------- /bin/net/sourceforge/vietocr/ImageIOHelper.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/vietocr/ImageIOHelper.class -------------------------------------------------------------------------------- /bin/net/sourceforge/vietocr/PdfUtilities$1.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/vietocr/PdfUtilities$1.class -------------------------------------------------------------------------------- 
/bin/net/sourceforge/vietocr/PdfUtilities$2.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/vietocr/PdfUtilities$2.class -------------------------------------------------------------------------------- /bin/net/sourceforge/vietocr/PdfUtilities.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/net/sourceforge/vietocr/PdfUtilities.class -------------------------------------------------------------------------------- /bin/testimg/OCR_tesseract.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/testimg/OCR_tesseract.class -------------------------------------------------------------------------------- /bin/testimg/TesseractExp.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/testimg/TesseractExp.class -------------------------------------------------------------------------------- /bin/testimg/TestOCR.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/bin/testimg/TestOCR.class -------------------------------------------------------------------------------- /chi.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/chi.jpg -------------------------------------------------------------------------------- /chi1.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/chi1.jpg -------------------------------------------------------------------------------- /eurotext.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/eurotext.bmp -------------------------------------------------------------------------------- /eurotext.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/eurotext.gif -------------------------------------------------------------------------------- /eurotext.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/eurotext.pdf -------------------------------------------------------------------------------- /eurotext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/eurotext.png -------------------------------------------------------------------------------- /eurotext.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/eurotext.tif -------------------------------------------------------------------------------- /libs/ghost4j-0.5.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/libs/ghost4j-0.5.1.jar -------------------------------------------------------------------------------- /libs/hamcrest-core-1.3.jar: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/libs/hamcrest-core-1.3.jar -------------------------------------------------------------------------------- /libs/jai_imageio.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/libs/jai_imageio.jar -------------------------------------------------------------------------------- /libs/jna-4.1.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/libs/jna-4.1.0.jar -------------------------------------------------------------------------------- /libs/log4j-1.2.17.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/libs/log4j-1.2.17.jar -------------------------------------------------------------------------------- /src/com/recognition/software/jdeskew/ImageDeskew.java: --------------------------------------------------------------------------------
/**
 * JDeskew
 */
package com.recognition.software.jdeskew;

import java.awt.image.BufferedImage;

/**
 * Calculates the skew angle of an image by running a Hough transformation
 * over the lower edges of its black pixels and averaging the angles of the
 * lines that collected the most points.
 */
public class ImageDeskew {

    /**
     * Representation of a line in the image.
     */
    public class HoughLine {

        // count of points in the line
        public int count = 0;
        // index in matrix.
        public int index = 0;
        // the line is represented as all x, y that solve
        // y * cos(alpha) - x * sin(alpha) = d
        public double alpha;
        public double d;
    }

    // the source image
    private final BufferedImage cImage;
    // the range of angles to search for lines (degrees; see getAlpha)
    private final double cAlphaStart = -20;
    private final double cAlphaStep = 0.2;
    private final int cSteps = 40 * 5;
    // pre-calculation of sin and cos
    private double[] cSinA;
    private double[] cCosA;
    // range of d
    private double cDMin;
    private final double cDStep = 1.0;
    private int cDCount;
    // count of points that fit in a line, laid out as [dIndex * cSteps + alphaIndex]
    private int[] cHMatrix;

    /**
     * Creates a deskewer for the given image.
     *
     * @param image the source image to analyse
     */
    public ImageDeskew(BufferedImage image) {
        this.cImage = image;
    }

    /**
     * Calculates the skew angle of the image.
     *
     * @return the average angle of the strongest detected lines, on the
     *         scale produced by {@link #getAlpha(int)}
     */
    public double getSkewAngle() {
        // perform Hough Transformation
        calc();
        // top 20 of the detected lines in the image
        HoughLine[] hl = getTop(20);

        if (hl.length < 20) {
            return 0.0d;
        }

        // NOTE(review): only the first 19 of the 20 returned lines are
        // averaged. Kept as-is so existing results do not change; confirm
        // whether all 20 were intended.
        double sum = 0.0;
        int count = 0;
        for (int i = 0; i < 19; i++) {
            sum += hl[i].alpha;
            count++;
        }
        return sum / count;
    }

    /**
     * Collects the {@code count} accumulator cells with the most points,
     * ordered from highest to lowest count, and decodes each cell index back
     * into its (alpha, d) line parameters.
     *
     * @param count number of lines to return
     * @return array of exactly {@code count} lines; entries that were never
     *         filled keep a point count of 0
     */
    private HoughLine[] getTop(int count) {
        HoughLine[] hl = new HoughLine[count];
        for (int i = 0; i < count; i++) {
            hl[i] = new HoughLine();
        }

        for (int i = 0; i < (this.cHMatrix.length - 1); i++) {
            if (this.cHMatrix[i] > hl[count - 1].count) {
                // overwrite the current weakest entry, then bubble it up
                // until the array is sorted by descending count again
                hl[count - 1].count = this.cHMatrix[i];
                hl[count - 1].index = i;
                int j = count - 1;
                while ((j > 0) && (hl[j].count > hl[j - 1].count)) {
                    HoughLine tmp = hl[j];
                    hl[j] = hl[j - 1];
                    hl[j - 1] = tmp;
                    j--;
                }
            }
        }

        for (int i = 0; i < count; i++) {
            // integer division, no remainder
            int dIndex = hl[i].index / this.cSteps;
            int alphaIndex = hl[i].index - dIndex * this.cSteps;
            hl[i].alpha = getAlpha(alphaIndex);
            hl[i].d = dIndex + this.cDMin;
        }

        return hl;
    }

    /**
     * Hough Transformation: scans the rows between one quarter and three
     * quarters of the image height and votes for every black pixel whose
     * neighbour directly below is not black.
     */
    private void calc() {
        int hMin = (int) ((this.cImage.getHeight()) / 4.0);
        int hMax = (int) ((this.cImage.getHeight()) * 3.0 / 4.0);
        init();

        int xEnd = this.cImage.getWidth() - 2;
        for (int y = hMin; y < hMax; y++) {
            for (int x = 1; x < xEnd; x++) {
                // only lower edges are considered
                if (ImageUtil.isBlack(this.cImage, x, y)
                        && !ImageUtil.isBlack(this.cImage, x, y + 1)) {
                    calc(x, y);
                }
            }
        }
    }

    /**
     * Votes for all lines through the point (x, y), one per pre-calculated
     * angle.
     *
     * @param x column of the point
     * @param y row of the point
     */
    private void calc(int x, int y) {
        for (int alpha = 0; alpha < (this.cSteps - 1); alpha++) {
            double d = y * this.cCosA[alpha] - x * this.cSinA[alpha];
            int dIndex = (int) (d - this.cDMin);
            int index = dIndex * this.cSteps + alpha;
            // Skip votes that fall outside the accumulator instead of relying
            // on a caught ArrayIndexOutOfBoundsException for control flow.
            if (index >= 0 && index < this.cHMatrix.length) {
                this.cHMatrix[index] += 1;
            }
        }
    }

    /**
     * Pre-calculates the sine/cosine tables for the searched angles and
     * allocates an empty accumulator sized from the image dimensions.
     */
    private void init() {
        // pre-calculation of sin and cos
        this.cSinA = new double[this.cSteps - 1];
        this.cCosA = new double[this.cSteps - 1];

        for (int i = 0; i < (this.cSteps - 1); i++) {
            double angle = getAlpha(i) * Math.PI / 180.0;
            this.cSinA[i] = Math.sin(angle);
            this.cCosA[i] = Math.cos(angle);
        }

        // range of d
        this.cDMin = -this.cImage.getWidth();
        this.cDCount = (int) (2.0 * ((this.cImage.getWidth() + this.cImage.getHeight())) / this.cDStep);
        this.cHMatrix = new int[this.cDCount * this.cSteps];
    }

    /**
     * Maps an angle index to its angle.
     *
     * @param index zero-based angle step
     * @return {@code cAlphaStart + index * cAlphaStep}; the value is treated
     *         as degrees when the sine/cosine tables are built
     */
    public double getAlpha(int index) {
        return this.cAlphaStart + (index * this.cAlphaStep);
    }
}
-------------------------------------------------------------------------------- /src/com/recognition/software/jdeskew/ImageUtil.java: --------------------------------------------------------------------------------
/**
 * JDeskew
 */
package com.recognition.software.jdeskew;

import java.awt.image.BufferedImage;
import java.awt.image.WritableRaster;

/**
 * Pixel classification helpers used by the deskew calculation.
 */
public class ImageUtil {

    /** Luminance cutoff used when the caller does not supply one. */
    private static final int DEFAULT_LUMINANCE_CUT_OFF = 140;

    /**
     * Tells whether the pixel at (x, y) counts as black.
     *
     * <p>For {@link BufferedImage#TYPE_BYTE_BINARY} images the first band's
     * sample is compared with 0; for every other image type the pixel's
     * luminance is compared with a cutoff of 140.
     *
     * @param image the image to inspect
     * @param x     pixel column
     * @param y     pixel row
     * @return {@code true} if the pixel is black; {@code false} if it is not
     *         or if (x, y) lies outside the image
     */
    public static boolean isBlack(BufferedImage image, int x, int y) {
        if (image.getType() == BufferedImage.TYPE_BYTE_BINARY) {
            // same out-of-bounds contract as the luminance overload
            if (isOutside(image, x, y)) {
                return false;
            }
            WritableRaster raster = image.getRaster();
            return raster.getSample(x, y, 0) == 0;
        }

        return isBlack(image, x, y, DEFAULT_LUMINANCE_CUT_OFF);
    }

    /**
     * Tells whether the pixel at (x, y) is darker than the given cutoff.
     *
     * @param image           the image to inspect
     * @param x               pixel column
     * @param y               pixel row
     * @param luminanceCutOff luminance below which a pixel counts as black
     * @return {@code true} if the weighted RGB luminance is below the
     *         cutoff; {@code false} otherwise or if (x, y) lies outside the
     *         image
     */
    public static boolean isBlack(BufferedImage image, int x, int y, int luminanceCutOff) {
        // return white on areas outside of image boundaries
        if (isOutside(image, x, y)) {
            return false;
        }

        double luminance = 0.0;
        try {
            int pixelRGBValue = image.getRGB(x, y);
            int r = (pixelRGBValue >> 16) & 0xff;
            int g = (pixelRGBValue >> 8) & 0xff;
            int b = pixelRGBValue & 0xff;
            luminance = (r * 0.299) + (g * 0.587) + (b * 0.114);
        } catch (RuntimeException ignored) {
            // Best effort, as before: if the pixel cannot be read the
            // luminance stays 0.0 and the pixel is classified by the cutoff.
        }

        return luminance < luminanceCutOff;
    }

    /**
     * Checks whether (x, y) is not a valid pixel coordinate of the image.
     * Valid columns are 0..width-1 and valid rows 0..height-1, so the upper
     * bounds are exclusive.
     */
    private static boolean isOutside(BufferedImage image, int x, int y) {
        return x < 0 || y < 0 || x >= image.getWidth() || y >= image.getHeight();
    }
}
-------------------------------------------------------------------------------- /src/gsdll64.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/src/gsdll64.dll
-------------------------------------------------------------------------------- /src/liblept168.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/src/liblept168.dll -------------------------------------------------------------------------------- /src/libtesseract302.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/src/libtesseract302.dll -------------------------------------------------------------------------------- /src/net/sourceforge/tess4j/ITesseract.java: --------------------------------------------------------------------------------
/**
 * Copyright @ 2014 Quan Nguyen
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package net.sourceforge.tess4j;

import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.io.File;
import java.nio.ByteBuffer;
import java.util.List;
import javax.imageio.IIOImage;

/**
 * An interface that represents common OCR methods.
 */
public interface ITesseract {

    // NOTE(review): as visible here these two literals contain nothing but line
    // breaks. Judging by their names they are meant to open and close an HTML
    // document - verify the expected markup against the upstream tess4j source.
    String htmlBeginTag = "\n"
            + "\n\n\n"
            + "\n\n"
            + "\n\n";
    String htmlEndTag = "\n\n";

    /**
     * Performs OCR operation.
     *
     * @param imageFile an image file
     * @return the recognized text
     * @throws TesseractException
     */
    String doOCR(File imageFile) throws TesseractException;

    /**
     * Performs OCR operation.
     *
     * @param imageFile an image file
     * @param rect the bounding rectangle defines the region of the image to be
     * recognized. A rectangle of zero dimension or null indicates
     * the whole image.
     * @return the recognized text
     * @throws TesseractException
     */
    String doOCR(File imageFile, Rectangle rect) throws TesseractException;

    /**
     * Performs OCR operation.
     *
     * @param bi a buffered image
     * @return the recognized text
     * @throws TesseractException
     */
    String doOCR(BufferedImage bi) throws TesseractException;

    /**
     * Performs OCR operation.
     *
     * @param bi a buffered image
     * @param rect the bounding rectangle defines the region of the image to be
     * recognized. A rectangle of zero dimension or null indicates
     * the whole image.
     * @return the recognized text
     * @throws TesseractException
     */
    String doOCR(BufferedImage bi, Rectangle rect) throws TesseractException;

    /**
     * Performs OCR operation.
     *
     * @param imageList a list of IIOImage objects
     * @param rect the bounding rectangle defines the region of the image to be
     * recognized. A rectangle of zero dimension or null indicates
     * the whole image.
     * @return the recognized text
     * @throws TesseractException
     */
    String doOCR(List<IIOImage> imageList, Rectangle rect) throws TesseractException;

    /**
     * Performs OCR operation. Use SetImage, (optionally)
     * SetRectangle, and one or more of the Get*Text
     * functions.
     *
     * @param xsize width of image
     * @param ysize height of image
     * @param buf pixel data
     * @param rect the bounding rectangle defines the region of the image to be
     * recognized. A rectangle of zero dimension or null indicates
     * the whole image.
     * @param bpp bits per pixel, represents the bit depth of the image, with 1
     * for binary bitmap, 8 for gray, and 24 for color RGB.
     * @return the recognized text
     * @throws TesseractException
     */
    String doOCR(int xsize, int ysize, ByteBuffer buf, Rectangle rect, int bpp) throws TesseractException;

    /**
     * Sets tessdata path.
     *
     * @param datapath the tessdata path to set
     */
    void setDatapath(String datapath);

    /**
     * Sets language for OCR.
     *
     * @param language the language code, which follows ISO 639-3 standard.
     */
    void setLanguage(String language);

    /**
     * Sets OCR engine mode.
     *
     * @param ocrEngineMode the OcrEngineMode to set
     */
    void setOcrEngineMode(int ocrEngineMode);

    /**
     * Sets page segmentation mode.
     *
     * @param mode the page segmentation mode to set
     */
    void setPageSegMode(int mode);

    /**
     * Sets the value of Tesseract's internal parameter.
     *
     * @param key variable name, e.g., tessedit_create_hocr,
     * tessedit_char_whitelist, etc.
     * @param value value for corresponding variable, e.g., "1", "0",
     * "0123456789", etc.
     */
    void setTessVariable(String key, String value);
}
148 | -------------------------------------------------------------------------------- /src/net/sourceforge/tess4j/TessAPI.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright @ 2012 Quan Nguyen 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package net.sourceforge.tess4j; 17 | 18 | import com.sun.jna.*; 19 | import com.sun.jna.ptr.*; 20 | import java.nio.*; 21 | 22 | /** 23 | * A Java wrapper for 24 | * Tesseract OCR 3.02 API using 25 | * JNA Interface Mapping. 26 | */ 27 | public interface TessAPI extends Library { 28 | 29 | static final boolean WINDOWS = System.getProperty("os.name").toLowerCase().startsWith("windows"); 30 | /** 31 | * Native library name. 32 | */ 33 | public static final String LIB_NAME = "libtesseract302"; 34 | public static final String LIB_NAME_NON_WIN = "tesseract"; 35 | /** 36 | * An instance of the class library. 37 | */ 38 | public static final TessAPI INSTANCE = (TessAPI) Native.loadLibrary(WINDOWS ? LIB_NAME : LIB_NAME_NON_WIN, TessAPI.class); 39 | 40 | /** 41 | * When Tesseract/Cube is initialized we can choose to instantiate/load/run 42 | * only the Tesseract part, only the Cube part or both along with the 43 | * combiner. The preference of which engine to use is stored in 44 | * tessedit_ocr_engine_mode.

ATTENTION: When 45 | * modifying this enum, please make sure to make the appropriate changes to 46 | * all the enums mirroring it (e.g. OCREngine in 47 | * cityblock/workflow/detection/detection_storage.proto). Such enums will 48 | * mention the connection to OcrEngineMode in the comments. 49 | */ 50 | public static interface TessOcrEngineMode { 51 | 52 | /** Run Tesseract only - fastest */ 53 | public static final int OEM_TESSERACT_ONLY = (int) 0; 54 | /** Run Cube only - better accuracy, but slower */ 55 | public static final int OEM_CUBE_ONLY = (int) 1; 56 | /** Run both and combine results - best accuracy */ 57 | public static final int OEM_TESSERACT_CUBE_COMBINED = (int) 2; 58 | /** Specify this mode when calling init_*(), 59 | to indicate that any of the above modes 60 | should be automatically inferred from the 61 | variables in the language-specific config, 62 | command-line configs, or if not specified 63 | in any of the above should be set to the 64 | default OEM_TESSERACT_ONLY. */ 65 | public static final int OEM_DEFAULT = (int) 3; 66 | }; 67 | 68 | /** 69 | * Possible modes for page layout analysis. These *must* be kept in order of 70 | * decreasing amount of layout analysis to be done, except for 71 | * OSD_ONLY, so that the inequality test macros below work. 72 | */ 73 | public static interface TessPageSegMode { 74 | 75 | /** Orientation and script detection only. */ 76 | public static final int PSM_OSD_ONLY = (int) 0; 77 | /** Automatic page segmentation with orientation and script detection. (OSD) */ 78 | public static final int PSM_AUTO_OSD = (int) 1; 79 | /** Automatic page segmentation, but no OSD, or OCR. */ 80 | public static final int PSM_AUTO_ONLY = (int) 2; 81 | /** Fully automatic page segmentation, but no OSD. */ 82 | public static final int PSM_AUTO = (int) 3; 83 | /** Assume a single column of text of variable sizes. 
*/ 84 | public static final int PSM_SINGLE_COLUMN = (int) 4; 85 | /** Assume a single uniform block of vertically aligned text. */ 86 | public static final int PSM_SINGLE_BLOCK_VERT_TEXT = (int) 5; 87 | /** Assume a single uniform block of text. */ 88 | public static final int PSM_SINGLE_BLOCK = (int) 6; 89 | /** Treat the image as a single text line. */ 90 | public static final int PSM_SINGLE_LINE = (int) 7; 91 | /** Treat the image as a single word. */ 92 | public static final int PSM_SINGLE_WORD = (int) 8; 93 | /** Treat the image as a single word in a circle. */ 94 | public static final int PSM_CIRCLE_WORD = (int) 9; 95 | /** Treat the image as a single character. */ 96 | public static final int PSM_SINGLE_CHAR = (int) 10; 97 | /** Find as much text as possible in no particular order. */ 98 | public static final int PSM_SPARSE_TEXT = (int) 11; 99 | /** Sparse text with orientation and script detection. */ 100 | public static final int PSM_SPARSE_TEXT_OSD = (int) 12; 101 | /** Number of enum entries. */ 102 | public static final int PSM_COUNT = (int) 13; 103 | }; 104 | 105 | /** 106 | * Enum of the elements of the page hierarchy, used in 107 | * ResultIterator to provide functions that operate on each 108 | * level without having to have 5x as many functions. 109 | */ 110 | public static interface TessPageIteratorLevel { 111 | 112 | /** Block of text/image/separator line. */ 113 | public static final int RIL_BLOCK = (int) 0; 114 | /** Paragraph within a block. */ 115 | public static final int RIL_PARA = (int) 1; 116 | /** Line within a paragraph. */ 117 | public static final int RIL_TEXTLINE = (int) 2; 118 | /** Word within a textline. */ 119 | public static final int RIL_WORD = (int) 3; 120 | /** Symbol/character within a word. */ 121 | public static final int RIL_SYMBOL = (int) 4; 122 | }; 123 | 124 | public static interface TessPolyBlockType { 125 | 126 | /** Type is not yet known. Keep as the first element. 
*/ 127 | public static final int PT_UNKNOWN = (int) 0; 128 | /** Text that lives inside a column. */ 129 | public static final int PT_FLOWING_TEXT = (int) 1; 130 | /** Text that spans more than one column. */ 131 | public static final int PT_HEADING_TEXT = (int) 2; 132 | /** Text that is in a cross-column pull-out region. */ 133 | public static final int PT_PULLOUT_TEXT = (int) 3; 134 | /** Partition belonging to an equation region. */ 135 | public static final int PT_EQUATION = (int) 4; 136 | /** Partition has inline equation. */ 137 | public static final int PT_INLINE_EQUATION = (int) 5; 138 | /** Partition belonging to a table region. */ 139 | public static final int PT_TABLE = (int) 6; 140 | /** Text-line runs vertically. */ 141 | public static final int PT_VERTICAL_TEXT = (int) 7; 142 | /** Text that belongs to an image. */ 143 | public static final int PT_CAPTION_TEXT = (int) 8; 144 | /** Image that lives inside a column. */ 145 | public static final int PT_FLOWING_IMAGE = (int) 9; 146 | /** Image that spans more than one column. */ 147 | public static final int PT_HEADING_IMAGE = (int) 10; 148 | /** Image that is in a cross-column pull-out region. */ 149 | public static final int PT_PULLOUT_IMAGE = (int) 11; 150 | /** Horizontal Line. */ 151 | public static final int PT_HORZ_LINE = (int) 12; 152 | /** Vertical Line. */ 153 | public static final int PT_VERT_LINE = (int) 13; 154 | /** Lies outside of any column. */ 155 | public static final int PT_NOISE = (int) 14; 156 | /** Number of enum entries. */ 157 | public static final int PT_COUNT = (int) 15; 158 | }; 159 | 160 | /** 161 | *
162 |      *  +------------------+
163 |      *  | 1 Aaaa Aaaa Aaaa |
164 |      *  | Aaa aa aaa aa    |
165 |      *  | aaaaaa A aa aaa. |
166 |      *  |                2 |
167 |      *  |   #######  c c C |
168 |      *  |   #######  c c c |
169 |      *  | < #######  c c c |
170 |      *  | < #######  c   c |
171 |      *  | < #######  .   c |
172 |      *  | 3 #######      c |
173 |      *  +------------------+
174 |      * 
175 | * Orientation Example:
176 | * ====================
177 | * Above is a 178 | * diagram of some (1) English and (2) Chinese text and a (3) photo 179 | * credit.
180 | *
181 | * Upright Latin characters are represented as A and a. '<' represents 182 | * a latin character rotated anti-clockwise 90 degrees. Upright 183 | * Chinese characters are represented C and c.
184 | *
185 | * NOTA BENE: enum values here should match goodoc.proto
186 | *
187 | * If you orient your head so that "up" aligns with Orientation, then 188 | * the characters will appear "right side up" and readable.
189 | *
190 | * In the example above, both the 191 | * English and Chinese paragraphs are oriented so their "up" is the top of 192 | * the page (page up). The photo credit is read with one's head turned 193 | * leftward ("up" is to page left).
194 | *
The values of this enum 195 | * match the convention of Tesseract's osdetect.h 196 | */ 197 | public static interface TessOrientation { 198 | 199 | public static final int ORIENTATION_PAGE_UP = (int) 0; 200 | public static final int ORIENTATION_PAGE_RIGHT = (int) 1; 201 | public static final int ORIENTATION_PAGE_DOWN = (int) 2; 202 | public static final int ORIENTATION_PAGE_LEFT = (int) 3; 203 | }; 204 | 205 | /** 206 | * The grapheme clusters within a line of text are laid out logically in 207 | * this direction, judged when looking at the text line rotated so that its 208 | * Orientation is "page up".

For English text, the writing 209 | * direction is left-to-right. For the Chinese text in the above example, 210 | * the writing direction is top-to-bottom. 211 | */ 212 | public static interface TessWritingDirection { 213 | 214 | public static final int WRITING_DIRECTION_LEFT_TO_RIGHT = (int) 0; 215 | public static final int WRITING_DIRECTION_RIGHT_TO_LEFT = (int) 1; 216 | public static final int WRITING_DIRECTION_TOP_TO_BOTTOM = (int) 2; 217 | }; 218 | 219 | /** 220 | * The text lines are read in the given sequence.

In English, 221 | * the order is top-to-bottom. In Chinese, vertical text lines are read 222 | * right-to-left. Mongolian is written in vertical columns top to bottom 223 | * like Chinese, but the lines order left-to right.

Note that 224 | * only some combinations make sense. For example, 225 | * WRITING_DIRECTION_LEFT_TO_RIGHT implies 226 | * TEXTLINE_ORDER_TOP_TO_BOTTOM. 227 | */ 228 | public static interface TessTextlineOrder { 229 | 230 | public static final int TEXTLINE_ORDER_LEFT_TO_RIGHT = (int) 0; 231 | public static final int TEXTLINE_ORDER_RIGHT_TO_LEFT = (int) 1; 232 | public static final int TEXTLINE_ORDER_TOP_TO_BOTTOM = (int) 2; 233 | }; 234 | public static final int TRUE = (int) 1; 235 | public static final int FALSE = (int) 0; 236 | 237 | /** 238 | * Returns the version identifier. 239 | */ 240 | String TessVersion(); 241 | 242 | void TessDeleteText(Pointer text); 243 | 244 | void TessDeleteTextArray(PointerByReference arr); 245 | 246 | void TessDeleteIntArray(IntBuffer arr); 247 | 248 | /** 249 | * Creates an instance of the base class for all Tesseract APIs. 250 | */ 251 | TessAPI.TessBaseAPI TessBaseAPICreate(); 252 | 253 | /** 254 | * Disposes the TesseractAPI instance. 255 | */ 256 | void TessBaseAPIDelete(TessAPI.TessBaseAPI handle); 257 | 258 | /** 259 | * Set the name of the input file. Needed only for training and reading a 260 | * UNLV zone file. 261 | */ 262 | void TessBaseAPISetInputName(TessAPI.TessBaseAPI handle, String name); 263 | 264 | /** 265 | * Set the name of the bonus output files. Needed only for debugging. 266 | */ 267 | void TessBaseAPISetOutputName(TessAPI.TessBaseAPI handle, String name); 268 | 269 | /** 270 | * Set the value of an internal "parameter." Supply the name of the 271 | * parameter and the value as a string, just as you would in a config file. 272 | * Returns false if the name lookup failed. E.g., 273 | * SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, 274 | * y and z. Or 275 | * SetVariable("classify_bln_numeric_mode", "1"); to set 276 | * numeric-only mode. 277 | * SetVariable may be used before 278 | * Init, but settings will revert to defaults on 279 | * End().

Note: Must be called after 280 | * Init(). Only works for non-init variables (init variables 281 | * should be passed to 282 | * Init()). 283 | */ 284 | int TessBaseAPISetVariable(TessAPI.TessBaseAPI handle, String name, String value); 285 | 286 | /** 287 | * Returns true (1) if the parameter was found among Tesseract parameters. 288 | * Fills in value with the value of the parameter. 289 | */ 290 | int TessBaseAPIGetIntVariable(TessAPI.TessBaseAPI handle, String name, IntBuffer value); 291 | 292 | int TessBaseAPIGetBoolVariable(TessAPI.TessBaseAPI handle, String name, IntBuffer value); 293 | 294 | int TessBaseAPIGetDoubleVariable(TessAPI.TessBaseAPI handle, String name, DoubleBuffer value); 295 | 296 | String TessBaseAPIGetStringVariable(TessAPI.TessBaseAPI handle, String name); 297 | 298 | /** 299 | * Print Tesseract parameters to the given file.

Note: Must not 300 | * be the first method called after instance create. 301 | */ 302 | void TessBaseAPIPrintVariablesToFile(TessAPI.TessBaseAPI handle, String filename); 303 | 304 | /** 305 | * Instances are now mostly thread-safe and totally independent, but some 306 | * global parameters remain. Basically it is safe to use multiple 307 | * TessBaseAPIs in different threads in parallel, UNLESS: you use 308 | * SetVariable on some of the Params in classify and textord. 309 | * If you do, then the effect will be to change it for all your 310 | * instances.

Start tesseract. Returns zero on success and -1 311 | * on failure. NOTE that the only members that may be called before Init are 312 | * those listed above here in the class definition.

The 313 | * datapath must be the name of the parent directory of 314 | * tessdata and must end in / . Any name after the last / will be stripped. 315 | * The language is (usually) an 316 | * ISO 639-3 string or 317 | * NULL will default to eng. It is entirely safe (and 318 | * eventually will be efficient too) to call Init multiple times on the same 319 | * instance to change language, or just to reset the classifier. The 320 | * language may be a string of the form [~][+[~]]* indicating 321 | * that multiple languages are to be loaded. E.g., hin+eng will load Hindi 322 | * and English. Languages may specify internally that they want to be loaded 323 | * with one or more other languages, so the ~ sign is available to override 324 | * that. E.g., if hin were set to load eng by default, then hin+~eng would 325 | * force loading only hin. The number of loaded languages is limited only by 326 | * memory, with the caveat that loading additional languages will impact 327 | * both speed and accuracy, as there is more work to do to decide on the 328 | * applicable language, and there is more chance of hallucinating incorrect 329 | * words. WARNING: On changing languages, all Tesseract parameters are reset 330 | * back to their default values. (Which may vary between languages.) If you 331 | * have a rare need to set a Variable that controls initialization for a 332 | * second call to 333 | * Init you should explicitly call 334 | * End() and then use 335 | * SetVariable before 336 | * Init. This is only a very rare use case, since there are 337 | * very few uses that require any parameters to be set before 338 | * Init.

If 339 | * set_only_non_debug_params is true, only params that do not 340 | * contain "debug" in the name will be set. 341 | */ 342 | int TessBaseAPIInit1(TessAPI.TessBaseAPI handle, String datapath, String language, int oem, PointerByReference configs, int configs_size); 343 | 344 | int TessBaseAPIInit2(TessAPI.TessBaseAPI handle, String datapath, String language, int oem); 345 | 346 | int TessBaseAPIInit3(TessAPI.TessBaseAPI handle, String datapath, String language); 347 | 348 | /** 349 | * Returns the languages string used in the last valid initialization. If 350 | * the last initialization specified "deu+hin" then that will be returned. 351 | * If hin loaded eng automatically as well, then that will not be included 352 | * in this list. To find the languages actually loaded, use 353 | * GetLoadedLanguagesAsVector. The returned string should NOT 354 | * be deleted. 355 | */ 356 | String TessBaseAPIGetInitLanguagesAsString(TessAPI.TessBaseAPI handle); 357 | 358 | /** 359 | * Returns the loaded languages in the vector of STRINGs. Includes all 360 | * languages loaded by the last 361 | * Init, including those loaded as dependencies of other loaded 362 | * languages. 363 | */ 364 | PointerByReference TessBaseAPIGetLoadedLanguagesAsVector(TessAPI.TessBaseAPI handle); 365 | 366 | /** 367 | * Returns the available languages in the vector of STRINGs. 368 | */ 369 | PointerByReference TessBaseAPIGetAvailableLanguagesAsVector(TessAPI.TessBaseAPI handle); 370 | 371 | /** 372 | * Init only the lang model component of Tesseract. The only functions that 373 | * work after this init are 374 | * SetVariable and 375 | * IsValidWord. WARNING: temporary! This function will be 376 | * removed from here and placed in a separate API at some future time. 377 | */ 378 | int TessBaseAPIInitLangMod(TessAPI.TessBaseAPI handle, String datapath, String language); 379 | 380 | /** 381 | * Init only for page layout analysis. Use only for calls to 382 | * SetImage and 383 | * AnalysePage. 
Calls that attempt recognition will generate an 384 | * error. 385 | */ 386 | void TessBaseAPIInitForAnalysePage(TessAPI.TessBaseAPI handle); 387 | 388 | /** 389 | * Read a "config" file containing a set of param, value pairs. Searches the 390 | * standard places: 391 | * tessdata/configs, 392 | * tessdata/tessconfigs and also accepts a relative or absolute 393 | * path name. Note: only non-init params will be set (init params are set by 394 | * Init()). 395 | */ 396 | void TessBaseAPIReadConfigFile(TessAPI.TessBaseAPI handle, String filename, int init_only); 397 | 398 | /** 399 | * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK. The 400 | * mode is stored as an IntParam so it can also be modified by 401 | * ReadConfigFile or 402 | * SetVariable("tessedit_pageseg_mode", mode as string). 403 | */ 404 | void TessBaseAPISetPageSegMode(TessAPI.TessBaseAPI handle, int mode); 405 | 406 | /** 407 | * Return the current page segmentation mode. 408 | */ 409 | int TessBaseAPIGetPageSegMode(TessAPI.TessBaseAPI handle); 410 | 411 | /** 412 | * Recognize a rectangle from an image and return the result as a string. 413 | * May be called many times for a single 414 | * Init. Currently has no error checking. Greyscale of 8 and 415 | * color of 24 or 32 bits per pixel may be given. Palette color images will 416 | * not work properly and must be converted to 24 bit. Binary images of 1 bit 417 | * per pixel may also be given but they must be byte packed with the MSB of 418 | * the first byte being the first pixel, and a 1 represents WHITE. For 419 | * binary images set bytes_per_pixel=0. The recognized text is returned as a 420 | * char* which is coded as UTF8 and must be freed with the delete [] 421 | * operator.

Note that 422 | * TesseractRect is the simplified convenience interface. For 423 | * advanced uses, use 424 | * SetImage, (optionally) 425 | * SetRectangle, 426 | * Recognize, and one or more of the 427 | * Get*Text functions below. 428 | */ 429 | Pointer TessBaseAPIRect(TessAPI.TessBaseAPI handle, ByteBuffer imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height); 430 | 431 | /** 432 | * Call between pages or documents etc to free up memory and forget adaptive 433 | * data. 434 | */ 435 | void TessBaseAPIClearAdaptiveClassifier(TessAPI.TessBaseAPI handle); 436 | 437 | /** 438 | * Provide an image for Tesseract to recognize. Format is as TesseractRect 439 | * above. Does not copy the image buffer, or take ownership. The source 440 | * image may be destroyed after Recognize is called, either explicitly or 441 | * implicitly via one of the 442 | * Get*Text functions. 443 | * SetImage clears all recognition results, and sets the 444 | * rectangle to the full image, so it may be followed immediately by a 445 | * GetUTF8Text, and it will automatically perform recognition. 446 | */ 447 | void TessBaseAPISetImage(TessAPI.TessBaseAPI handle, ByteBuffer imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line); 448 | 449 | /** 450 | * Set the resolution of the source image in pixels per inch so font size 451 | * information can be calculated in results. Call this after SetImage(). 452 | */ 453 | void TessBaseAPISetSourceResolution(TessAPI.TessBaseAPI handle, int ppi); 454 | 455 | /** 456 | * Restrict recognition to a sub-rectangle of the image. Call after 457 | * SetImage. Each 458 | * SetRectangle clears the recognition results so multiple 459 | * rectangles can be recognized with the same image. 460 | */ 461 | void TessBaseAPISetRectangle(TessAPI.TessBaseAPI handle, int left, int top, int width, int height); 462 | 463 | /** Scale factor from original image. 
*/ 464 | int TessBaseAPIGetThresholdedImageScaleFactor(TessAPI.TessBaseAPI handle); 465 | 466 | /** Dump the internal binary image to a PGM file. */ 467 | void TessBaseAPIDumpPGM(TessAPI.TessBaseAPI handle, String filename); 468 | 469 | /** 470 | * Runs page layout analysis in the mode set by SetPageSegMode. May 471 | * optionally be called prior to Recognize to get access to just the page 472 | * layout results. Returns an iterator to the results. Returns NULL on 473 | * error. The returned iterator must be deleted after use. WARNING! This 474 | * class points to data held within the TessBaseAPI class, and therefore can 475 | * only be used while the TessBaseAPI class still exists and has not been 476 | * subjected to a call of 477 | * Init, 478 | * SetImage, 479 | * Recognize, 480 | * Clear, 481 | * End, DetectOS, or anything else that changes the internal 482 | * PAGE_RES. 483 | */ 484 | TessAPI.TessPageIterator TessBaseAPIAnalyseLayout(TessAPI.TessBaseAPI handle); 485 | 486 | /** 487 | * Recognize the image from SetAndThresholdImage, generating Tesseract 488 | * internal structures. Returns 0 on success. Optional. The 489 | * Get*Text functions below will call 490 | * Recognize if needed. After Recognize, the output is kept 491 | * internally until the next 492 | * SetImage. 493 | */ 494 | int TessBaseAPIRecognize(TessAPI.TessBaseAPI handle, TessAPI.ETEXT_DESC monitor); 495 | 496 | /** 497 | * Variant on Recognize used for testing chopper. 498 | */ 499 | int TessBaseAPIRecognizeForChopTest(TessAPI.TessBaseAPI handle, TessAPI.ETEXT_DESC monitor); 500 | 501 | /** 502 | * Get a reading-order iterator to the results of LayoutAnalysis and/or 503 | * Recognize. The returned iterator must be deleted after use. WARNING! 
This 504 | * class points to data held within the TessBaseAPI class, and therefore can 505 | * only be used while the TessBaseAPI class still exists and has not been 506 | * subjected to a call of 507 | * Init, 508 | * SetImage, 509 | * Recognize, 510 | * Clear, 511 | * End, DetectOS, or anything else that changes the internal 512 | * PAGE_RES. 513 | */ 514 | TessAPI.TessResultIterator TessBaseAPIGetIterator(TessAPI.TessBaseAPI handle); 515 | 516 | /** 517 | * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. 518 | * The returned iterator must be deleted after use. 519 | * WARNING! This class points to data held within the TessBaseAPI class, and 520 | * therefore can only be used while the TessBaseAPI class still exists and 521 | * has not been subjected to a call of Init, SetImage, Recognize, Clear, End 522 | * DetectOS, or anything else that changes the internal PAGE_RES. 523 | */ 524 | TessAPI.TessMutableIterator TessBaseAPIGetMutableIterator(TessAPI.TessBaseAPI handle); 525 | 526 | /** 527 | * Recognizes all the pages in the named file, as a multi-page tiff or list 528 | * of filenames, or single image, and gets the appropriate kind of text 529 | * according to parameters: 530 | * tessedit_create_boxfile, 531 | * tessedit_make_boxes_from_boxes, 532 | * tessedit_write_unlv, 533 | * tessedit_create_hocr. Calls ProcessPage on each page in the 534 | * input file, which may be a multi-page tiff, single-page other file 535 | * format, or a plain text list of images to read. If tessedit_page_number 536 | * is non-negative, processing begins at that page of a multi-page tiff 537 | * file, or filelist. The text is returned in text_out. Returns false on 538 | * error. If non-zero timeout_millisec terminates processing after the 539 | * timeout on a single page. If non-NULL and non-empty, and some page fails 540 | * for some reason, the page is reprocessed with the retry_config config 541 | * file. Useful for interactively debugging a bad page. 
542 | */ 543 | Pointer TessBaseAPIProcessPages(TessAPI.TessBaseAPI handle, String filename, String retry_config, int timeout_millisec); 544 | 545 | /** 546 | * The recognized text is returned as a char* which is coded as UTF-8 and 547 | * must be freed with the delete [] operator. 548 | */ 549 | Pointer TessBaseAPIGetUTF8Text(TessAPI.TessBaseAPI handle); 550 | 551 | /** 552 | * Make a HTML-formatted string with hOCR markup from the internal data 553 | * structures. page_number is 0-based but will appear in the output as 554 | * 1-based. 555 | */ 556 | Pointer TessBaseAPIGetHOCRText(TessAPI.TessBaseAPI handle, int page_number); 557 | 558 | /** 559 | * The recognized text is returned as a char* which is coded in the same 560 | * format as a box file used in training. Returned string must be freed with 561 | * the delete [] operator. Constructs coordinates in the original image - 562 | * not just the rectangle. page_number is a 0-based page index that will 563 | * appear in the box file. 564 | */ 565 | Pointer TessBaseAPIGetBoxText(TessAPI.TessBaseAPI handle, int page_number); 566 | 567 | /** 568 | * The recognized text is returned as a char* which is coded as UNLV format 569 | * Latin-1 with specific reject and suspect codes and must be freed with the 570 | * delete [] operator. 571 | */ 572 | Pointer TessBaseAPIGetUNLVText(TessAPI.TessBaseAPI handle); 573 | 574 | /** 575 | * Returns the (average) confidence value between 0 and 100. 576 | */ 577 | int TessBaseAPIMeanTextConf(TessAPI.TessBaseAPI handle); 578 | 579 | /** 580 | * Returns all word confidences (between 0 and 100) in an array, terminated 581 | * by -1. The calling function must delete [] after use. The number of 582 | * confidences should correspond to the number of space-delimited words in 583 | * GetUTF8Text. 584 | */ 585 | IntByReference TessBaseAPIAllWordConfidences(TessAPI.TessBaseAPI handle); 586 | 587 | /** 588 | * Applies the given word to the adaptive classifier if possible. 
The word 589 | * must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can tell the 590 | * boundaries of the graphemes. Assumes that SetImage/SetRectangle have been 591 | * used to set the image to the given word. The mode arg should be 592 | * PSM_SINGLE_WORD or PSM_CIRCLE_WORD, as that will be used to control 593 | * layout analysis. The currently set PageSegMode is preserved. Returns 594 | * false if adaption was not possible for some reason. 595 | */ 596 | int TessBaseAPIAdaptToWordStr(TessAPI.TessBaseAPI handle, int mode, String wordstr); 597 | 598 | /** 599 | * Free up recognition results and any stored image data, without actually 600 | * freeing any recognition data that would be time-consuming to reload. 601 | * Afterwards, you must call 602 | * SetImage or 603 | * TesseractRect before doing any 604 | * Recognize or 605 | * Get* operation. 606 | */ 607 | void TessBaseAPIClear(TessAPI.TessBaseAPI handle); 608 | 609 | /** 610 | * Close down tesseract and free up all memory. 611 | * End() is equivalent to destructing and reconstructing your 612 | * TessBaseAPI. Once 613 | * End() has been used, none of the other API functions may be 614 | * used other than 615 | * Init and anything declared above it in the class definition. 616 | */ 617 | void TessBaseAPIEnd(TessAPI.TessBaseAPI handle); 618 | 619 | /** 620 | * Check whether a word is valid according to Tesseract's language model. 621 | * 622 | * @return 0 if the word is invalid, non-zero if valid. @warning temporary! 623 | * This function will be removed from here and placed in a separate API at 624 | * some future time. 625 | */ 626 | int TessBaseAPIIsValidWord(TessAPI.TessBaseAPI handle, String word); 627 | 628 | int TessBaseAPIGetTextDirection(TessAPI.TessBaseAPI handle, IntBuffer out_offset, FloatBuffer out_slope); 629 | 630 | /** 631 | * This method returns the string form of the specified unichar. 
632 | */ 633 | String TessBaseAPIGetUnichar(TessAPI.TessBaseAPI handle, int unichar_id); 634 | 635 | /* Page iterator */ 636 | void TessPageIteratorDelete(TessAPI.TessPageIterator handle); 637 | 638 | TessAPI.TessPageIterator TessPageIteratorCopy(TessAPI.TessPageIterator handle); 639 | 640 | void TessPageIteratorBegin(TessAPI.TessPageIterator handle); 641 | 642 | int TessPageIteratorNext(TessAPI.TessPageIterator handle, int level); 643 | 644 | int TessPageIteratorIsAtBeginningOf(TessAPI.TessPageIterator handle, int level); 645 | 646 | int TessPageIteratorIsAtFinalElement(TessAPI.TessPageIterator handle, int level, int element); 647 | 648 | int TessPageIteratorBoundingBox(TessAPI.TessPageIterator handle, int level, IntBuffer left, IntBuffer top, IntBuffer right, IntBuffer bottom); 649 | 650 | int TessPageIteratorBlockType(TessAPI.TessPageIterator handle); 651 | 652 | int TessPageIteratorBaseline(TessAPI.TessPageIterator handle, int level, IntBuffer x1, IntBuffer y1, IntBuffer x2, IntBuffer y2); 653 | 654 | void TessPageIteratorOrientation(TessAPI.TessPageIterator handle, IntBuffer orientation, IntBuffer writing_direction, IntBuffer textline_order, FloatBuffer deskew_angle); 655 | 656 | /* Result iterator */ 657 | void TessResultIteratorDelete(TessAPI.TessResultIterator handle); 658 | 659 | TessAPI.TessResultIterator TessResultIteratorCopy(TessAPI.TessResultIterator handle); 660 | 661 | TessAPI.TessPageIterator TessResultIteratorGetPageIterator(TessAPI.TessResultIterator handle); 662 | 663 | TessAPI.TessPageIterator TessResultIteratorGetPageIteratorConst(TessAPI.TessResultIterator handle); 664 | 665 | Pointer TessResultIteratorGetUTF8Text(TessAPI.TessResultIterator handle, int level); 666 | 667 | float TessResultIteratorConfidence(TessAPI.TessResultIterator handle, int level); 668 | 669 | String TessResultIteratorWordFontAttributes(TessAPI.TessResultIterator handle, IntBuffer is_bold, IntBuffer is_italic, IntBuffer is_underlined, IntBuffer is_monospace, IntBuffer 
is_serif, IntBuffer is_smallcaps, IntBuffer pointsize, IntBuffer font_id); 670 | 671 | int TessResultIteratorWordIsFromDictionary(TessAPI.TessResultIterator handle); 672 | 673 | int TessResultIteratorWordIsNumeric(TessAPI.TessResultIterator handle); 674 | 675 | int TessResultIteratorSymbolIsSuperscript(TessAPI.TessResultIterator handle); 676 | 677 | int TessResultIteratorSymbolIsSubscript(TessAPI.TessResultIterator handle); 678 | 679 | int TessResultIteratorSymbolIsDropcap(TessAPI.TessResultIterator handle); 680 | 681 | /** Typed pointer (extends PointerType) used as the handle argument of the TessBaseAPI* functions. Both constructors only delegate to PointerType. */ public static class TessBaseAPI extends PointerType { 682 | 683 | public TessBaseAPI(Pointer address) { 684 | super(address); 685 | } 686 | 687 | public TessBaseAPI() { 688 | super(); 689 | } 690 | }; 691 | 692 | /** Typed pointer (extends PointerType) used as the monitor argument of TessBaseAPIRecognize and TessBaseAPIRecognizeForChopTest. Both constructors only delegate to PointerType. */ public static class ETEXT_DESC extends PointerType { 693 | 694 | public ETEXT_DESC(Pointer address) { 695 | super(address); 696 | } 697 | 698 | public ETEXT_DESC() { 699 | super(); 700 | } 701 | }; 702 | 703 | /** Typed pointer (extends PointerType) for a page iterator, as returned by TessBaseAPIAnalyseLayout and taken by the TessPageIterator* functions. Both constructors only delegate to PointerType. */ public static class TessPageIterator extends PointerType { 704 | 705 | public TessPageIterator(Pointer address) { 706 | super(address); 707 | } 708 | 709 | public TessPageIterator() { 710 | super(); 711 | } 712 | }; 713 | 714 | /** Typed pointer (extends PointerType) for the mutable iterator returned by TessBaseAPIGetMutableIterator. Both constructors only delegate to PointerType. */ public static class TessMutableIterator extends PointerType { 715 | 716 | public TessMutableIterator(Pointer address) { 717 | super(address); 718 | } 719 | 720 | public TessMutableIterator() { 721 | super(); 722 | } 723 | }; 724 | 725 | /** Typed pointer (extends PointerType) for a result iterator, as returned by TessBaseAPIGetIterator and taken by the TessResultIterator* functions. Both constructors only delegate to PointerType. */ public static class TessResultIterator extends PointerType { 726 | 727 | public TessResultIterator(Pointer address) { 728 | super(address); 729 | } 730 | 731 | public TessResultIterator() { 732 | super(); 733 | } 734 | }; 735 | } 736 | -------------------------------------------------------------------------------- /src/net/sourceforge/tess4j/TessAPI1.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright @ 2012 Quan Nguyen 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use
this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package net.sourceforge.tess4j; 17 | 18 | import com.sun.jna.*; 19 | import com.sun.jna.ptr.*; 20 | import java.nio.*; 21 | 22 | /** 23 | * A Java wrapper for 24 | * Tesseract OCR 3.02 API using 25 | * JNA Direct Mapping. 26 | */ 27 | public class TessAPI1 implements Library { 28 | 29 | /** True when the lower-cased "os.name" system property starts with "windows"; decides which library name the static initializer registers. */ static final boolean WINDOWS = System.getProperty("os.name").toLowerCase().startsWith("windows"); 30 | /** 31 | * Native library name registered when WINDOWS is true. 32 | */ 33 | public static final String LIB_NAME = "libtesseract302"; 34 | /** Native library name registered when WINDOWS is false. */ public static final String LIB_NAME_NON_WIN = "tesseract"; 35 | 36 | /* Runs once at class initialization and passes the platform-specific library name to Native.register. NOTE(review): presumably this is what binds the static native methods declared in this class (JNA direct mapping) - confirm against the JNA documentation. */ static { 37 | Native.register(WINDOWS ? LIB_NAME : LIB_NAME_NON_WIN); 38 | } 39 | 40 | /** 41 | * When Tesseract/Cube is initialized we can choose to instantiate/load/run 42 | * only the Tesseract part, only the Cube part or both along with the 43 | * combiner. The preference of which engine to use is stored in 44 | * tessedit_ocr_engine_mode.
45 | *
46 | * ATTENTION: When modifying this enum, please make sure to make the 47 | * appropriate changes to all the enums mirroring it (e.g. OCREngine in 48 | * cityblock/workflow/detection/detection_storage.proto). Such enums will 49 | * mention the connection to OcrEngineMode in the comments. 50 | */ 51 | public static interface TessOcrEngineMode { 52 | 53 | /** Run Tesseract only - fastest */ 54 | public static final int OEM_TESSERACT_ONLY = (int) 0; 55 | /** Run Cube only - better accuracy, but slower */ 56 | public static final int OEM_CUBE_ONLY = (int) 1; 57 | /** Run both and combine results - best accuracy */ 58 | public static final int OEM_TESSERACT_CUBE_COMBINED = (int) 2; 59 | /** Specify this mode when calling init_*(), 60 | to indicate that any of the above modes 61 | should be automatically inferred from the 62 | variables in the language-specific config, 63 | command-line configs, or if not specified 64 | in any of the above should be set to the 65 | default OEM_TESSERACT_ONLY. */ 66 | public static final int OEM_DEFAULT = (int) 3; 67 | }; 68 | 69 | /** 70 | * Possible modes for page layout analysis. These *must* be kept in order of 71 | * decreasing amount of layout analysis to be done, except for OSD_ONLY, so 72 | * that the inequality test macros below work. 73 | */ 74 | public static interface TessPageSegMode { 75 | 76 | /** Orientation and script detection only. */ 77 | public static final int PSM_OSD_ONLY = (int) 0; 78 | /** Automatic page segmentation with orientation and script detection. (OSD) */ 79 | public static final int PSM_AUTO_OSD = (int) 1; 80 | /** Automatic page segmentation, but no OSD, or OCR. */ 81 | public static final int PSM_AUTO_ONLY = (int) 2; 82 | /** Fully automatic page segmentation, but no OSD. */ 83 | public static final int PSM_AUTO = (int) 3; 84 | /** Assume a single column of text of variable sizes. 
*/ 85 | public static final int PSM_SINGLE_COLUMN = (int) 4; 86 | /** Assume a single uniform block of vertically aligned text. */ 87 | public static final int PSM_SINGLE_BLOCK_VERT_TEXT = (int) 5; 88 | /** Assume a single uniform block of text. */ 89 | public static final int PSM_SINGLE_BLOCK = (int) 6; 90 | /** Treat the image as a single text line. */ 91 | public static final int PSM_SINGLE_LINE = (int) 7; 92 | /** Treat the image as a single word. */ 93 | public static final int PSM_SINGLE_WORD = (int) 8; 94 | /** Treat the image as a single word in a circle. */ 95 | public static final int PSM_CIRCLE_WORD = (int) 9; 96 | /** Treat the image as a single character. */ 97 | public static final int PSM_SINGLE_CHAR = (int) 10; 98 | /** Find as much text as possible in no particular order. */ 99 | public static final int PSM_SPARSE_TEXT = (int) 11; 100 | /** Sparse text with orientation and script detection. */ 101 | public static final int PSM_SPARSE_TEXT_OSD = (int) 12; 102 | /** Number of enum entries. */ 103 | public static final int PSM_COUNT = (int) 13; 104 | }; 105 | 106 | /** 107 | * Enum of the elements of the page hierarchy, used in ResultIterator to 108 | * provide functions that operate on each level without having to have 5x as 109 | * many functions. 110 | */ 111 | public static interface TessPageIteratorLevel { 112 | 113 | /** Block of text/image/separator line. */ 114 | public static final int RIL_BLOCK = (int) 0; 115 | /** Paragraph within a block. */ 116 | public static final int RIL_PARA = (int) 1; 117 | /** Line within a paragraph. */ 118 | public static final int RIL_TEXTLINE = (int) 2; 119 | /** Word within a textline. */ 120 | public static final int RIL_WORD = (int) 3; 121 | /** Symbol/character within a word. */ 122 | public static final int RIL_SYMBOL = (int) 4; 123 | }; 124 | 125 | public static interface TessPolyBlockType { 126 | 127 | /** Type is not yet known. Keep as the first element. 
*/ 128 | public static final int PT_UNKNOWN = (int) 0; 129 | /** Text that lives inside a column. */ 130 | public static final int PT_FLOWING_TEXT = (int) 1; 131 | /** Text that spans more than one column. */ 132 | public static final int PT_HEADING_TEXT = (int) 2; 133 | /** Text that is in a cross-column pull-out region. */ 134 | public static final int PT_PULLOUT_TEXT = (int) 3; 135 | /** Partition belonging to an equation region. */ 136 | public static final int PT_EQUATION = (int) 4; 137 | /** Partition has inline equation. */ 138 | public static final int PT_INLINE_EQUATION = (int) 5; 139 | /** Partition belonging to a table region. */ 140 | public static final int PT_TABLE = (int) 6; 141 | /** Text-line runs vertically. */ 142 | public static final int PT_VERTICAL_TEXT = (int) 7; 143 | /** Text that belongs to an image. */ 144 | public static final int PT_CAPTION_TEXT = (int) 8; 145 | /** Image that lives inside a column. */ 146 | public static final int PT_FLOWING_IMAGE = (int) 9; 147 | /** Image that spans more than one column. */ 148 | public static final int PT_HEADING_IMAGE = (int) 10; 149 | /** Image that is in a cross-column pull-out region. */ 150 | public static final int PT_PULLOUT_IMAGE = (int) 11; 151 | /** Horizontal Line. */ 152 | public static final int PT_HORZ_LINE = (int) 12; 153 | /** Vertical Line. */ 154 | public static final int PT_VERT_LINE = (int) 13; 155 | /** Lies outside of any column. */ 156 | public static final int PT_NOISE = (int) 14; 157 | /** Number of enum entries. */ 158 | public static final int PT_COUNT = (int) 15; 159 | }; 160 | 161 | /** 162 | *
163 |      *  +------------------+
164 |      *  | 1 Aaaa Aaaa Aaaa |
165 |      *  | Aaa aa aaa aa    |
166 |      *  | aaaaaa A aa aaa. |
167 |      *  |                2 |
168 |      *  |   #######  c c C |
169 |      *  |   #######  c c c |
170 |      *  | < #######  c c c |
171 |      *  | < #######  c   c |
172 |      *  | < #######  .   c |
173 |      *  | 3 #######      c |
174 |      *  +------------------+
175 |      * 
176 | * Orientation Example:
177 | * ====================
178 | * Above is a 179 | * diagram of some (1) English and (2) Chinese text and a (3) photo 180 | * credit.
181 | *
182 | * Upright Latin characters are represented as A and a. '<' represents 183 | * a Latin character rotated anti-clockwise 90 degrees. Upright 184 | * Chinese characters are represented as C and c.
185 | *
186 | * NOTA BENE: enum values here should match goodoc.proto
187 | *
188 | * If you orient your head so that "up" aligns with Orientation, then 189 | * the characters will appear "right side up" and readable.
190 | *
191 | * In the example above, both the 192 | * English and Chinese paragraphs are oriented so their "up" is the top of 193 | * the page (page up). The photo credit is read with one's head turned 194 | * leftward ("up" is to page left).
195 | *
The values of this enum 196 | * match the convention of Tesseract's osdetect.h 197 | */ 198 | public static interface TessOrientation { 199 | 200 | /** Value 0: the text's "up" is the top of the page. */ public static final int ORIENTATION_PAGE_UP = (int) 0; 201 | /** Value 1: the text's "up" is toward the right side of the page. */ public static final int ORIENTATION_PAGE_RIGHT = (int) 1; 202 | /** Value 2: the text's "up" is the bottom of the page. */ public static final int ORIENTATION_PAGE_DOWN = (int) 2; 203 | /** Value 3: the text's "up" is toward the left side of the page (read with the head turned leftward). */ public static final int ORIENTATION_PAGE_LEFT = (int) 3; 204 | }; 205 | 206 | /** 207 | * The grapheme clusters within a line of text are laid out logically in 208 | * this direction, judged when looking at the text line rotated so that its 209 | * Orientation is "page up".
210 | *
211 | * For English text, the writing direction is left-to-right. For the Chinese 212 | * text in the above example, the writing direction is top-to-bottom. 213 | */ 214 | public static interface TessWritingDirection { 215 | 216 | /** Value 0: graphemes within a line run left to right (e.g. English). */ public static final int WRITING_DIRECTION_LEFT_TO_RIGHT = (int) 0; 217 | /** Value 1: graphemes within a line run right to left. */ public static final int WRITING_DIRECTION_RIGHT_TO_LEFT = (int) 1; 218 | /** Value 2: graphemes within a line run top to bottom (e.g. the vertical Chinese text in the orientation example). */ public static final int WRITING_DIRECTION_TOP_TO_BOTTOM = (int) 2; 219 | }; 220 | 221 | /** 222 | * The text lines are read in the given sequence.

In English, 223 | * the order is top-to-bottom. In Chinese, vertical text lines are read 224 | * right-to-left. Mongolian is written in vertical columns top to bottom 225 | * like Chinese, but the lines are ordered left-to-right.

Note that 226 | * only some combinations make sense. For example, 227 | * WRITING_DIRECTION_LEFT_TO_RIGHT implies 228 | * TEXTLINE_ORDER_TOP_TO_BOTTOM. 229 | */ 230 | public static interface TessTextlineOrder { 231 | 232 | public static final int TEXTLINE_ORDER_LEFT_TO_RIGHT = (int) 0; 233 | public static final int TEXTLINE_ORDER_RIGHT_TO_LEFT = (int) 1; 234 | public static final int TEXTLINE_ORDER_TOP_TO_BOTTOM = (int) 2; 235 | }; 236 | public static final int TRUE = (int) 1; 237 | public static final int FALSE = (int) 0; 238 | 239 | /** 240 | * Returns the version identifier. 241 | */ 242 | public static native String TessVersion(); 243 | 244 | public static native void TessDeleteText(Pointer text); 245 | 246 | public static native void TessDeleteTextArray(PointerByReference arr); 247 | 248 | public static native void TessDeleteIntArray(IntBuffer arr); 249 | 250 | /** 251 | * Creates an instance of the base class for all Tesseract APIs. 252 | */ 253 | public static native TessAPI1.TessBaseAPI TessBaseAPICreate(); 254 | 255 | /** 256 | * Disposes the TesseractAPI instance. 257 | */ 258 | public static native void TessBaseAPIDelete(TessAPI1.TessBaseAPI handle); 259 | 260 | /** 261 | * Set the name of the input file. Needed only for training and reading a 262 | * UNLV zone file. 263 | */ 264 | public static native void TessBaseAPISetInputName(TessAPI1.TessBaseAPI handle, String name); 265 | 266 | /** 267 | * Set the name of the bonus output files. Needed only for debugging. 268 | */ 269 | public static native void TessBaseAPISetOutputName(TessAPI1.TessBaseAPI handle, String name); 270 | 271 | /** 272 | * Set the value of an internal "parameter." Supply the name of the 273 | * parameter and the value as a string, just as you would in a config file. 274 | * Returns false if the name lookup failed. E.g., 275 | * SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z. Or 276 | * SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode. 
277 | * SetVariable may be used before Init, but settings will revert to defaults 278 | * on End().
279 | *
280 | * Note: Must be called after Init(). Only works for non-init variables 281 | * (init variables should be passed to Init()). 282 | */ 283 | public static native int TessBaseAPISetVariable(TessAPI1.TessBaseAPI handle, String name, String value); 284 | 285 | /** 286 | * Returns true (1) if the parameter was found among Tesseract parameters. 287 | * Fills in value with the value of the parameter. 288 | */ 289 | public static native int TessBaseAPIGetIntVariable(TessAPI1.TessBaseAPI handle, String name, IntBuffer value); 290 | 291 | public static native int TessBaseAPIGetBoolVariable(TessAPI1.TessBaseAPI handle, String name, IntBuffer value); 292 | 293 | public static native int TessBaseAPIGetDoubleVariable(TessAPI1.TessBaseAPI handle, String name, DoubleBuffer value); 294 | 295 | public static native String TessBaseAPIGetStringVariable(TessAPI1.TessBaseAPI handle, String name); 296 | 297 | /** 298 | * Print Tesseract parameters to the given file.
299 | *
300 | * Note: Must not be the first method called after instance create. 301 | */ 302 | public static native void TessBaseAPIPrintVariablesToFile(TessAPI1.TessBaseAPI handle, String filename); 303 | 304 | /** 305 | * Instances are now mostly thread-safe and totally independent, but some 306 | * global parameters remain. Basically it is safe to use multiple 307 | * TessBaseAPIs in different threads in parallel, UNLESS: you use 308 | * SetVariable on some of the Params in classify and textord. If you do, 309 | * then the effect will be to change it for all your instances.
310 | *
311 | * Start tesseract. Returns zero on success and -1 on failure. NOTE that the 312 | * only members that may be called before Init are those listed above here 313 | * in the class definition.
314 | *
315 | * The datapath must be the name of the parent directory of tessdata and 316 | * must end in / . Any name after the last / will be stripped. The language 317 | * is (usually) an ISO 639-3 string; NULL will default to eng. It is 318 | * entirely safe (and eventually will be efficient too) to call Init 319 | * multiple times on the same instance to change language, or just to reset 320 | * the classifier. The language may be a string of the form 321 | * [~]&lt;lang&gt;[+[~]&lt;lang&gt;]* indicating that multiple languages are to be 322 | * loaded. E.g., hin+eng will load Hindi and English. Languages may specify 323 | * internally that they want to be loaded with one or more other languages, 324 | * so the ~ sign is available to override that. E.g., if hin were set to 325 | * load eng by default, then hin+~eng would force loading only hin. The 326 | * number of loaded languages is limited only by memory, with the caveat 327 | * that loading additional languages will impact both speed and accuracy, as 328 | * there is more work to do to decide on the applicable language, and there 329 | * is more chance of hallucinating incorrect words. WARNING: On changing 330 | * languages, all Tesseract parameters are reset back to their default 331 | * values. (Which may vary between languages.) If you have a rare need to 332 | * set a Variable that controls initialization for a second call to Init you 333 | * should explicitly call End() and then use SetVariable before Init. This 334 | * is only a very rare use case, since there are very few uses that require 335 | * any parameters to be set before Init.
336 | *
337 | * If set_only_non_debug_params is true, only params that do not contain 338 | * "debug" in the name will be set. 339 | */ 340 | public static native int TessBaseAPIInit1(TessAPI1.TessBaseAPI handle, String datapath, String language, int oem, PointerByReference configs, int configs_size); 341 | 342 | public static native int TessBaseAPIInit2(TessAPI1.TessBaseAPI handle, String datapath, String language, int oem); 343 | 344 | public static native int TessBaseAPIInit3(TessAPI1.TessBaseAPI handle, String datapath, String language); 345 | 346 | /** 347 | * Returns the languages string used in the last valid initialization. If 348 | * the last initialization specified "deu+hin" then that will be returned. 349 | * If hin loaded eng automatically as well, then that will not be included 350 | * in this list. To find the languages actually loaded, use 351 | * GetLoadedLanguagesAsVector. The returned string should NOT be deleted. 352 | */ 353 | public static native String TessBaseAPIGetInitLanguagesAsString(TessAPI1.TessBaseAPI handle); 354 | 355 | /** 356 | * Returns the loaded languages in the vector of STRINGs. Includes all 357 | * languages loaded by the last Init, including those loaded as dependencies 358 | * of other loaded languages. 359 | */ 360 | public static native PointerByReference TessBaseAPIGetLoadedLanguagesAsVector(TessAPI1.TessBaseAPI handle); 361 | 362 | /** 363 | * Returns the available languages in the vector of STRINGs. 364 | */ 365 | public static native PointerByReference TessBaseAPIGetAvailableLanguagesAsVector(TessAPI1.TessBaseAPI handle); 366 | 367 | /** 368 | * Init only the lang model component of Tesseract. The only functions that 369 | * work after this init are SetVariable and IsValidWord. WARNING: temporary! 370 | * This function will be removed from here and placed in a separate API at 371 | * some future time. 
372 | */ 373 | public static native int TessBaseAPIInitLangMod(TessAPI1.TessBaseAPI handle, String datapath, String language); 374 | 375 | /** 376 | * Init only for page layout analysis. Use only for calls to SetImage and 377 | * AnalysePage. Calls that attempt recognition will generate an error. 378 | */ 379 | public static native void TessBaseAPIInitForAnalysePage(TessAPI1.TessBaseAPI handle); 380 | 381 | /** 382 | * Read a "config" file containing a set of param, value pairs. Searches the 383 | * standard places: tessdata/configs, tessdata/tessconfigs and also accepts 384 | * a relative or absolute path name. Note: only non-init params will be set 385 | * (init params are set by Init()). 386 | */ 387 | public static native void TessBaseAPIReadConfigFile(TessAPI1.TessBaseAPI handle, String filename, int init_only); 388 | 389 | /** 390 | * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK. The 391 | * mode is stored as an IntParam so it can also be modified by 392 | * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string). 393 | */ 394 | public static native void TessBaseAPISetPageSegMode(TessAPI1.TessBaseAPI handle, int mode); 395 | 396 | /** 397 | * Return the current page segmentation mode. 398 | */ 399 | public static native int TessBaseAPIGetPageSegMode(TessAPI1.TessBaseAPI handle); 400 | 401 | /** 402 | * Recognize a rectangle from an image and return the result as a string. 403 | * May be called many times for a single Init. Currently has no error 404 | * checking. Greyscale of 8 and color of 24 or 32 bits per pixel may be 405 | * given. Palette color images will not work properly and must be converted 406 | * to 24 bit. Binary images of 1 bit per pixel may also be given but they 407 | * must be byte packed with the MSB of the first byte being the first pixel, 408 | * and a 1 represents WHITE. For binary images set bytes_per_pixel=0. 
The 409 | * recognized text is returned as a char* which is coded as UTF8 and must be 410 | * freed with the delete [] operator.
411 | *
412 | * Note that TesseractRect is the simplified convenience interface. For 413 | * advanced uses, use SetImage, (optionally) SetRectangle, Recognize, and 414 | * one or more of the Get*Text functions below. 415 | */ 416 | public static native Pointer TessBaseAPIRect(TessAPI1.TessBaseAPI handle, ByteBuffer imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height); 417 | 418 | /** 419 | * Call between pages or documents etc to free up memory and forget adaptive 420 | * data. 421 | */ 422 | public static native void TessBaseAPIClearAdaptiveClassifier(TessAPI1.TessBaseAPI handle); 423 | 424 | /** 425 | * Provide an image for Tesseract to recognize. Format is as TesseractRect 426 | * above. Does not copy the image buffer, or take ownership. The source 427 | * image may be destroyed after Recognize is called, either explicitly or 428 | * implicitly via one of the Get*Text functions. SetImage clears all 429 | * recognition results, and sets the rectangle to the full image, so it may 430 | * be followed immediately by a GetUTF8Text, and it will automatically 431 | * perform recognition. 432 | */ 433 | public static native void TessBaseAPISetImage(TessAPI1.TessBaseAPI handle, ByteBuffer imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line); 434 | 435 | /** 436 | * Set the resolution of the source image in pixels per inch so font size 437 | * information can be calculated in results. Call this after SetImage(). 438 | */ 439 | public static native void TessBaseAPISetSourceResolution(TessAPI1.TessBaseAPI handle, int ppi); 440 | 441 | /** 442 | * Restrict recognition to a sub-rectangle of the image. Call after 443 | * SetImage. Each SetRectangle clears the recognition results so multiple 444 | * rectangles can be recognized with the same image. 
445 | */ 446 | public static native void TessBaseAPISetRectangle(TessAPI1.TessBaseAPI handle, int left, int top, int width, int height); 447 | 448 | /** Scale factor from original image. */ 449 | public static native int TessBaseAPIGetThresholdedImageScaleFactor(TessAPI1.TessBaseAPI handle); 450 | 451 | /** Dump the internal binary image to a PGM file. */ 452 | public static native void TessBaseAPIDumpPGM(TessAPI1.TessBaseAPI handle, String filename); 453 | 454 | /** 455 | * Runs page layout analysis in the mode set by SetPageSegMode. May 456 | * optionally be called prior to Recognize to get access to just the page 457 | * layout results. Returns an iterator to the results. Returns NULL on 458 | * error. The returned iterator must be deleted after use. WARNING! This 459 | * class points to data held within the TessBaseAPI class, and therefore can 460 | * only be used while the TessBaseAPI class still exists and has not been 461 | * subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or 462 | * anything else that changes the internal PAGE_RES. 463 | */ 464 | public static native TessAPI1.TessPageIterator TessBaseAPIAnalyseLayout(TessAPI1.TessBaseAPI handle); 465 | 466 | /** 467 | * Recognize the image from SetAndThresholdImage, generating Tesseract 468 | * internal structures. Returns 0 on success. Optional. The Get*Text 469 | * functions below will call Recognize if needed. After Recognize, the 470 | * output is kept internally until the next SetImage. 471 | */ 472 | public static native int TessBaseAPIRecognize(TessAPI1.TessBaseAPI handle, TessAPI1.ETEXT_DESC monitor); 473 | 474 | /** 475 | * Variant on Recognize used for testing chopper. 476 | */ 477 | public static native int TessBaseAPIRecognizeForChopTest(TessAPI1.TessBaseAPI handle, TessAPI1.ETEXT_DESC monitor); 478 | 479 | /** 480 | * Get a reading-order iterator to the results of LayoutAnalysis and/or 481 | * Recognize. The returned iterator must be deleted after use. WARNING! 
This 482 | * class points to data held within the TessBaseAPI class, and therefore can 483 | * only be used while the TessBaseAPI class still exists and has not been 484 | * subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or 485 | * anything else that changes the internal PAGE_RES. 486 | */ 487 | public static native TessAPI1.TessResultIterator TessBaseAPIGetIterator(TessAPI1.TessBaseAPI handle); 488 | 489 | /** 490 | * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. 491 | * The returned iterator must be deleted after use. 492 | * WARNING! This class points to data held within the TessBaseAPI class, and 493 | * therefore can only be used while the TessBaseAPI class still exists and 494 | * has not been subjected to a call of Init, SetImage, Recognize, Clear, End, 495 | * DetectOS, or anything else that changes the internal PAGE_RES. 496 | */ 497 | public static native TessAPI1.TessMutableIterator TessBaseAPIGetMutableIterator(TessAPI1.TessBaseAPI handle); 498 | 499 | /** 500 | * Recognizes all the pages in the named file, as a multi-page tiff or list 501 | * of filenames, or single image, and gets the appropriate kind of text 502 | * according to parameters: tessedit_create_boxfile, 503 | * tessedit_make_boxes_from_boxes, tessedit_write_unlv, 504 | * tessedit_create_hocr. Calls ProcessPage on each page in the input file, 505 | * which may be a multi-page tiff, single-page other file format, or a plain 506 | * text list of images to read. If tessedit_page_number is non-negative, 507 | * processing begins at that page of a multi-page tiff file, or filelist. 508 | * The text is returned in text_out. Returns false on error. (NOTE(review): this binding declares no text_out parameter and returns a Pointer, so the text is presumably the return value and NULL the error result - confirm against the Tesseract C API.) If 509 | * timeout_millisec is non-zero, processing is terminated after the timeout on a single 510 | * page. If retry_config is non-NULL and non-empty, and some page fails for some reason, the 511 | * page is reprocessed with the retry_config config file. Useful for 512 | * interactively debugging a bad page.
513 | */ 514 | public static native Pointer TessBaseAPIProcessPages(TessAPI1.TessBaseAPI handle, String filename, String retry_config, int timeout_millisec); 515 | 516 | /** 517 | * The recognized text is returned as a char* which is coded as UTF-8 and 518 | * must be freed with the delete [] operator. 519 | */ 520 | public static native Pointer TessBaseAPIGetUTF8Text(TessAPI1.TessBaseAPI handle); 521 | 522 | /** 523 | * Make a HTML-formatted string with hOCR markup from the internal data 524 | * structures. page_number is 0-based but will appear in the output as 525 | * 1-based. 526 | */ 527 | public static native Pointer TessBaseAPIGetHOCRText(TessAPI1.TessBaseAPI handle, int page_number); 528 | 529 | /** 530 | * The recognized text is returned as a char* which is coded in the same 531 | * format as a box file used in training. Returned string must be freed with 532 | * the delete [] operator. Constructs coordinates in the original image - 533 | * not just the rectangle. page_number is a 0-based page index that will 534 | * appear in the box file. 535 | */ 536 | public static native Pointer TessBaseAPIGetBoxText(TessAPI1.TessBaseAPI handle, int page_number); 537 | 538 | /** 539 | * The recognized text is returned as a char* which is coded as UNLV format 540 | * Latin-1 with specific reject and suspect codes and must be freed with the 541 | * delete [] operator. 542 | */ 543 | public static native Pointer TessBaseAPIGetUNLVText(TessAPI1.TessBaseAPI handle); 544 | 545 | /** 546 | * Returns the (average) confidence value between 0 and 100. 547 | */ 548 | public static native int TessBaseAPIMeanTextConf(TessAPI1.TessBaseAPI handle); 549 | 550 | /** 551 | * Returns all word confidences (between 0 and 100) in an array, terminated 552 | * by -1. The calling function must delete [] after use. The number of 553 | * confidences should correspond to the number of space-delimited words in 554 | * GetUTF8Text. 
555 | */ 556 | public static native IntByReference TessBaseAPIAllWordConfidences(TessAPI1.TessBaseAPI handle); 557 | 558 | /** 559 | * Applies the given word to the adaptive classifier if possible. The word 560 | * must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can tell the 561 | * boundaries of the graphemes. Assumes that SetImage/SetRectangle have been 562 | * used to set the image to the given word. The mode arg should be 563 | * PSM_SINGLE_WORD or PSM_CIRCLE_WORD, as that will be used to control 564 | * layout analysis. The currently set PageSegMode is preserved. Returns 565 | * false if adaption was not possible for some reason. 566 | */ 567 | public static native int TessBaseAPIAdaptToWordStr(TessAPI1.TessBaseAPI handle, int mode, String wordstr); 568 | 569 | /** 570 | * Free up recognition results and any stored image data, without actually 571 | * freeing any recognition data that would be time-consuming to reload. 572 | * Afterwards, you must call SetImage or TesseractRect before doing any 573 | * Recognize or Get* operation. 574 | */ 575 | public static native void TessBaseAPIClear(TessAPI1.TessBaseAPI handle); 576 | 577 | /** 578 | * Close down tesseract and free up all memory. End() is equivalent to 579 | * destructing and reconstructing your TessBaseAPI. Once End() has been 580 | * used, none of the other API functions may be used other than Init and 581 | * anything declared above it in the class definition. 582 | */ 583 | public static native void TessBaseAPIEnd(TessAPI1.TessBaseAPI handle); 584 | 585 | /** 586 | * Check whether a word is valid according to Tesseract's language model. 587 | * 588 | * @return 0 if the word is invalid, non-zero if valid. @warning temporary! 589 | * This function will be removed from here and placed in a separate API at 590 | * some future time. 
591 | */ 592 | public static native int TessBaseAPIIsValidWord(TessAPI1.TessBaseAPI handle, String word); 593 | 594 | public static native int TessBaseAPIGetTextDirection(TessAPI1.TessBaseAPI handle, IntBuffer out_offset, FloatBuffer out_slope); 595 | 596 | /** 597 | * This method returns the string form of the specified unichar. 598 | */ 599 | public static native String TessBaseAPIGetUnichar(TessAPI1.TessBaseAPI handle, int unichar_id); 600 | 601 | /* Page iterator */ 602 | public static native void TessPageIteratorDelete(TessAPI1.TessPageIterator handle); 603 | 604 | public static native TessAPI1.TessPageIterator TessPageIteratorCopy(TessAPI1.TessPageIterator handle); 605 | 606 | public static native void TessPageIteratorBegin(TessAPI1.TessPageIterator handle); 607 | 608 | public static native int TessPageIteratorNext(TessAPI1.TessPageIterator handle, int level); 609 | 610 | public static native int TessPageIteratorIsAtBeginningOf(TessAPI1.TessPageIterator handle, int level); 611 | 612 | public static native int TessPageIteratorIsAtFinalElement(TessAPI1.TessPageIterator handle, int level, int element); 613 | 614 | public static native int TessPageIteratorBoundingBox(TessAPI1.TessPageIterator handle, int level, IntBuffer left, IntBuffer top, IntBuffer right, IntBuffer bottom); 615 | 616 | public static native int TessPageIteratorBlockType(TessAPI1.TessPageIterator handle); 617 | 618 | public static native int TessPageIteratorBaseline(TessAPI1.TessPageIterator handle, int level, IntBuffer x1, IntBuffer y1, IntBuffer x2, IntBuffer y2); 619 | 620 | public static native void TessPageIteratorOrientation(TessAPI1.TessPageIterator handle, IntBuffer orientation, IntBuffer writing_direction, IntBuffer textline_order, FloatBuffer deskew_angle); 621 | 622 | /* Result iterator */ 623 | public static native void TessResultIteratorDelete(TessAPI1.TessResultIterator handle); 624 | 625 | public static native TessAPI1.TessResultIterator 
TessResultIteratorCopy(TessAPI1.TessResultIterator handle); 626 | 627 | public static native TessAPI1.TessPageIterator TessResultIteratorGetPageIterator(TessAPI1.TessResultIterator handle); 628 | 629 | public static native TessAPI1.TessPageIterator TessResultIteratorGetPageIteratorConst(TessAPI1.TessResultIterator handle); 630 | 631 | public static native Pointer TessResultIteratorGetUTF8Text(TessAPI1.TessResultIterator handle, int level); 632 | 633 | public static native float TessResultIteratorConfidence(TessAPI1.TessResultIterator handle, int level); 634 | 635 | public static native String TessResultIteratorWordFontAttributes(TessAPI1.TessResultIterator handle, IntBuffer is_bold, IntBuffer is_italic, IntBuffer is_underlined, IntBuffer is_monospace, IntBuffer is_serif, IntBuffer is_smallcaps, IntBuffer pointsize, IntBuffer font_id); 636 | 637 | public static native int TessResultIteratorWordIsFromDictionary(TessAPI1.TessResultIterator handle); 638 | 639 | public static native int TessResultIteratorWordIsNumeric(TessAPI1.TessResultIterator handle); 640 | 641 | public static native int TessResultIteratorSymbolIsSuperscript(TessAPI1.TessResultIterator handle); 642 | 643 | public static native int TessResultIteratorSymbolIsSubscript(TessAPI1.TessResultIterator handle); 644 | 645 | public static native int TessResultIteratorSymbolIsDropcap(TessAPI1.TessResultIterator handle); 646 | 647 | public static class TessBaseAPI extends PointerType { 648 | 649 | public TessBaseAPI(Pointer address) { 650 | super(address); 651 | } 652 | 653 | public TessBaseAPI() { 654 | super(); 655 | } 656 | }; 657 | 658 | public static class ETEXT_DESC extends PointerType { 659 | 660 | public ETEXT_DESC(Pointer address) { 661 | super(address); 662 | } 663 | 664 | public ETEXT_DESC() { 665 | super(); 666 | } 667 | }; 668 | 669 | public static class TessPageIterator extends PointerType { 670 | 671 | public TessPageIterator(Pointer address) { 672 | super(address); 673 | } 674 | 675 | public 
TessPageIterator() { 676 | super(); 677 | } 678 | }; 679 | 680 | public static class TessMutableIterator extends PointerType { 681 | 682 | public TessMutableIterator(Pointer address) { 683 | super(address); 684 | } 685 | 686 | public TessMutableIterator() { 687 | super(); 688 | } 689 | }; 690 | 691 | public static class TessResultIterator extends PointerType { 692 | 693 | public TessResultIterator(Pointer address) { 694 | super(address); 695 | } 696 | 697 | public TessResultIterator() { 698 | super(); 699 | } 700 | }; 701 | } 702 | -------------------------------------------------------------------------------- /src/net/sourceforge/tess4j/Tesseract.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright @ 2012 Quan Nguyen 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package net.sourceforge.tess4j; 17 | 18 | import net.sourceforge.vietocr.ImageIOHelper; 19 | import com.sun.jna.Pointer; 20 | import java.awt.Rectangle; 21 | import java.awt.image.*; 22 | import java.io.*; 23 | import java.nio.ByteBuffer; 24 | import java.util.*; 25 | import java.util.logging.*; 26 | import javax.imageio.IIOImage; 27 | 28 | /** 29 | * An object layer on top of 30 | * TessAPI, provides character recognition support for common image 31 | * formats, and multi-page TIFF images beyond the uncompressed, binary TIFF 32 | * format supported by Tesseract OCR engine. 
The extended capabilities are 33 | * provided by the 34 | * Java Advanced Imaging Image I/O Tools.

Support for 35 | * PDF documents is available through 36 | * Ghost4J, a 37 | * JNA wrapper for 38 | * GPL Ghostscript, which should be installed and included in 39 | * the system path.

Any program that uses the library will need to 40 | * ensure that the required libraries (the 41 | * .jar files for 42 | * jna, 43 | * jai-imageio, and 44 | * ghost4j) are in its compile and run-time 45 | * classpath. 46 | */ 47 | public class Tesseract implements ITesseract { 48 | 49 | private static Tesseract instance; 50 | //private String language = "eng"; //English recognition language pack 51 | private String language = "chi_sim";//Chinese (simplified) recognition language pack, used as the default 52 | private String datapath = "./"; 53 | private int psm = TessAPI.TessPageSegMode.PSM_AUTO; 54 | private boolean hocr; 55 | private int pageNum; 56 | private int ocrEngineMode = TessAPI.TessOcrEngineMode.OEM_DEFAULT; 57 | private final Properties prop = new Properties(); 58 | 59 | private TessAPI api; 60 | private TessAPI.TessBaseAPI handle; 61 | 62 | private final static Logger logger = Logger.getLogger(Tesseract.class.getName()); 63 | 64 | /** 65 | * Public constructor; sets the jna.encoding system property to UTF8. 66 | */ 67 | public Tesseract() { 68 | System.setProperty("jna.encoding", "UTF8"); 69 | } 70 | 71 | /** 72 | * Gets the shared instance of this class, creating it on first use. 73 | * 74 | * @return instance 75 | */ 76 | public static synchronized Tesseract getInstance() { 77 | if (instance == null) { 78 | instance = new Tesseract(); 79 | } 80 | 81 | return instance; 82 | } 83 | 84 | /** 85 | * Sets tessdata path. 86 | * 87 | * @param datapath the tessdata path to set 88 | */ 89 | public void setDatapath(String datapath) { 90 | this.datapath = datapath; 91 | } 92 | 93 | /** 94 | * Sets language for OCR. 95 | * 96 | * @param language the language code, which follows ISO 639-3 standard. 97 | */ 98 | public void setLanguage(String language) { 99 | this.language = language; 100 | } 101 | 102 | /** 103 | * Sets OCR engine mode. 104 | * 105 | * @param ocrEngineMode the OcrEngineMode to set 106 | */ 107 | public void setOcrEngineMode(int ocrEngineMode) { 108 | this.ocrEngineMode = ocrEngineMode; 109 | } 110 | 111 | /** 112 | * Sets page segmentation mode. 
113 | * 114 | * @param mode the page segmentation mode to set 115 | */ 116 | public void setPageSegMode(int mode) { 117 | this.psm = mode; 118 | } 119 | 120 | /** 121 | * Enables hocr output. 122 | * 123 | * @param hocr to enable or disable hocr output 124 | */ 125 | public void setHocr(boolean hocr) { 126 | this.hocr = hocr; 127 | prop.setProperty("tessedit_create_hocr", hocr ? "1" : "0"); 128 | } 129 | 130 | /** 131 | * Set the value of Tesseract's internal parameter. 132 | * 133 | * @param key variable name, e.g., 134 | * tessedit_create_hocr, 135 | * tessedit_char_whitelist, etc. 136 | * @param value value for corresponding variable, e.g., "1", "0", 137 | * "0123456789", etc. 138 | */ 139 | public void setTessVariable(String key, String value) { 140 | prop.setProperty(key, value); 141 | } 142 | 143 | /** 144 | * Performs OCR operation. 145 | * 146 | * @param imageFile an image file 147 | * @return the recognized text 148 | * @throws TesseractException 149 | */ 150 | public String doOCR(File imageFile) throws TesseractException { 151 | return doOCR(imageFile, null); 152 | } 153 | 154 | /** 155 | * Performs OCR operation. 156 | * 157 | * @param imageFile an image file 158 | * @param rect the bounding rectangle defines the region of the image to be 159 | * recognized. A rectangle of zero dimension or 160 | * null indicates the whole image. 161 | * @return the recognized text 162 | * @throws TesseractException 163 | */ 164 | public String doOCR(File imageFile, Rectangle rect) throws TesseractException { 165 | try { 166 | return doOCR(ImageIOHelper.getIIOImageList(imageFile), rect); 167 | } catch (Exception e) { 168 | logger.log(Level.SEVERE, e.getMessage(), e); 169 | throw new TesseractException(e); 170 | } 171 | } 172 | 173 | /** 174 | * Performs OCR operation. 
175 | * 176 | * @param bi a buffered image 177 | * @return the recognized text 178 | * @throws TesseractException 179 | */ 180 | public String doOCR(BufferedImage bi) throws TesseractException { 181 | return doOCR(bi, null); 182 | } 183 | 184 | /** 185 | * Performs OCR operation. 186 | * 187 | * @param bi a buffered image 188 | * @param rect the bounding rectangle defines the region of the image to be 189 | * recognized. A rectangle of zero dimension or 190 | * null indicates the whole image. 191 | * @return the recognized text 192 | * @throws TesseractException 193 | */ 194 | public String doOCR(BufferedImage bi, Rectangle rect) throws TesseractException { 195 | try { 196 | return doOCR(ImageIOHelper.getIIOImageList(bi), rect); 197 | } catch (Exception e) { 198 | logger.log(Level.SEVERE, e.getMessage(), e); 199 | throw new TesseractException(e); 200 | } 201 | } 202 | 203 | /** 204 | * Performs OCR operation on every image in the list and concatenates the results. An image whose processing throws an IOException is logged and skipped; when hocr output is enabled the combined text is wrapped in htmlBeginTag and htmlEndTag. The engine is initialized before the first image and disposed of when the method exits. 205 | * 206 | * @param imageList a list of 207 | * IIOImage objects 208 | * @param rect the bounding rectangle defines the region of the image to be 209 | * recognized. A rectangle of zero dimension or 210 | * null indicates the whole image. 211 | * @return the recognized text 212 | * @throws TesseractException 213 | */ 214 | public String doOCR(List<IIOImage> imageList, Rectangle rect) throws TesseractException { 215 | init(); 216 | setTessVariables(); 217 | 218 | try { 219 | StringBuilder sb = new StringBuilder(); 220 | 221 | for (IIOImage oimage : imageList) { 222 | pageNum++; 223 | try { 224 | setImage(oimage.getRenderedImage(), rect); 225 | sb.append(getOCRText()); 226 | } catch (IOException ioe) { 227 | // skip the problematic image 228 | logger.log(Level.SEVERE, ioe.getMessage(), ioe); 229 | } 230 | } 231 | 232 | if (hocr) { 233 | sb.insert(0, htmlBeginTag).append(htmlEndTag); 234 | } 235 | 236 | return sb.toString(); 237 | } finally { 238 | dispose(); 239 | } 240 | } 241 | 242 | /** 243 | * Performs OCR operation. 
Use 244 | * SetImage, (optionally) 245 | * SetRectangle, and one or more of the 246 | * Get*Text functions. 247 | * 248 | * @param xsize width of image 249 | * @param ysize height of image 250 | * @param buf pixel data 251 | * @param rect the bounding rectangle defines the region of the image to be 252 | * recognized. A rectangle of zero dimension or 253 | * null indicates the whole image. 254 | * @param bpp bits per pixel, represents the bit depth of the image, with 1 255 | * for binary bitmap, 8 for gray, and 24 for color RGB. 256 | * @return the recognized text 257 | * @throws TesseractException 258 | */ 259 | public String doOCR(int xsize, int ysize, ByteBuffer buf, Rectangle rect, int bpp) throws TesseractException { 260 | init(); 261 | setTessVariables(); 262 | 263 | try { 264 | setImage(xsize, ysize, buf, rect, bpp); 265 | return getOCRText(); 266 | } catch (Exception e) { 267 | logger.log(Level.SEVERE, e.getMessage(), e); 268 | throw new TesseractException(e); 269 | } finally { 270 | dispose(); 271 | } 272 | } 273 | 274 | /** 275 | * Initializes Tesseract engine. 276 | */ 277 | private void init() { 278 | pageNum = 0; 279 | api = TessAPI.INSTANCE; 280 | handle = api.TessBaseAPICreate(); 281 | api.TessBaseAPIInit2(handle, datapath, language, ocrEngineMode); 282 | api.TessBaseAPISetPageSegMode(handle, psm); 283 | } 284 | 285 | /** 286 | * Sets Tesseract's internal parameters. 287 | */ 288 | private void setTessVariables() { 289 | Enumeration em = prop.propertyNames(); 290 | while (em.hasMoreElements()) { 291 | String key = (String) em.nextElement(); 292 | api.TessBaseAPISetVariable(handle, key, prop.getProperty(key)); 293 | } 294 | } 295 | 296 | /** 297 | * A wrapper for {@link #setImage(int, int, ByteBuffer, Rectangle, int)}. 
298 | */ 299 | private void setImage(RenderedImage image, Rectangle rect) throws IOException { 300 | setImage(image.getWidth(), image.getHeight(), ImageIOHelper.getImageByteBuffer(image), rect, image.getColorModel().getPixelSize()); 301 | } 302 | 303 | /** 304 | * Sets image to be processed. 305 | * 306 | * @param xsize width of image 307 | * @param ysize height of image 308 | * @param buf pixel data 309 | * @param rect the bounding rectangle defines the region of the image to be 310 | * recognized. A rectangle of zero dimension or 311 | * null indicates the whole image. 312 | * @param bpp bits per pixel, represents the bit depth of the image, with 1 313 | * for binary bitmap, 8 for gray, and 24 for color RGB. 314 | */ 315 | private void setImage(int xsize, int ysize, ByteBuffer buf, Rectangle rect, int bpp) { 316 | int bytespp = bpp / 8; 317 | int bytespl = (int) Math.ceil(xsize * bpp / 8.0); 318 | api.TessBaseAPISetImage(handle, buf, xsize, ysize, bytespp, bytespl); 319 | 320 | if (rect != null && !rect.isEmpty()) { 321 | api.TessBaseAPISetRectangle(handle, rect.x, rect.y, rect.width, rect.height); 322 | } 323 | } 324 | 325 | /** 326 | * Gets recognized text. 327 | * 328 | * @return the recognized text 329 | */ 330 | private String getOCRText() { 331 | Pointer utf8Text = hocr ? api.TessBaseAPIGetHOCRText(handle, pageNum - 1) : api.TessBaseAPIGetUTF8Text(handle); 332 | String str = utf8Text.getString(0); 333 | api.TessDeleteText(utf8Text); 334 | return str; 335 | } 336 | 337 | /** 338 | * Releases all of the native resources used by this instance. 
339 | */ 340 | private void dispose() { 341 | api.TessBaseAPIDelete(handle); 342 | } 343 | } 344 | -------------------------------------------------------------------------------- /src/net/sourceforge/tess4j/Tesseract1.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright @ 2012 Quan Nguyen 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package net.sourceforge.tess4j; 17 | 18 | import net.sourceforge.vietocr.ImageIOHelper; 19 | import com.sun.jna.Pointer; 20 | import java.awt.Rectangle; 21 | import java.awt.image.*; 22 | import java.io.*; 23 | import java.nio.ByteBuffer; 24 | import java.util.*; 25 | import java.util.logging.*; 26 | import javax.imageio.IIOImage; 27 | 28 | /** 29 | * An object layer on top of 30 | * TessAPI1, provides character recognition support for common 31 | * image formats, and multi-page TIFF images beyond the uncompressed, binary 32 | * TIFF format supported by Tesseract OCR engine. The extended capabilities are 33 | * provided by the 34 | * Java Advanced Imaging Image I/O Tools.

Support for 35 | * PDF documents is available through 36 | * Ghost4J, a 37 | * JNA wrapper for 38 | * GPL Ghostscript, which should be installed and included in 39 | * the system path.

Any program that uses the library will need to 40 | * ensure that the required libraries (the 41 | * .jar files for 42 | * jna, 43 | * jai-imageio, and 44 | * ghost4j) are in its compile and run-time 45 | * classpath. 46 | */ 47 | public class Tesseract1 extends TessAPI1 implements ITesseract { 48 | 49 | private String language = "eng"; 50 | private String datapath = "./"; 51 | private int psm = TessAPI1.TessPageSegMode.PSM_AUTO; 52 | private boolean hocr; 53 | private int pageNum; 54 | private int ocrEngineMode = TessAPI1.TessOcrEngineMode.OEM_DEFAULT; 55 | private final Properties prop = new Properties(); 56 | 57 | private TessBaseAPI handle; 58 | 59 | private final static Logger logger = Logger.getLogger(Tesseract1.class.getName()); 60 | 61 | /** 62 | * Public constructor. 63 | */ 64 | public Tesseract1() { 65 | System.setProperty("jna.encoding", "UTF8"); 66 | } 67 | 68 | /** 69 | * @param datapath the tessdata path to set 70 | */ 71 | public void setDatapath(String datapath) { 72 | this.datapath = datapath; 73 | } 74 | 75 | /** 76 | * Sets language for OCR. 77 | * 78 | * @param language the language code, which follows ISO 639-3 standard. 79 | */ 80 | public void setLanguage(String language) { 81 | this.language = language; 82 | } 83 | 84 | /** 85 | * Sets OCR engine mode. 86 | * 87 | * @param ocrEngineMode the OcrEngineMode to set 88 | */ 89 | public void setOcrEngineMode(int ocrEngineMode) { 90 | this.ocrEngineMode = ocrEngineMode; 91 | } 92 | 93 | /** 94 | * @param mode the page segmentation mode to set 95 | */ 96 | public void setPageSegMode(int mode) { 97 | this.psm = mode; 98 | } 99 | 100 | /** 101 | * Enables hocr output. 102 | * 103 | * @param hocr to enable or disable hocr output 104 | */ 105 | public void setHocr(boolean hocr) { 106 | this.hocr = hocr; 107 | prop.setProperty("tessedit_create_hocr", hocr ? "1" : "0"); 108 | } 109 | 110 | /** 111 | * Set the value of Tesseract's internal parameter. 
112 | * 113 | * @param key variable name, e.g., 114 | * tessedit_create_hocr, 115 | * tessedit_char_whitelist, etc. 116 | * @param value value for corresponding variable, e.g., "1", "0", 117 | * "0123456789", etc. 118 | */ 119 | public void setTessVariable(String key, String value) { 120 | prop.setProperty(key, value); 121 | } 122 | 123 | /** 124 | * Returns API handle. 125 | */ 126 | protected TessBaseAPI getHandle() { 127 | return handle; 128 | } 129 | 130 | /** 131 | * Performs OCR operation. 132 | * 133 | * @param imageFile an image file 134 | * @return the recognized text 135 | * @throws TesseractException 136 | */ 137 | public String doOCR(File imageFile) throws TesseractException { 138 | return doOCR(imageFile, null); 139 | } 140 | 141 | /** 142 | * Performs OCR operation. 143 | * 144 | * @param imageFile an image file 145 | * @param rect the bounding rectangle defines the region of the image to be 146 | * recognized. A rectangle of zero dimension or 147 | * null indicates the whole image. 148 | * @return the recognized text 149 | * @throws TesseractException 150 | */ 151 | public String doOCR(File imageFile, Rectangle rect) throws TesseractException { 152 | try { 153 | return doOCR(ImageIOHelper.getIIOImageList(imageFile), rect); 154 | } catch (Exception e) { 155 | logger.log(Level.SEVERE, e.getMessage(), e); 156 | throw new TesseractException(e); 157 | } 158 | } 159 | 160 | /** 161 | * Performs OCR operation. 162 | * 163 | * @param bi a buffered image 164 | * @return the recognized text 165 | * @throws TesseractException 166 | */ 167 | public String doOCR(BufferedImage bi) throws TesseractException { 168 | return doOCR(bi, null); 169 | } 170 | 171 | /** 172 | * Performs OCR operation. 173 | * 174 | * @param bi a buffered image 175 | * @param rect the bounding rectangle defines the region of the image to be 176 | * recognized. A rectangle of zero dimension or 177 | * null indicates the whole image. 
178 | * @return the recognized text 179 | * @throws TesseractException 180 | */ 181 | public String doOCR(BufferedImage bi, Rectangle rect) throws TesseractException { 182 | try { 183 | return doOCR(ImageIOHelper.getIIOImageList(bi), rect); 184 | } catch (Exception e) { 185 | logger.log(Level.SEVERE, e.getMessage(), e); 186 | throw new TesseractException(e); 187 | } 188 | } 189 | 190 | /** 191 | * Performs OCR operation on every image in the list and concatenates the results. An image whose processing throws an IOException is logged and skipped; when hocr output is enabled the combined text is wrapped in htmlBeginTag and htmlEndTag. The engine is initialized before the first image and disposed of when the method exits. 192 | * 193 | * @param imageList a list of 194 | * IIOImage objects 195 | * @param rect the bounding rectangle defines the region of the image to be 196 | * recognized. A rectangle of zero dimension or 197 | * null indicates the whole image. 198 | * @return the recognized text 199 | * @throws TesseractException 200 | */ 201 | public String doOCR(List<IIOImage> imageList, Rectangle rect) throws TesseractException { 202 | init(); 203 | setTessVariables(); 204 | 205 | try { 206 | StringBuilder sb = new StringBuilder(); 207 | 208 | for (IIOImage oimage : imageList) { 209 | pageNum++; 210 | try { 211 | setImage(oimage.getRenderedImage(), rect); 212 | sb.append(getOCRText()); 213 | } catch (IOException ioe) { 214 | // skip the problematic image 215 | logger.log(Level.SEVERE, ioe.getMessage(), ioe); 216 | } 217 | } 218 | 219 | if (hocr) { 220 | sb.insert(0, htmlBeginTag).append(htmlEndTag); 221 | } 222 | 223 | return sb.toString(); 224 | } finally { 225 | dispose(); 226 | } 227 | } 228 | 229 | /** 230 | * Performs OCR operation. Use 231 | * SetImage, (optionally) 232 | * SetRectangle, and one or more of the 233 | * Get*Text functions. 234 | * 235 | * @param xsize width of image 236 | * @param ysize height of image 237 | * @param buf pixel data 238 | * @param rect the bounding rectangle defines the region of the image to be 239 | * recognized. A rectangle of zero dimension or 240 | * null indicates the whole image. 241 | * @param bpp bits per pixel, represents the bit depth of the image, with 1 242 | * for binary bitmap, 8 for gray, and 24 for color RGB. 
243 | * @return the recognized text 244 | * @throws TesseractException 245 | */ 246 | public String doOCR(int xsize, int ysize, ByteBuffer buf, Rectangle rect, int bpp) throws TesseractException { 247 | init(); 248 | setTessVariables(); 249 | 250 | try { 251 | setImage(xsize, ysize, buf, rect, bpp); 252 | return getOCRText(); 253 | } catch (Exception e) { 254 | logger.log(Level.SEVERE, e.getMessage(), e); 255 | throw new TesseractException(e); 256 | } finally { 257 | dispose(); 258 | } 259 | } 260 | 261 | /** 262 | * Initializes Tesseract engine. 263 | */ 264 | protected void init() { 265 | pageNum = 0; 266 | handle = TessBaseAPICreate(); 267 | TessBaseAPIInit2(handle, datapath, language, ocrEngineMode); 268 | TessBaseAPISetPageSegMode(handle, psm); 269 | } 270 | 271 | /** 272 | * Sets Tesseract's internal parameters. 273 | */ 274 | protected void setTessVariables() { 275 | Enumeration em = prop.propertyNames(); 276 | while (em.hasMoreElements()) { 277 | String key = (String) em.nextElement(); 278 | TessBaseAPISetVariable(handle, key, prop.getProperty(key)); 279 | } 280 | } 281 | 282 | /** 283 | * A wrapper for {@link #setImage(int, int, ByteBuffer, Rectangle, int)}. 284 | */ 285 | protected void setImage(RenderedImage image, Rectangle rect) throws IOException { 286 | setImage(image.getWidth(), image.getHeight(), ImageIOHelper.getImageByteBuffer(image), rect, image.getColorModel().getPixelSize()); 287 | } 288 | 289 | /** 290 | * Sets image to be processed. 291 | * 292 | * @param xsize width of image 293 | * @param ysize height of image 294 | * @param buf pixel data 295 | * @param rect the bounding rectangle defines the region of the image to be 296 | * recognized. A rectangle of zero dimension or 297 | * null indicates the whole image. 298 | * @param bpp bits per pixel, represents the bit depth of the image, with 1 299 | * for binary bitmap, 8 for gray, and 24 for color RGB. 
300 | */ 301 | protected void setImage(int xsize, int ysize, ByteBuffer buf, Rectangle rect, int bpp) { 302 | int bytespp = bpp / 8; 303 | int bytespl = (int) Math.ceil(xsize * bpp / 8.0); 304 | TessBaseAPISetImage(handle, buf, xsize, ysize, bytespp, bytespl); 305 | 306 | if (rect != null && !rect.isEmpty()) { 307 | TessBaseAPISetRectangle(handle, rect.x, rect.y, rect.width, rect.height); 308 | } 309 | } 310 | 311 | /** 312 | * Gets recognized text. 313 | * 314 | * @return the recognized text 315 | */ 316 | protected String getOCRText() { 317 | Pointer utf8Text = hocr ? TessBaseAPIGetHOCRText(handle, pageNum - 1) : TessBaseAPIGetUTF8Text(handle); 318 | String str = utf8Text.getString(0); 319 | TessDeleteText(utf8Text); 320 | return str; 321 | } 322 | 323 | /** 324 | * Releases all of the native resources used by this instance. 325 | */ 326 | protected void dispose() { 327 | TessBaseAPIDelete(handle); 328 | } 329 | } 330 | -------------------------------------------------------------------------------- /src/net/sourceforge/tess4j/TesseractException.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright @ 2010 Quan Nguyen 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package net.sourceforge.tess4j; 18 | 19 | public class TesseractException extends Exception { 20 | 21 | public TesseractException() { 22 | super(); 23 | } 24 | 25 | public TesseractException(String message) { 26 | super(message); 27 | } 28 | 29 | public TesseractException(Throwable cause) { 30 | super(cause); 31 | } 32 | 33 | public TesseractException(String message, Throwable cause) { 34 | super(message, cause); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/net/sourceforge/tess4j/util/Utils.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright @ 2013 Quan Nguyen 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package net.sourceforge.tess4j.util; 17 | 18 | import java.lang.reflect.Field; 19 | import java.lang.reflect.Modifier; 20 | 21 | public class Utils { 22 | 23 | /** 24 | * Gets user-friendly name of the public static final constant defined in a class or an 25 | * interface for display purpose. 
26 | * 27 | * @param value the constant value 28 | * @param c type of class or interface 29 | * @return name 30 | */ 31 | public static String getConstantName(Object value, Class c) { 32 | for (Field f : c.getDeclaredFields()) { 33 | int mod = f.getModifiers(); 34 | if (Modifier.isStatic(mod) && Modifier.isPublic(mod) && Modifier.isFinal(mod)) { 35 | try { 36 | if (f.get(null).equals(value)) { 37 | return f.getName(); 38 | } 39 | } catch (IllegalAccessException e) { 40 | return String.valueOf(value); 41 | } 42 | } 43 | } 44 | return String.valueOf(value); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/net/sourceforge/vietocr/ImageHelper.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright @ 2008 Quan Nguyen 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package net.sourceforge.vietocr; 17 | 18 | import java.awt.Graphics2D; 19 | import java.awt.Image; 20 | import java.awt.RenderingHints; 21 | import java.awt.Toolkit; 22 | import java.awt.Transparency; 23 | import java.awt.datatransfer.Clipboard; 24 | import java.awt.datatransfer.DataFlavor; 25 | import java.awt.image.*; 26 | 27 | public class ImageHelper { 28 | 29 | /** 30 | * Convenience method that returns a scaled instance of the provided 31 | * {@code BufferedImage}. 
32 | * 33 | * @param image the original image to be scaled 34 | * @param targetWidth the desired width of the scaled instance, in pixels 35 | * @param targetHeight the desired height of the scaled instance, in pixels 36 | * @return a scaled version of the original {@code BufferedImage} 37 | */ 38 | public static BufferedImage getScaledInstance(BufferedImage image, int targetWidth, int targetHeight) { 39 | int type = (image.getTransparency() == Transparency.OPAQUE) 40 | ? BufferedImage.TYPE_INT_RGB : BufferedImage.TYPE_INT_ARGB; 41 | BufferedImage tmp = new BufferedImage(targetWidth, targetHeight, type); 42 | Graphics2D g2 = tmp.createGraphics(); 43 | g2.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC); 44 | g2.drawImage(image, 0, 0, targetWidth, targetHeight, null); 45 | g2.dispose(); 46 | return tmp; 47 | } 48 | 49 | /** 50 | * A replacement for the standard 51 | * BufferedImage.getSubimage method. 52 | * 53 | * @param image 54 | * @param x the X coordinate of the upper-left corner of the specified 55 | * rectangular region 56 | * @param y the Y coordinate of the upper-left corner of the specified 57 | * rectangular region 58 | * @param width the width of the specified rectangular region 59 | * @param height the height of the specified rectangular region 60 | * @return a BufferedImage that is the subimage of image. 61 | */ 62 | public static BufferedImage getSubImage(BufferedImage image, int x, int y, int width, int height) { 63 | int type = (image.getTransparency() == Transparency.OPAQUE) 64 | ? BufferedImage.TYPE_INT_RGB : BufferedImage.TYPE_INT_ARGB; 65 | BufferedImage tmp = new BufferedImage(width, height, type); 66 | Graphics2D g2 = tmp.createGraphics(); 67 | g2.drawImage(image.getSubimage(x, y, width, height), 0, 0, null); 68 | g2.dispose(); 69 | return tmp; 70 | } 71 | 72 | /** 73 | * A simple method to convert an image to binary or B/W image. 
74 | * 75 | * @param image input image 76 | * @return a monochrome image 77 | */ 78 | public static BufferedImage convertImageToBinary(BufferedImage image) { 79 | BufferedImage tmp = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_BINARY); 80 | Graphics2D g2 = tmp.createGraphics(); 81 | g2.drawImage(image, 0, 0, null); 82 | g2.dispose(); 83 | return tmp; 84 | } 85 | 86 | /** 87 | * A simple method to convert an image to binary or B/W image. 88 | * 89 | * @param image input image 90 | * @return a monochrome image 91 | * @deprecated As of release 1.1, renamed to {@link #convertImageToBinary(BufferedImage image)} 92 | */ 93 | @Deprecated 94 | public static BufferedImage convertImage2Binary(BufferedImage image) { 95 | return convertImageToBinary(image); 96 | } 97 | 98 | /** 99 | * A simple method to convert an image to gray scale. 100 | * 101 | * @param image input image 102 | * @return a monochrome image 103 | */ 104 | public static BufferedImage convertImageToGrayscale(BufferedImage image) { 105 | BufferedImage tmp = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY); 106 | Graphics2D g2 = tmp.createGraphics(); 107 | g2.drawImage(image, 0, 0, null); 108 | g2.dispose(); 109 | return tmp; 110 | } 111 | 112 | private static final short[] invertTable; 113 | 114 | static { 115 | invertTable = new short[256]; 116 | for (int i = 0; i < 256; i++) { 117 | invertTable[i] = (short) (255 - i); 118 | } 119 | } 120 | 121 | /** 122 | * Inverts image color. 123 | * 124 | * @param image input image 125 | * @return an inverted-color image 126 | */ 127 | public static BufferedImage invertImageColor(BufferedImage image) { 128 | BufferedImage tmp = new BufferedImage(image.getWidth(), image.getHeight(), image.getType()); 129 | BufferedImageOp invertOp = new LookupOp(new ShortLookupTable(0, invertTable), null); 130 | return invertOp.filter(image, tmp); 131 | } 132 | 133 | /** 134 | * Rotates an image. 
135 | * 136 | * @param image the original image 137 | * @param angle the degree of rotation 138 | * @return a rotated image 139 | */ 140 | public static BufferedImage rotateImage(BufferedImage image, double angle) { 141 | double theta = Math.toRadians(angle); 142 | double sin = Math.abs(Math.sin(theta)); 143 | double cos = Math.abs(Math.cos(theta)); 144 | int w = image.getWidth(); 145 | int h = image.getHeight(); 146 | int newW = (int) Math.floor(w * cos + h * sin); 147 | int newH = (int) Math.floor(h * cos + w * sin); 148 | 149 | BufferedImage tmp = new BufferedImage(newW, newH, image.getType()); 150 | Graphics2D g2d = tmp.createGraphics(); 151 | g2d.setRenderingHint(RenderingHints.KEY_INTERPOLATION, 152 | RenderingHints.VALUE_INTERPOLATION_BICUBIC); 153 | g2d.translate((newW - w) / 2, (newH - h) / 2); 154 | g2d.rotate(theta, w / 2, h / 2); 155 | g2d.drawImage(image, 0, 0, null); 156 | g2d.dispose(); 157 | return tmp; 158 | } 159 | 160 | /** 161 | * Gets an image from Clipboard. 162 | * 163 | * @return image 164 | */ 165 | public static Image getClipboardImage() { 166 | Clipboard clipboard = Toolkit.getDefaultToolkit().getSystemClipboard(); 167 | try { 168 | return (Image) clipboard.getData(DataFlavor.imageFlavor); 169 | } catch (Exception e) { 170 | return null; 171 | } 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /src/net/sourceforge/vietocr/ImageIOHelper.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright @ 2008 Quan Nguyen 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. 
You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package net.sourceforge.vietocr; 17 | 18 | import java.io.*; 19 | import java.util.*; 20 | import javax.imageio.*; 21 | import javax.imageio.stream.*; 22 | import javax.imageio.metadata.*; 23 | import com.sun.media.imageio.plugins.tiff.*; 24 | import java.awt.Toolkit; 25 | import java.awt.image.*; 26 | import java.nio.ByteBuffer; 27 | import java.nio.ByteOrder; 28 | import org.w3c.dom.NodeList; 29 | 30 | public class ImageIOHelper { 31 | 32 | final static String OUTPUT_FILE_NAME = "Tesstmp"; 33 | final static String TIFF_EXT = ".tif"; 34 | final static String TIFF_FORMAT = "tiff"; 35 | final static String JAI_IMAGE_WRITER_MESSAGE = "Need to install JAI Image I/O package.\nhttps://java.net/projects/jai-imageio/"; 36 | final static String JAI_IMAGE_READER_MESSAGE = "Unsupported image format. May need to install JAI Image I/O package.\nhttps://java.net/projects/jai-imageio/"; 37 | 38 | /** 39 | * Creates a list of TIFF image files from an image file. It basically 40 | * converts images of other formats to TIFF format, or a multi-page TIFF 41 | * image to multiple TIFF image files. 
42 | * 43 | * @param imageFile input image file 44 | * @param index an index of the page; -1 means all pages, as in a multi-page 45 | * TIFF image 46 | * @return a list of TIFF image files 47 | * @throws Exception 48 | */ 49 | public static List createTiffFiles(File imageFile, int index) throws IOException { 50 | List tiffFiles = new ArrayList(); 51 | 52 | String imageFileName = imageFile.getName(); 53 | String imageFormat = imageFileName.substring(imageFileName.lastIndexOf('.') + 1); 54 | 55 | Iterator readers = ImageIO.getImageReadersByFormatName(imageFormat); 56 | 57 | if (!readers.hasNext()) { 58 | throw new RuntimeException(JAI_IMAGE_READER_MESSAGE); 59 | } 60 | 61 | ImageReader reader = readers.next(); 62 | 63 | ImageInputStream iis = ImageIO.createImageInputStream(imageFile); 64 | reader.setInput(iis); 65 | //Read the stream metadata 66 | // IIOMetadata streamMetadata = reader.getStreamMetadata(); 67 | 68 | //Set up the writeParam 69 | TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US); 70 | tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED); 71 | 72 | //Get tif writer and set output to file 73 | Iterator writers = ImageIO.getImageWritersByFormatName(TIFF_FORMAT); 74 | 75 | if (!writers.hasNext()) { 76 | throw new RuntimeException(JAI_IMAGE_WRITER_MESSAGE); 77 | } 78 | 79 | ImageWriter writer = writers.next(); 80 | 81 | //Read the stream metadata 82 | IIOMetadata streamMetadata = writer.getDefaultStreamMetadata(tiffWriteParam); 83 | 84 | int imageTotal = reader.getNumImages(true); 85 | 86 | for (int i = 0; i < imageTotal; i++) { 87 | // all if index == -1; otherwise, only index-th 88 | if (index == -1 || i == index) { 89 | // BufferedImage bi = reader.read(i); 90 | // IIOImage oimage = new IIOImage(bi, null, reader.getImageMetadata(i)); 91 | IIOImage oimage = reader.readAll(i, reader.getDefaultReadParam()); 92 | File tiffFile = File.createTempFile(OUTPUT_FILE_NAME, TIFF_EXT); 93 | ImageOutputStream ios = 
ImageIO.createImageOutputStream(tiffFile); 94 | writer.setOutput(ios); 95 | writer.write(streamMetadata, oimage, tiffWriteParam); 96 | ios.close(); 97 | tiffFiles.add(tiffFile); 98 | } 99 | } 100 | writer.dispose(); 101 | reader.dispose(); 102 | 103 | return tiffFiles; 104 | } 105 | 106 | /** 107 | * Creates a list of TIFF image files from a list of 108 | * IIOImage objects. 109 | * 110 | * @param imageList a list of IIOImage objects 111 | * @param index an index of the page; -1 means all pages 112 | * @return a list of TIFF image files 113 | * @throws Exception 114 | */ 115 | public static List createTiffFiles(List imageList, int index) throws IOException { 116 | return createTiffFiles(imageList, index, 0, 0); 117 | } 118 | 119 | public static List createTiffFiles(List imageList, int index, int dpiX, int dpiY) throws IOException { 120 | List tiffFiles = new ArrayList(); 121 | 122 | //Set up the writeParam 123 | TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US); 124 | tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED); 125 | 126 | //Get tif writer and set output to file 127 | Iterator writers = ImageIO.getImageWritersByFormatName(TIFF_FORMAT); 128 | 129 | if (!writers.hasNext()) { 130 | throw new RuntimeException(JAI_IMAGE_WRITER_MESSAGE); 131 | } 132 | 133 | ImageWriter writer = writers.next(); 134 | 135 | //Get the stream metadata 136 | IIOMetadata streamMetadata = writer.getDefaultStreamMetadata(tiffWriteParam); 137 | 138 | // all if index == -1; otherwise, only index-th 139 | for (IIOImage oimage : (index == -1 ? imageList : imageList.subList(index, index + 1))) { 140 | if (dpiX != 0 && dpiY != 0) { 141 | // Get the default image metadata. 
142 | ImageTypeSpecifier imageType = ImageTypeSpecifier.createFromRenderedImage(oimage.getRenderedImage()); 143 | IIOMetadata imageMetadata = writer.getDefaultImageMetadata(imageType, null); 144 | imageMetadata = setDPIViaAPI(imageMetadata, dpiX, dpiY); 145 | oimage.setMetadata(imageMetadata); 146 | } 147 | 148 | File tiffFile = File.createTempFile(OUTPUT_FILE_NAME, TIFF_EXT); 149 | ImageOutputStream ios = ImageIO.createImageOutputStream(tiffFile); 150 | writer.setOutput(ios); 151 | writer.write(streamMetadata, oimage, tiffWriteParam); 152 | ios.close(); 153 | tiffFiles.add(tiffFile); 154 | } 155 | writer.dispose(); 156 | 157 | return tiffFiles; 158 | } 159 | 160 | /** 161 | * Set DPI using API. 162 | */ 163 | private static IIOMetadata setDPIViaAPI(IIOMetadata imageMetadata, int dpiX, int dpiY) 164 | throws IIOInvalidTreeException { 165 | // Derive the TIFFDirectory from the metadata. 166 | TIFFDirectory dir = TIFFDirectory.createFromMetadata(imageMetadata); 167 | 168 | // Get {X,Y}Resolution tags. 169 | BaselineTIFFTagSet base = BaselineTIFFTagSet.getInstance(); 170 | TIFFTag tagXRes = base.getTag(BaselineTIFFTagSet.TAG_X_RESOLUTION); 171 | TIFFTag tagYRes = base.getTag(BaselineTIFFTagSet.TAG_Y_RESOLUTION); 172 | 173 | // Create {X,Y}Resolution fields. 174 | TIFFField fieldXRes = new TIFFField(tagXRes, TIFFTag.TIFF_RATIONAL, 175 | 1, new long[][]{{dpiX, 1}}); 176 | TIFFField fieldYRes = new TIFFField(tagYRes, TIFFTag.TIFF_RATIONAL, 177 | 1, new long[][]{{dpiY, 1}}); 178 | 179 | // Append {X,Y}Resolution fields to directory. 180 | dir.addTIFFField(fieldXRes); 181 | dir.addTIFFField(fieldYRes); 182 | 183 | // Convert to metadata object and return. 184 | return dir.getAsMetadata(); 185 | } 186 | 187 | /** 188 | * Gets pixel data of an IIOImage object. 
189 | * 190 | * @param image an IIOImage object 191 | * @return a byte buffer of pixel data 192 | * @throws IOException 193 | */ 194 | public static ByteBuffer getImageByteBuffer(IIOImage image) throws IOException { 195 | return getImageByteBuffer(image.getRenderedImage()); 196 | } 197 | 198 | /** 199 | * Gets pixel data of an RenderedImage object. 200 | * @param image an RenderedImage object 201 | * @return a byte buffer of pixel data 202 | * @throws IOException 203 | */ 204 | public static ByteBuffer getImageByteBuffer(RenderedImage image) throws IOException { 205 | //Set up the writeParam 206 | TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US); 207 | tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED); 208 | 209 | //Get tif writer and set output to file 210 | Iterator writers = ImageIO.getImageWritersByFormatName(TIFF_FORMAT); 211 | 212 | if (!writers.hasNext()) { 213 | throw new RuntimeException(JAI_IMAGE_WRITER_MESSAGE); 214 | } 215 | 216 | ImageWriter writer = writers.next(); 217 | 218 | //Get the stream metadata 219 | IIOMetadata streamMetadata = writer.getDefaultStreamMetadata(tiffWriteParam); 220 | 221 | ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); 222 | ImageOutputStream ios = ImageIO.createImageOutputStream(outputStream); 223 | writer.setOutput(ios); 224 | writer.write(streamMetadata, new IIOImage(image, null, null), tiffWriteParam); 225 | // writer.write(image); 226 | writer.dispose(); 227 | // ImageIO.write(image, "tiff", ios); // this can be used in lieu of writer 228 | ios.seek(0); 229 | BufferedImage bi = ImageIO.read(ios); 230 | return convertImageData(bi); 231 | } 232 | 233 | /** 234 | * Converts BufferedImage to ByteBuffer. 
235 | * 236 | * @param bi Input image 237 | * @return pixel data 238 | */ 239 | public static ByteBuffer convertImageData(BufferedImage bi) { 240 | DataBuffer buff = bi.getRaster().getDataBuffer(); 241 | // ClassCastException thrown if buff not instanceof DataBufferByte because raster data is not necessarily bytes. 242 | // Convert the original buffered image to grayscale. 243 | if (!(buff instanceof DataBufferByte)) { 244 | bi = ImageHelper.convertImageToGrayscale(bi); 245 | buff = bi.getRaster().getDataBuffer(); 246 | } 247 | byte[] pixelData = ((DataBufferByte) buff).getData(); 248 | // return ByteBuffer.wrap(pixelData); 249 | ByteBuffer buf = ByteBuffer.allocateDirect(pixelData.length); 250 | buf.order(ByteOrder.nativeOrder()); 251 | buf.put(pixelData); 252 | buf.flip(); 253 | return buf; 254 | } 255 | 256 | /** 257 | * Gets a list of IIOImage objects for an image file. 258 | * 259 | * @param imageFile input image file. It can be any of the supported 260 | * formats, including TIFF, JPEG, GIF, PNG, BMP, JPEG, and PDF if GPL 261 | * Ghostscript is installed 262 | * @return a list of IIOImage objects 263 | * @throws Exception 264 | */ 265 | public static List getIIOImageList(File imageFile) throws IOException { 266 | File workingTiffFile = null; 267 | 268 | ImageReader reader = null; 269 | ImageInputStream iis = null; 270 | 271 | try { 272 | // convert PDF to TIFF 273 | if (imageFile.getName().toLowerCase().endsWith(".pdf")) { 274 | workingTiffFile = PdfUtilities.convertPdf2Tiff(imageFile); 275 | imageFile = workingTiffFile; 276 | } 277 | 278 | List iioImageList = new ArrayList(); 279 | 280 | String imageFileName = imageFile.getName(); 281 | String imageFormat = imageFileName.substring(imageFileName.lastIndexOf('.') + 1); 282 | if (imageFormat.matches("(pbm|pgm|ppm)")) { 283 | imageFormat = "pnm"; 284 | } else if (imageFormat.equals("jp2")) { 285 | imageFormat = "jpeg2000"; 286 | } 287 | Iterator readers = ImageIO.getImageReadersByFormatName(imageFormat); 288 | 
289 | if (!readers.hasNext()) { 290 | throw new RuntimeException(JAI_IMAGE_READER_MESSAGE); 291 | } 292 | 293 | reader = readers.next(); 294 | iis = ImageIO.createImageInputStream(imageFile); 295 | reader.setInput(iis); 296 | 297 | int imageTotal = reader.getNumImages(true); 298 | 299 | for (int i = 0; i < imageTotal; i++) { 300 | // IIOImage oimage = new IIOImage(reader.read(i), null, reader.getImageMetadata(i)); 301 | IIOImage oimage = reader.readAll(i, reader.getDefaultReadParam()); 302 | iioImageList.add(oimage); 303 | } 304 | 305 | return iioImageList; 306 | } finally { 307 | try { 308 | if (iis != null) { 309 | iis.close(); 310 | } 311 | if (reader != null) { 312 | reader.dispose(); 313 | } 314 | } catch (Exception e) { 315 | // ignore 316 | } 317 | if (workingTiffFile != null && workingTiffFile.exists()) { 318 | workingTiffFile.delete(); 319 | } 320 | } 321 | } 322 | 323 | /** 324 | * Gets a list of IIOImage objects for a BufferedImage. 325 | * @param bi input image 326 | * @return a list of IIOImage objects 327 | * @throws IOException 328 | */ 329 | public static List getIIOImageList(BufferedImage bi) throws IOException { 330 | List iioImageList = new ArrayList(); 331 | IIOImage oimage = new IIOImage(bi, null, null); 332 | iioImageList.add(oimage); 333 | return iioImageList; 334 | } 335 | 336 | /** 337 | * Merges multiple images into one TIFF image. 
338 | * 339 | * @param inputImages an array of image files 340 | * @param outputTiff the output TIFF file 341 | * @throws Exception 342 | */ 343 | public static void mergeTiff(File[] inputImages, File outputTiff) throws IOException { 344 | List imageList = new ArrayList(); 345 | 346 | for (int i = 0; i < inputImages.length; i++) { 347 | imageList.addAll(getIIOImageList(inputImages[i])); 348 | } 349 | 350 | if (imageList.isEmpty()) { 351 | // if no image 352 | return; 353 | } 354 | 355 | Iterator writers = ImageIO.getImageWritersByFormatName(TIFF_FORMAT); 356 | 357 | if (!writers.hasNext()) { 358 | throw new RuntimeException(JAI_IMAGE_WRITER_MESSAGE); 359 | } 360 | 361 | ImageWriter writer = writers.next(); 362 | 363 | //Set up the writeParam 364 | TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US); 365 | tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED); 366 | 367 | //Get the stream metadata 368 | IIOMetadata streamMetadata = writer.getDefaultStreamMetadata(tiffWriteParam); 369 | 370 | ImageOutputStream ios = ImageIO.createImageOutputStream(outputTiff); 371 | writer.setOutput(ios); 372 | 373 | IIOImage firstIioImage = imageList.remove(0); 374 | writer.write(streamMetadata, firstIioImage, tiffWriteParam); 375 | 376 | int i = 1; 377 | for (IIOImage iioImage : imageList) { 378 | writer.writeInsert(i++, iioImage, tiffWriteParam); 379 | } 380 | ios.close(); 381 | 382 | writer.dispose(); 383 | } 384 | 385 | /** 386 | * Reads image meta data. 
387 | * 388 | * @param oimage 389 | * @return a map of meta data 390 | */ 391 | public static Map readImageData(IIOImage oimage) { 392 | Map dict = new HashMap(); 393 | 394 | IIOMetadata imageMetadata = oimage.getMetadata(); 395 | if (imageMetadata != null) { 396 | IIOMetadataNode dimNode = (IIOMetadataNode) imageMetadata.getAsTree("javax_imageio_1.0"); 397 | NodeList nodes = dimNode.getElementsByTagName("HorizontalPixelSize"); 398 | int dpiX; 399 | if (nodes.getLength() > 0) { 400 | float dpcWidth = Float.parseFloat(nodes.item(0).getAttributes().item(0).getNodeValue()); 401 | dpiX = (int) Math.round(25.4f / dpcWidth); 402 | } else { 403 | dpiX = Toolkit.getDefaultToolkit().getScreenResolution(); 404 | } 405 | dict.put("dpiX", String.valueOf(dpiX)); 406 | 407 | nodes = dimNode.getElementsByTagName("VerticalPixelSize"); 408 | int dpiY; 409 | if (nodes.getLength() > 0) { 410 | float dpcHeight = Float.parseFloat(nodes.item(0).getAttributes().item(0).getNodeValue()); 411 | dpiY = (int) Math.round(25.4f / dpcHeight); 412 | } else { 413 | dpiY = Toolkit.getDefaultToolkit().getScreenResolution(); 414 | } 415 | dict.put("dpiY", String.valueOf(dpiY)); 416 | } 417 | return dict; 418 | } 419 | 420 | /* 自定义修改识别中文*/ 421 | 422 | /** 423 | * 图片文件转换为tif格式 424 | * @param imageFile 文件路径 425 | * @param imageFormat 文件扩展名 426 | * @return 427 | */ 428 | public static File createImage(File imageFile, String imageFormat) { 429 | File tempFile = null; 430 | try { 431 | Iterator readers = ImageIO.getImageReadersByFormatName(imageFormat); 432 | ImageReader reader = readers.next(); 433 | 434 | ImageInputStream iis = ImageIO.createImageInputStream(imageFile); 435 | reader.setInput(iis); 436 | //Read the stream metadata 437 | IIOMetadata streamMetadata = reader.getStreamMetadata(); 438 | 439 | //Set up the writeParam 440 | TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.CHINESE); 441 | tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED); 442 | 443 | //Get tif 
writer and set output to file 444 | Iterator writers = ImageIO.getImageWritersByFormatName("tiff"); 445 | ImageWriter writer = writers.next(); 446 | 447 | BufferedImage bi = reader.read(0); 448 | IIOImage image = new IIOImage(bi,null,reader.getImageMetadata(0)); 449 | tempFile = tempImageFile(imageFile); 450 | ImageOutputStream ios = ImageIO.createImageOutputStream(tempFile); 451 | writer.setOutput(ios); 452 | writer.write(streamMetadata, image, tiffWriteParam); 453 | ios.close(); 454 | 455 | writer.dispose(); 456 | reader.dispose(); 457 | 458 | } catch (IOException e) { 459 | e.printStackTrace(); 460 | } 461 | return tempFile; 462 | } 463 | 464 | private static File tempImageFile(File imageFile) { 465 | String path = imageFile.getPath(); 466 | StringBuffer strB = new StringBuffer(path); 467 | strB.insert(path.lastIndexOf('.'),0); 468 | return new File(strB.toString().replaceFirst("(?<=//.)(//w+)$", "tif")); 469 | } 470 | 471 | 472 | } 473 | -------------------------------------------------------------------------------- /src/net/sourceforge/vietocr/PdfUtilities.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright @ 2009 Quan Nguyen 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package net.sourceforge.vietocr; 17 | 18 | import java.io.*; 19 | import java.util.*; 20 | import java.util.logging.*; 21 | import org.ghost4j.*; 22 | 23 | public class PdfUtilities { 24 | 25 | public static final String GS_INSTALL = "\nPlease download, install GPL Ghostscript from http://sourceforge.net/projects/ghostscript/files\nand/or set the appropriate environment variable."; 26 | 27 | private final static Logger logger = Logger.getLogger(PdfUtilities.class.getName()); 28 | 29 | /** 30 | * Convert PDF to TIFF format. 31 | * 32 | * @param inputPdfFile 33 | * @return a multi-page TIFF image 34 | */ 35 | public static File convertPdf2Tiff(File inputPdfFile) throws IOException { 36 | File[] pngFiles = null; 37 | 38 | try { 39 | pngFiles = convertPdf2Png(inputPdfFile); 40 | File tiffFile = File.createTempFile("multipage", ".tif"); 41 | 42 | // put PNG images into a single multi-page TIFF image for return 43 | ImageIOHelper.mergeTiff(pngFiles, tiffFile); 44 | return tiffFile; 45 | } catch (UnsatisfiedLinkError ule) { 46 | throw new RuntimeException(getMessage(ule.getMessage())); 47 | } catch (NoClassDefFoundError ncdfe) { 48 | throw new RuntimeException(getMessage(ncdfe.getMessage())); 49 | } finally { 50 | if (pngFiles != null) { 51 | // delete temporary PNG images 52 | for (File tempFile : pngFiles) { 53 | tempFile.delete(); 54 | } 55 | } 56 | } 57 | } 58 | 59 | /** 60 | * Convert PDF to PNG format. 
61 | * 62 | * @param inputPdfFile 63 | * @return an array of PNG images 64 | */ 65 | public static File[] convertPdf2Png(File inputPdfFile) { 66 | File imageDir = inputPdfFile.getParentFile(); 67 | 68 | if (imageDir == null) { 69 | String userDir = System.getProperty("user.dir"); 70 | imageDir = new File(userDir); 71 | } 72 | 73 | //get Ghostscript instance 74 | Ghostscript gs = Ghostscript.getInstance(); 75 | 76 | //prepare Ghostscript interpreter parameters 77 | //refer to Ghostscript documentation for parameter usage 78 | List gsArgs = new ArrayList(); 79 | gsArgs.add("-gs"); 80 | gsArgs.add("-dNOPAUSE"); 81 | gsArgs.add("-dBATCH"); 82 | gsArgs.add("-dSAFER"); 83 | gsArgs.add("-sDEVICE=pnggray"); 84 | gsArgs.add("-r300"); 85 | gsArgs.add("-dGraphicsAlphaBits=4"); 86 | gsArgs.add("-dTextAlphaBits=4"); 87 | gsArgs.add("-sOutputFile=" + imageDir.getPath() + "/workingimage%03d.png"); 88 | gsArgs.add(inputPdfFile.getPath()); 89 | 90 | //execute and exit interpreter 91 | try { 92 | gs.initialize(gsArgs.toArray(new String[0])); 93 | gs.exit(); 94 | } catch (GhostscriptException e) { 95 | logger.log(Level.SEVERE, e.getMessage(), e); 96 | } 97 | 98 | // find working files 99 | File[] workingFiles = imageDir.listFiles(new FilenameFilter() { 100 | 101 | @Override 102 | public boolean accept(File dir, String name) { 103 | return name.toLowerCase().matches("workingimage\\d{3}\\.png$"); 104 | } 105 | }); 106 | 107 | Arrays.sort(workingFiles, new Comparator() { 108 | @Override 109 | public int compare(File f1, File f2) { 110 | return f1.getName().compareTo(f2.getName()); 111 | } 112 | }); 113 | 114 | return workingFiles; 115 | } 116 | 117 | /** 118 | * Split PDF. 
119 | * @param inputPdfFile 120 | * @param outputPdfFile 121 | * @param firstPage 122 | * @param lastPage 123 | */ 124 | public static void splitPdf(String inputPdfFile, String outputPdfFile, String firstPage, String lastPage) { 125 | //get Ghostscript instance 126 | Ghostscript gs = Ghostscript.getInstance(); 127 | 128 | //prepare Ghostscript interpreter parameters 129 | //refer to Ghostscript documentation for parameter usage 130 | //gs -sDEVICE=pdfwrite -dNOPAUSE -dQUIET -dBATCH -dFirstPage=m -dLastPage=n -sOutputFile=out.pdf in.pdf 131 | List gsArgs = new ArrayList(); 132 | gsArgs.add("-gs"); 133 | gsArgs.add("-dNOPAUSE"); 134 | gsArgs.add("-dQUIET"); 135 | gsArgs.add("-dBATCH"); 136 | gsArgs.add("-sDEVICE=pdfwrite"); 137 | 138 | if (!firstPage.trim().isEmpty()) { 139 | gsArgs.add("-dFirstPage=" + firstPage); 140 | } 141 | 142 | if (!lastPage.trim().isEmpty()) { 143 | gsArgs.add("-dLastPage=" + lastPage); 144 | } 145 | 146 | gsArgs.add("-sOutputFile=" + outputPdfFile); 147 | gsArgs.add(inputPdfFile); 148 | 149 | //execute and exit interpreter 150 | try { 151 | gs.initialize(gsArgs.toArray(new String[0])); 152 | gs.exit(); 153 | } catch (GhostscriptException e) { 154 | logger.log(Level.SEVERE, e.getMessage(), e); 155 | throw new RuntimeException(e.getMessage()); 156 | } catch (UnsatisfiedLinkError ule) { 157 | throw new RuntimeException(getMessage(ule.getMessage())); 158 | } catch (NoClassDefFoundError ncdfe) { 159 | throw new RuntimeException(getMessage(ncdfe.getMessage())); 160 | } 161 | } 162 | 163 | /** 164 | * Get PDF Page Count. 
165 | * 166 | * @param inputPdfFile 167 | * @return number of pages 168 | */ 169 | public static int getPdfPageCount(String inputPdfFile) { 170 | //get Ghostscript instance 171 | Ghostscript gs = Ghostscript.getInstance(); 172 | 173 | //prepare Ghostscript interpreter parameters 174 | //refer to Ghostscript documentation for parameter usage 175 | //gs -q -sPDFname=test.pdf pdfpagecount.ps 176 | List gsArgs = new ArrayList(); 177 | gsArgs.add("-gs"); 178 | gsArgs.add("-dNOPAUSE"); 179 | gsArgs.add("-dQUIET"); 180 | gsArgs.add("-dBATCH"); 181 | gsArgs.add("-sPDFname=" + inputPdfFile); 182 | gsArgs.add("lib/pdfpagecount.ps"); 183 | 184 | int pageCount = 0; 185 | ByteArrayOutputStream os = null; 186 | 187 | //execute and exit interpreter 188 | try { 189 | //output 190 | os = new ByteArrayOutputStream(); 191 | gs.setStdOut(os); 192 | gs.initialize(gsArgs.toArray(new String[0])); 193 | pageCount = Integer.parseInt(os.toString().replace("%%Pages: ", "")); 194 | os.close(); 195 | } catch (GhostscriptException e) { 196 | logger.log(Level.SEVERE, e.getMessage(), e); 197 | } catch (Exception e) { 198 | logger.log(Level.SEVERE, e.getMessage(), e); 199 | } 200 | 201 | return pageCount; 202 | } 203 | 204 | /** 205 | * Merge PDF files. 
206 | * 207 | * @param inputPdfFiles 208 | * @param outputPdfFile 209 | */ 210 | public static void mergePdf(File[] inputPdfFiles, File outputPdfFile) { 211 | //get Ghostscript instance 212 | Ghostscript gs = Ghostscript.getInstance(); 213 | 214 | //prepare Ghostscript interpreter parameters 215 | //refer to Ghostscript documentation for parameter usage 216 | //gs -sDEVICE=pdfwrite -dNOPAUSE -dQUIET -dBATCH -sOutputFile=out.pdf in1.pdf in2.pdf in3.pdf 217 | List gsArgs = new ArrayList(); 218 | gsArgs.add("-gs"); 219 | gsArgs.add("-dNOPAUSE"); 220 | gsArgs.add("-dQUIET"); 221 | gsArgs.add("-dBATCH"); 222 | gsArgs.add("-sDEVICE=pdfwrite"); 223 | gsArgs.add("-sOutputFile=" + outputPdfFile.getPath()); 224 | 225 | for (File inputPdfFile : inputPdfFiles) { 226 | gsArgs.add(inputPdfFile.getPath()); 227 | } 228 | 229 | //execute and exit interpreter 230 | try { 231 | gs.initialize(gsArgs.toArray(new String[0])); 232 | gs.exit(); 233 | } catch (GhostscriptException e) { 234 | logger.log(Level.SEVERE, e.getMessage(), e); 235 | throw new RuntimeException(e.getMessage()); 236 | } catch (UnsatisfiedLinkError ule) { 237 | throw new RuntimeException(getMessage(ule.getMessage())); 238 | } catch (NoClassDefFoundError ncdfe) { 239 | throw new RuntimeException(getMessage(ncdfe.getMessage())); 240 | } 241 | } 242 | 243 | static String getMessage(String message) { 244 | if (message.contains("library 'gs") || message.contains("ghost4j")) { 245 | return message + GS_INSTALL; 246 | } 247 | return message; 248 | } 249 | } 250 | -------------------------------------------------------------------------------- /tessdata/chi_sim.traineddata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/tessdata/chi_sim.traineddata -------------------------------------------------------------------------------- /tessdata/chi_tra.traineddata: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/tessdata/chi_tra.traineddata -------------------------------------------------------------------------------- /tessdata/configs/api_config: -------------------------------------------------------------------------------- 1 | tessedit_zero_rejection T 2 | -------------------------------------------------------------------------------- /tessdata/configs/digits: -------------------------------------------------------------------------------- 1 | tessedit_char_whitelist 0123456789-. 2 | -------------------------------------------------------------------------------- /tessdata/configs/hocr: -------------------------------------------------------------------------------- 1 | tessedit_create_hocr 1 -------------------------------------------------------------------------------- /tessdata/eng.traineddata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsdgdf/Java_OCR/805804f4cf6d35c81626c73b65537f63ab5ec16f/tessdata/eng.traineddata -------------------------------------------------------------------------------- /test/com/ricky/java/ocr/Tesseract1Test.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright @ 2010 Quan Nguyen 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.ricky.java.ocr; 17 | 18 | import net.sourceforge.tess4j.TessAPI1; 19 | import net.sourceforge.tess4j.Tesseract1; 20 | import net.sourceforge.tess4j.TesseractException; 21 | import net.sourceforge.vietocr.ImageHelper; 22 | import net.sourceforge.vietocr.ImageIOHelper; 23 | import com.recognition.software.jdeskew.ImageDeskew; 24 | import com.sun.jna.Pointer; 25 | import javax.imageio.ImageIO; 26 | import java.awt.Rectangle; 27 | import java.awt.image.BufferedImage; 28 | import java.io.File; 29 | import java.nio.IntBuffer; 30 | import java.util.ArrayList; 31 | import java.util.List; 32 | import javax.imageio.IIOImage; 33 | import org.junit.After; 34 | import org.junit.AfterClass; 35 | import org.junit.Before; 36 | import org.junit.BeforeClass; 37 | import org.junit.Test; 38 | import static org.junit.Assert.*; 39 | 40 | public class Tesseract1Test { 41 | 42 | static final double MINIMUM_DESKEW_THRESHOLD = 0.05d; 43 | Tesseract1 instance; 44 | 45 | public Tesseract1Test() { 46 | } 47 | 48 | @BeforeClass 49 | public static void setUpClass() throws Exception { 50 | } 51 | 52 | @AfterClass 53 | public static void tearDownClass() throws Exception { 54 | } 55 | 56 | @Before 57 | public void setUp() { 58 | instance = new Tesseract1(); 59 | } 60 | 61 | @After 62 | public void tearDown() { 63 | } 64 | 65 | /** 66 | * Test of doOCR method, of class Tesseract1. 
67 | */ 68 | @Test 69 | public void testDoOCR_File() throws Exception, TesseractException { 70 | System.out.println("doOCR on a PNG image"); 71 | // File imageFile = new File("eurotext.png"); 72 | File imageFile = new File("chi.jpg"); 73 | // String expResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 #90 dog"; 74 | String result = instance.doOCR(imageFile); 75 | System.out.println(result); 76 | //assertEquals(expResult, result.substring(0, expResult.length())); 77 | } 78 | 79 | /** 80 | * Test of doOCR method, of class Tesseract1. 81 | */ 82 | @Test 83 | public void testDoOCR_File_Rectangle() throws Exception { 84 | System.out.println("doOCR on a BMP image with bounding rectangle"); 85 | File imageFile = new File("eurotext.bmp"); 86 | Rectangle rect = new Rectangle(0, 0, 1024, 800); // define an equal or smaller region of interest on the image 87 | String expResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 #90 dog"; 88 | String result = instance.doOCR(imageFile, rect); 89 | System.out.println(result); 90 | assertEquals(expResult, result.substring(0, expResult.length())); 91 | } 92 | 93 | /** 94 | * Test of doOCR method, of class Tesseract1. 95 | */ 96 | @Test 97 | public void testDoOCR_List_Rectangle() throws Exception { 98 | System.out.println("doOCR on a PDF document"); 99 | File imageFile = new File("eurotext.pdf"); 100 | List imageList = ImageIOHelper.getIIOImageList(imageFile); 101 | String expResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 #90 dog"; 102 | String result = instance.doOCR(imageList, null); 103 | System.out.println(result); 104 | assertEquals(expResult, result.substring(0, expResult.length())); 105 | } 106 | 107 | /** 108 | * Test of doOCR method, of class Tesseract1. 
109 | */ 110 | @Test 111 | public void testDoOCR_BufferedImage() throws Exception { 112 | System.out.println("doOCR on a buffered image of a GIF"); 113 | File imageFile = new File("eurotext.gif"); 114 | BufferedImage bi = ImageIO.read(imageFile); 115 | String expResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 #90 dog"; 116 | String result = instance.doOCR(bi); 117 | System.out.println(result); 118 | assertEquals(expResult, result.substring(0, expResult.length())); 119 | } 120 | 121 | /** 122 | * Test of deskew algorithm. 123 | */ 124 | @Test 125 | public void testDoOCR_SkewedImage() throws Exception { 126 | System.out.println("doOCR on a skewed PNG image"); 127 | File imageFile = new File("eurotext_deskew.png"); 128 | BufferedImage bi = ImageIO.read(imageFile); 129 | ImageDeskew id = new ImageDeskew(bi); 130 | double imageSkewAngle = id.getSkewAngle(); // determine skew angle 131 | if ((imageSkewAngle > MINIMUM_DESKEW_THRESHOLD || imageSkewAngle < -(MINIMUM_DESKEW_THRESHOLD))) { 132 | bi = ImageHelper.rotateImage(bi, -imageSkewAngle); // deskew image 133 | } 134 | 135 | String expResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 #90 dog"; 136 | String result = instance.doOCR(bi); 137 | System.out.println(result); 138 | assertEquals(expResult, result.substring(0, expResult.length())); 139 | } 140 | 141 | /** 142 | * Test of extending Tesseract1. 
143 | */ 144 | @Test 145 | public void testExtendingTesseract1() throws Exception { 146 | System.out.println("Extends Tesseract1"); 147 | File imageFile = new File("eurotext.tif"); 148 | 149 | String expResult = "The (quick) [brown] {fox} jumps!\nOver the $43,456.78 #90 dog"; 150 | String[] expResults = expResult.split("\\s"); 151 | 152 | Tess1Extension instance1 = new Tess1Extension(); 153 | List result = instance1.getWords(imageFile); 154 | 155 | //print the complete result 156 | for (Word word : result) { 157 | System.out.println(word); 158 | } 159 | 160 | List text = new ArrayList(); 161 | for (Word word : result.subList(0, expResults.length)) { 162 | text.add(word.getText()); 163 | } 164 | 165 | assertArrayEquals(expResults, text.toArray()); 166 | } 167 | 168 | class Tess1Extension extends Tesseract1 { 169 | 170 | public List getWords(File file) { 171 | this.init(); 172 | this.setTessVariables(); 173 | 174 | List words = new ArrayList(); 175 | try { 176 | BufferedImage bi = ImageIO.read(file); 177 | setImage(bi, null); 178 | 179 | TessAPI1.TessBaseAPIRecognize(this.getHandle(), null); 180 | TessAPI1.TessResultIterator ri = TessAPI1.TessBaseAPIGetIterator(this.getHandle()); 181 | TessAPI1.TessPageIterator pi = TessAPI1.TessResultIteratorGetPageIterator(ri); 182 | TessAPI1.TessPageIteratorBegin(pi); 183 | 184 | do { 185 | Pointer ptr = TessAPI1.TessResultIteratorGetUTF8Text(ri, TessAPI1.TessPageIteratorLevel.RIL_WORD); 186 | String text = ptr.getString(0); 187 | TessAPI1.TessDeleteText(ptr); 188 | float confidence = TessAPI1.TessResultIteratorConfidence(ri, TessAPI1.TessPageIteratorLevel.RIL_WORD); 189 | IntBuffer leftB = IntBuffer.allocate(1); 190 | IntBuffer topB = IntBuffer.allocate(1); 191 | IntBuffer rightB = IntBuffer.allocate(1); 192 | IntBuffer bottomB = IntBuffer.allocate(1); 193 | TessAPI1.TessPageIteratorBoundingBox(pi, TessAPI1.TessPageIteratorLevel.RIL_WORD, leftB, topB, rightB, bottomB); 194 | int left = leftB.get(); 195 | int top = topB.get(); 196 
| int right = rightB.get(); 197 | int bottom = bottomB.get(); 198 | Word word = new Word(text, confidence, new Rectangle(left, top, right - left, bottom - top)); 199 | words.add(word); 200 | } while (TessAPI1.TessPageIteratorNext(pi, TessAPI1.TessPageIteratorLevel.RIL_WORD) == TessAPI1.TRUE); 201 | 202 | return words; 203 | } catch (Exception e) { 204 | return words; 205 | } finally { 206 | this.dispose(); 207 | } 208 | } 209 | } 210 | 211 | class Word { 212 | 213 | private String text; 214 | private float confidence; 215 | private Rectangle rect; 216 | 217 | public Word(String text, float confidence, Rectangle rect) { 218 | this.text = text; 219 | this.confidence = confidence; 220 | this.rect = rect; 221 | } 222 | 223 | /** 224 | * @return the text 225 | */ 226 | public String getText() { 227 | return text; 228 | } 229 | 230 | /** 231 | * @return the confidence 232 | */ 233 | public float getConfidence() { 234 | return confidence; 235 | } 236 | 237 | /** 238 | * @return the bounding box 239 | */ 240 | public Rectangle getRect() { 241 | return rect; 242 | } 243 | 244 | @Override 245 | public String toString() { 246 | return String.format("%s\t[Confidence: %f Bounding box: %d %d %d %d]", text, confidence, rect.x, rect.y, rect.width, rect.height); 247 | } 248 | } 249 | } -------------------------------------------------------------------------------- /test/testimg/OCR_tesseract.java: -------------------------------------------------------------------------------- 1 | package testimg; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileInputStream; 6 | import java.io.InputStreamReader; 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | 10 | import net.sourceforge.vietocr.ImageIOHelper; 11 | //import org.jdesktop.swingx.util.OS; 12 | /** 13 | * Created by zhanghong on 2016/8/26. 
14 | */ 15 | public class OCR_tesseract { 16 | private final String LANG_OPTION = "-l"; //英文字母小写l,并非数字1 17 | private final String EOL = System.getProperty("line.separator"); 18 | // private String tessPath = "C://Program Files//Tesseract-OCR"; 19 | private String tessPath = "C:\\Program Files (x86)\\Tesseract-OCR"; 20 | 21 | // 注意这个路径,为安装的tesseract-OCR的路径 22 | 23 | //private String tessPath = new File("tesseract").getAbsolutePath(); 24 | 25 | public String recognizeText(File imageFile,String imageFormat)throws Exception{ 26 | File tempImage = ImageIOHelper.createImage(imageFile, imageFormat); 27 | File outputFile = new File(imageFile.getParentFile(),"output"); 28 | StringBuffer strB = new StringBuffer(); 29 | List cmd = new ArrayList(); 30 | // if(OS.isWindowsXP()){ 31 | cmd.add(tessPath+"//tesseract"); 32 | // }else if(OS.isLinux()){ 33 | // cmd.add("tesseract"); 34 | // }else{ 35 | // cmd.add(tessPath+"//tesseract"); 36 | // } 37 | cmd.add(""); 38 | cmd.add(outputFile.getName()); 39 | cmd.add(LANG_OPTION); 40 | cmd.add("chi_sim"); 41 | //cmd.add("eng"); 42 | 43 | ProcessBuilder pb = new ProcessBuilder(); 44 | pb.directory(imageFile.getParentFile()); 45 | 46 | cmd.set(1, tempImage.getName()); 47 | pb.command(cmd); 48 | pb.redirectErrorStream(true); 49 | 50 | Process process = pb.start(); 51 | //tesseract.exe 1.jpg 1 -l chi_sim 52 | int w = process.waitFor(); 53 | 54 | //删除临时正在工作文件 55 | tempImage.delete(); 56 | 57 | if(w==0){ 58 | BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(outputFile.getAbsolutePath()+".txt"),"UTF-8")); 59 | 60 | String str; 61 | while((str = in.readLine())!=null){ 62 | strB.append(str).append(EOL); 63 | } 64 | in.close(); 65 | }else{ 66 | String msg; 67 | switch(w){ 68 | case 1: 69 | msg = "Errors accessing files.There may be spaces in your image's filename."; 70 | break; 71 | case 29: 72 | msg = "Cannot recongnize the image or its selected region."; 73 | break; 74 | case 31: 75 | msg = "Unsupported image 
format."; 76 | break; 77 | default: 78 | msg = "Errors occurred."; 79 | } 80 | tempImage.delete(); 81 | throw new RuntimeException(msg); 82 | } 83 | new File(outputFile.getAbsolutePath()+".txt").delete(); 84 | return strB.toString(); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /test/testimg/TesseractExp.java: -------------------------------------------------------------------------------- 1 | package testimg; 2 | import java.io.File; 3 | import net.sourceforge.tess4j.*; 4 | 5 | /** 6 | * Created by zhanghong on 2016/8/26. 7 | */ 8 | public class TesseractExp { 9 | public static void main(String[] args) { 10 | // File imageFile = new File("eurotext.tif"); 11 | File imageFile = new File("chi.jpg"); 12 | File imageFile1 = new File("chi1.jpg"); 13 | ITesseract instance = new Tesseract(); // JNA Interface Mapping 14 | // ITesseract instance = new Tesseract1(); // JNA Direct Mapping 15 | 16 | try { 17 | String result = instance.doOCR(imageFile1); 18 | System.out.println(result); 19 | } catch (TesseractException e) { 20 | System.err.println(e.getMessage()); 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /test/testimg/TestOCR.java: -------------------------------------------------------------------------------- 1 | package testimg; 2 | import java.io.File; 3 | import java.io.IOException; 4 | /** 5 | * Created by zhanghong on 2016/8/26. 
6 | */ 7 | public class TestOCR { 8 | 9 | /** 10 | * @param args 11 | */ 12 | public static void main(String[] args) { 13 | //String path = "chi.jpg"; 14 | String path ="D:\\Java_OCR\\Java_OCR\\eurotext.png"; 15 | try { 16 | String valCode = new OCR_tesseract().recognizeText(new File(path), "png"); 17 | //6905_1294109277pAj9.jpg 18 | System.out.println(valCode); 19 | } catch (IOException e) { 20 | e.printStackTrace(); 21 | } catch (Exception e) { 22 | e.printStackTrace(); 23 | } 24 | } 25 | } 26 | --------------------------------------------------------------------------------