├── .gitignore ├── LICENSE ├── composer.json ├── dockerfile ├── example ├── .gitignore ├── OCR.php ├── README.MD ├── Storage.php ├── composer.json ├── db │ └── db.db ├── fonts │ └── num.ttf ├── img │ ├── .gitignore │ └── .gitkeep ├── index.php └── study.php ├── img └── 1.png ├── readme.md └── src ├── Image.php ├── ImageConnect.php ├── ImageOCR.php ├── ImageTool.php └── ImageWater.php /.gitignore: -------------------------------------------------------------------------------- 1 | old/ 2 | .idea/ 3 | vendor/ 4 | composer.lock 5 | .*/ 6 | info/ 7 | test.php -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Mo Huishou 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mohuishou/image-ocr", 3 | "description": "验证码识别", 4 | "keywords": [ 5 | "image", 6 | "ocr", 7 | "验证码", 8 | "验证码识别" 9 | ], 10 | "version": "0.1.2", 11 | "license": "MIT", 12 | "authors": [ 13 | { 14 | "name": "mohuishou", 15 | "email": "1@lailin.xyz" 16 | } 17 | ], 18 | "require": { 19 | "php": ">5.5" 20 | }, 21 | "autoload": { 22 | "psr-4": { 23 | "Mohuishou\\ImageOCR\\": "src/" 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /dockerfile: -------------------------------------------------------------------------------- 1 | FROM php:7.3.1-alpine 2 | 3 | RUN apk add --no-cache freetype libpng libjpeg-turbo freetype-dev libpng-dev libjpeg-turbo-dev && \ 4 | docker-php-ext-configure gd \ 5 | --with-gd \ 6 | --with-freetype-dir=/usr/include/ \ 7 | --with-png-dir=/usr/include/ \ 8 | --with-jpeg-dir=/usr/include/ && \ 9 | NPROC=$(grep -c ^processor /proc/cpuinfo 2>/dev/null || 1) && \ 10 | docker-php-ext-install -j${NPROC} gd && \ 11 | docker-php-ext-install exif && \ 12 | apk del --no-cache freetype-dev libpng-dev libjpeg-turbo-dev 13 | 14 | WORKDIR /app 15 | 16 | COPY . . 
17 | 18 | WORKDIR /app/example 19 | 20 | EXPOSE 8088 21 | 22 | CMD ["php", "-S", "0.0.0.0:8088"] -------------------------------------------------------------------------------- /example/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | old/ 3 | .idea/ 4 | vendor/ 5 | composer.lock 6 | .*/ -------------------------------------------------------------------------------- /example/OCR.php: -------------------------------------------------------------------------------- 1 | 5 | * Date: 2017/3/19 6 | * Time: 23:32 7 | */ 8 | 9 | namespace Mohuishou\ImageOCR\Example; 10 | 11 | use Mohuishou\ImageOCR\Image; 12 | use Mohuishou\ImageOCR\ImageOCR; 13 | use Mohuishou\ImageOCR\ImageTool; 14 | 15 | /** 16 | * 图像识别 17 | * Class ImageOCR 18 | * @author mohuishou<1@lailin.xyz> 19 | * @package Mohuishou\ImageOCR 20 | */ 21 | class OCR 22 | { 23 | 24 | protected $image_ocr; 25 | 26 | protected $standard_data; 27 | 28 | protected $hash_data=[]; 29 | 30 | public function __construct($path) 31 | { 32 | $image=new Image($path); 33 | $this->image_ocr=new ImageOCR($image); 34 | 35 | //开启的debug模式 36 | $this->image_ocr->setDebug(true); 37 | 38 | //初始化 39 | $this->image_ocr->setMaxGrey(255); 40 | $this->image_ocr->setMinGrey(10); 41 | $this->image_ocr->setStandardWidth(13); 42 | $this->image_ocr->setStandardHeight(20); 43 | } 44 | 45 | public function standard() 46 | { 47 | //避免重复调用 48 | if (!empty($this->standard_data)) { 49 | return $this->standard_data; 50 | } 51 | 52 | try { 53 | //第一步灰度化 54 | $this->image_ocr->grey(); 55 | 56 | //第二步二值化 57 | $this->image_ocr->hashByBackground(ImageOCR::MAX_MODEL); 58 | 59 | //下一步会用到连通域分割法,先初始化 60 | $this->image_ocr->setImageConnect(); 61 | 62 | //第三部除噪&第四步分割 63 | $this->image_ocr->splitByConnect(); 64 | 65 | //第五步标准化 66 | return $this->image_ocr->standard(); 67 | } catch (\Exception $e) { 68 | echo $e->getMessage(); 69 | } 70 | return false; 71 | } 72 | 73 | public function draw() 74 | { 75 | 
foreach ($this->standard() as $v) { 76 | ImageTool::drawBrowser($v); 77 | echo "
"; 78 | } 79 | } 80 | 81 | public function save($path) 82 | { 83 | $this->image_ocr->saveImage($path); 84 | } 85 | 86 | /** 87 | * 返回识别结果 88 | * @author mohuishou<1@lailin.xyz> 89 | */ 90 | public function ocr() 91 | { 92 | $res = []; 93 | $hash_data=$this->getHash(); 94 | $db=Storage::getInstance(); 95 | $samples=$db->get(); 96 | foreach ($hash_data as $k => $v) { 97 | $res[]=$this->compare($v, $samples); 98 | } 99 | return implode("", $res); 100 | } 101 | 102 | /** 103 | * 和特征值库进行对比 104 | * @author mohuishou<1@lailin.xyz> 105 | * @param array $hash 待识别的二值化图像字符串 106 | * @param array $samples 特征值库的数组 107 | * @return string $code 返回识别的字符 108 | */ 109 | public function compare($hash, $samples) 110 | { 111 | $code=0; 112 | $s = 0; 113 | foreach ($samples as $k => $v) { 114 | $samples_hash_data=str_split($v["hash"]); 115 | $c = count( array_intersect_assoc ($samples_hash_data, $hash) ); 116 | if ($c>$s) { 117 | $s=$c; 118 | $code=$v["code"]; 119 | } 120 | if ($s>0.99*count($samples_hash_data)) { 121 | return $k; 122 | } 123 | } 124 | return $code; 125 | } 126 | 127 | public function getHash() 128 | { 129 | if (!empty($this->hash_data)) { 130 | return $this->hash_data; 131 | } 132 | $standard=$this->standard(); 133 | foreach ($standard as $k => $v) { 134 | $this->hash_data[$k]=[]; 135 | foreach ($v as $value) { 136 | $this->hash_data[$k]=array_merge($this->hash_data[$k], $value); 137 | } 138 | } 139 | return $this->hash_data; 140 | } 141 | 142 | 143 | public function study($code) 144 | { 145 | $hash_data=$this->getHash(); 146 | $standard_data=$this->standard(); 147 | $code_arr=str_split($code); 148 | if (count($code_arr)!=count($standard_data)) { 149 | echo "错误!您输入的验证码位数与识别的位数不符,请检查您的验证码!
"; 150 | echo "您输入的字符串为:$code
"; 151 | echo "标准化数组为:
"; 152 | $this->draw(); 153 | exit(0); 154 | } 155 | $db=Storage::getInstance(); 156 | foreach ($code_arr as $k => $v) { 157 | $db->add($v, $hash_data[$k]); 158 | } 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /example/README.MD: -------------------------------------------------------------------------------- 1 | # EXAMPLE 2 | 3 | ## install 4 | 5 | ``` 6 | composer install 7 | ``` 8 | 9 | ## 说明 10 | 11 | `study.php` 用于建立基本的模型库,数据保存在`db/db.db`当中 12 | `index.php` 直接查看识别的效果 13 | `OCR.php` 基础的使用例子 14 | 15 | ## FAQ 16 | 17 | 如果遇到下面这个问题,请确认所安装的 PHP 有没有带 freetype 18 | 19 | ``` 20 | Call to undefined function imagettfbbox() 21 | 22 | // for mac 23 | brew install php --with-freetype 24 | ``` 25 | -------------------------------------------------------------------------------- /example/Storage.php: -------------------------------------------------------------------------------- 1 | _database = new Medoo([ 14 | 'database_type' => 'sqlite', 15 | 'database_file' => __DIR__ . 
'/db/db.db'
        ]);
    }

    /**
     * Singleton guard: a private __clone() stops callers from duplicating
     * the instance with the clone operator.
     */
    private function __clone()
    {
    }

    /**
     * Return the shared Storage instance, creating it on first use.
     *
     * @return self
     */
    public static function getInstance(){
        if(self::$_instance == null){
            self::$_instance = new self();
        }
        return self::$_instance;
    }

    /**
     * Store one learned sample in the "ocr" table.
     *
     * @param string $code the character the sample represents
     * @param array $hash flat list of 0/1 values; saved as one concatenated string
     */
    public function add($code,$hash){
        $this->_database->insert("ocr",[
            "code" => $code,
            "hash" => implode("",$hash)
        ]);
    }

    /**
     * Read samples from the "ocr" table.
     *
     * @param string|null $code when given, only samples stored for this character are returned
     * @return array rows with the columns "hash" and "code"
     */
    public function get($code = null){
        $columns = ["hash","code"];
        if ($code === null) {
            return $this->_database->select("ocr",$columns);
        }
        // Apply the requested filter as a WHERE clause.
        return $this->_database->select("ocr",$columns,["code" => $code]);
    }

}
-------------------------------------------------------------------------------- /example/composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mohuishou/imageocr-example", 3 | "description": "the example for mohuishou/imageocr", 4 | "type": "project", 5 | "license": "MIT", 6 | "authors": [ 7 | { 8 | "name": "mohuishou", 9 | "email": "1@lailin.xyz" 10 | } 11 | ], 12 | "require": { 13 | "catfan/Medoo": "^1.4", 14 | "lifei6671/php-captcha": "^0.1.1", 15 | "mohuishou/image-ocr": "^0.1.1" 16 | }, 17 | "autoload": { 18 | "psr-4": { 19 | "Mohuishou\\ImageOCR\\Example\\": "./" 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /example/db/db.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mohuishou/ImageOCR/c66ac9e25d0c79f4559f353c982e83b3d4bb7c10/example/db/db.db -------------------------------------------------------------------------------- /example/fonts/num.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mohuishou/ImageOCR/c66ac9e25d0c79f4559f353c982e83b3d4bb7c10/example/fonts/num.ttf -------------------------------------------------------------------------------- /example/img/.gitignore: 
-------------------------------------------------------------------------------- 1 | *.png -------------------------------------------------------------------------------- /example/img/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mohuishou/ImageOCR/c66ac9e25d0c79f4559f353c982e83b3d4bb7c10/example/img/.gitkeep -------------------------------------------------------------------------------- /example/index.php: -------------------------------------------------------------------------------- 1 | initialize([ 11 | 'width' => 150, // 宽度 12 | 'height' => 50, // 高度 13 | 'line' => false, // 直线 14 | 'curve' => false, // 曲线 15 | 'noise' => 0, // 噪点背景 16 | 'fonts' => ["./fonts/num.ttf"] // 字体 17 | ]); 18 | 19 | $captch->create(); 20 | $img_path=__DIR__."/img/1.png"; 21 | $captch->save($img_path,1); 22 | 23 | $img=new \Mohuishou\ImageOCR\Example\OCR($img_path); 24 | ?> 25 | 26 | 27 | 28 | 29 | 31 | 32 | Document 33 | 34 | 39 | 40 |

ocr(); ?>

41 |
42 |
43 | Form mohuishou/ImageOCR 44 | 45 | 46 | -------------------------------------------------------------------------------- /example/study.php: -------------------------------------------------------------------------------- 1 | . 4 | * User: mohuishou<1@lailin.xyz> 5 | * Date: 2016/5/1 0001 6 | * Time: 20:44 7 | */ 8 | namespace Mohuishou\ImageOCR\Example; 9 | 10 | use Minho\Captcha\CaptchaBuilder; 11 | 12 | require_once 'vendor/autoload.php'; 13 | 14 | $img_path=__DIR__."/img/1.png"; 15 | 16 | if (isset($_POST['send'])&&$_POST['send']=="send") { 17 | $ocr=new OCR($img_path); 18 | $ocr->study($_POST['code']); 19 | echo ""; 20 | $ocr=null; 21 | } else { 22 | $captch = new CaptchaBuilder(); 23 | 24 | $captch->initialize([ 25 | 'width' => 150, // 宽度 26 | 'height' => 50, // 高度 27 | 'line' => false, // 直线 28 | 'curve' => false, // 曲线 29 | 'noise' => 0, // 噪点背景 30 | 'fonts' => ["./fonts/num.ttf"] // 字体 31 | ]); 32 | 33 | $captch->create(); 34 | 35 | $captch->save($img_path, 1); 36 | } 37 | 38 | ?> 39 | 40 | 41 | 42 | 43 | Study 44 | 45 | 46 |
47 | 48 | 49 | 50 |
51 | 52 | -------------------------------------------------------------------------------- /img/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mohuishou/ImageOCR/c66ac9e25d0c79f4559f353c982e83b3d4bb7c10/img/1.png -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # ImageOCR 2 | 3 | > php 验证码识别库,对于非粘连字符具有很好的识别效果,对于一般粘连字符也能有较为良好的识别 4 | > 除噪算法支持孤立点除杂和连通域除噪,分割算法支持等宽分割、连通域分割以及滴水算法分割 5 | 6 | ## 示例效果 7 | 8 | ![示例](./img/1.png) 9 | 10 | ## Install 11 | 12 | ``` 13 | composer require mohuishou/image-ocr 14 | ``` 15 | 16 | ## 使用方法 17 | 18 | 例子详见 [example](./example) 19 | 20 | ### use docker 21 | 22 | ```bash 23 | docker run --rm -p 8088:8088 mohuishou/image-ocr 24 | ``` 25 | 26 | 点击 http://localhost:8088 查看效果 27 | 28 | ### 大致流程: 29 | 30 | ``` 31 | 初始化 -> 灰度化 ---> 二值化 ---> 除噪点 -> 分割 -> 标准化 -> 识别 32 | ``` 33 | 34 | #### 初始化 35 | 36 | 对象初始化 37 | 38 | ```php 39 | $image=new Image($img_path); 40 | $image_ocr=new ImageOCR($image) 41 | ``` 42 | 43 | 初始化二值化阈值 44 | 45 | ```php 46 | $image_ocr->setMaxGrey(90); 47 | $image_ocr->setMinGrey(10); 48 | ``` 49 | 50 | 初始化标准化图片宽高 51 | 52 | ```php 53 | $image_ocr->setStandardWidth(13); 54 | $image_ocr->setStandardHeight(20); 55 | ``` 56 | 57 | #### 开启 Debug 58 | 59 | ```php 60 | $image_ocr->setDebug(true); 61 | ``` 62 | 63 | #### 灰度化 64 | 65 | ```php 66 | try{ 67 | $image_ocr->grey(); 68 | }catch (\Exception $e){ 69 | echo $e->getMessage(); 70 | } 71 | ``` 72 | 73 | #### 二值化 74 | 75 | 注意:这一步的前提是需要先执行上一步灰度化,不然会抛出一个错误 76 | 77 | ```php 78 | try{ 79 | $image_ocr->hash($max_grey=null,$min_grey=null); 80 | }catch (\Exception $e){ 81 | echo $e->getMessage(); 82 | } 83 | ``` 84 | 85 | 
二值化支持两种方式,第一种`$image_ocr->hash($max_grey=null,$min_grey=null)`即为上面那种固定的阈值范围,第二种为`hashByBackground($model=self::MAX_MODEL,$max_grey=null,$min_grey=null)`,通过背景图像的灰度值,动态取阈值,支持三种模式`MAX_MODEL`,`MIN_MODEL`,`BG_MODEL`分别是最大值、最小值和背景模式,最大值模式会用背景的灰度值替换阈值的上限,最小值模式替换下限,背景模式上下限都替换,即为只去除背景 86 | 87 | #### 除噪点 88 | 89 | 前置条件为二值化 90 | 91 | ##### 孤立点除噪法 92 | 93 | ```php 94 | try{ 95 | $image_ocr->removeSpots(); 96 | }catch (\Exception $e){ 97 | echo $e->getMessage(); 98 | } 99 | ``` 100 | 101 | ##### 连通域除噪法 102 | 103 | _[如果要使用连通域分割法,可以跳过连通域除噪点,分割的同时可以一并除噪]_ 104 | 105 | ```php 106 | try{ 107 | //使用之前需要初始化连通域对象 108 | $image_ocr->setImageConnect(); 109 | //除噪 110 | $image_ocr->removeSpotsByConnect(); 111 | }catch (\Exception $e){ 112 | echo $e->getMessage(); 113 | } 114 | ``` 115 | 116 | #### 分割 117 | 118 | ##### 非粘连字符串 119 | 120 | 连通域分割法 121 | 122 | ```php 123 | try{ 124 | //使用之前需要初始化连通域对象 125 | $image_ocr->setImageConnect(); 126 | //分割 127 | $image_ocr->splitByConnect(); 128 | }catch (\Exception $e){ 129 | echo $e->getMessage(); 130 | } 131 | ``` 132 | 133 | ##### 粘连字符串 134 | 135 | 滴水算法分割 136 | 137 | TODO: 待测试 138 | 139 | #### 标准化 140 | 141 | ```php 142 | try{ 143 | $standard_data=$image_ocr->standard(); 144 | }catch (\Exception $e){ 145 | echo $e->getMessage(); 146 | } 147 | ``` 148 | 149 | #### 识别 150 | 151 | TODO:待完善 152 | 153 | ## API 154 | 155 | ```php 156 | ImageOCR::__construct(Image $image) 157 | ImageOCR::saveImage($path) 158 | ImageOCR::grey() 159 | ImageOCR::hash($max_grey=null,$min_grey=null) 160 | ImageOCR::hashByBackground($model=self::MAX_MODEL,$max_grey=null,$min_grey=null) 161 | ImageOCR::removeSpots() 162 | ImageOCR::removeSpotsByConnect() 163 | ImageOCR::standard() 164 | ImageOCR::setImageConnect() 165 | ImageOCR::setImage(Image $image) 166 | ImageOCR::getStandardData() 167 | ImageOCR::setMaxGrey($max_grey) 168 | ImageOCR::setMinGrey($min_grey) 169 | ImageOCR::setStandardWidth($standard_width) 170 | ImageOCR::setStandardHeight($standard_height) 171 | 172 | 
//ImageTool的方法均为静态方法 173 | ImageTool::removeZero($data) 174 | ImageTool::removeZeroColumn($hash_data) 175 | ImageTool::drawBrowser($data) 176 | ImageTool::transposeAndRemoveZero($hash_data) 177 | ImageTool::hashTranspose($hash_data) 178 | ImageTool::img2hash($img) 179 | ImageTool::hash2img($hash_data,$padding=0) 180 | ``` 181 | 182 | ## CHANGELOG 183 | 184 | ### 0.2 [2017-4-1] 185 | 186 | ### 0.1 [2016-10-7] 187 | 188 | 1. 默认模板保存方式由数据库改为文件,保存路径为./db/db.json 189 | 2. 使用 composer 安装 190 | -------------------------------------------------------------------------------- /src/Image.php: -------------------------------------------------------------------------------- 1 | . 4 | * User: mohuishou<1@lailin.xyz> 5 | * Date: 2016/5/1 0001 6 | * Time: 20:29 7 | */ 8 | 9 | namespace Mohuishou\ImageOCR; 10 | 11 | class Image 12 | { 13 | 14 | /** 15 | * 最大灰度值 16 | * @var int 17 | * @author mohuishou<1@lailin.xyz> 18 | */ 19 | public $max_grey=null; 20 | 21 | /** 22 | * 最小灰度值 23 | * @var int 24 | * @author mohuishou<1@lailin.xyz> 25 | */ 26 | public $min_grey=null; 27 | 28 | /** 29 | * 标准化图像的宽度 30 | * @var int 31 | * @author mohuishou<1@lailin.xyz> 32 | */ 33 | public $standard_width=10; 34 | 35 | /** 36 | * 标准化图像的高度 37 | * @var int 38 | * @author mohuishou<1@lailin.xyz> 39 | */ 40 | public $standard_height=10; 41 | 42 | 43 | //图像字符串的个数 44 | const CHAR_NUM=4; 45 | 46 | //图像的宽度与高度信息 47 | private $_image_w; 48 | private $_image_h; 49 | 50 | /** 51 | * 输入图像的句柄 52 | * @var resource 53 | */ 54 | public $in_img; 55 | 56 | /** 57 | * @var array $_hash_data 二值化的数组 58 | */ 59 | private $_hash_data; 60 | 61 | 62 | public function __construct($imgPath) { 63 | 64 | //判断图像的类型 65 | $res = exif_imagetype($imgPath); 66 | 67 | switch($res) { 68 | case 1: 69 | $this->in_img = imagecreatefromgif($imgPath); 70 | break; 71 | case 2: 72 | $this->in_img = imagecreatefromjpeg($imgPath); 73 | break; 74 | case 3: 75 | $this->in_img = imagecreatefrompng($imgPath); 76 | break; 77 | case 6: 78 | $this->in_img 
= function_exists('imagecreatefrombmp') ? imagecreatefrombmp($imgPath) : imagecreatefromwbmp($imgPath);
                // Type code 6 reported by exif_imagetype() is IMAGETYPE_BMP, so the BMP
                // loader is the matching one; the WBMP loader is only kept as a fallback
                // for PHP builds that do not ship imagecreatefrombmp().
                break;
            default:
                throw new \Exception("不支持的图片格式!");
        }

        // The GD loaders return false for unreadable or corrupt files.
        if (!$this->in_img) {
            throw new \Exception("图片读取失败!");
        }

        // Read the image dimensions once.
        $size=getimagesize($imgPath);
        $this->_image_w=$size[0];
        $this->_image_h=$size[1];
    }

    /**
     * Binarise a grey-scale matrix: a pixel becomes 1 when its grey value
     * lies strictly between $min and $max, otherwise 0. The result is also
     * remembered in $this->_hash_data.
     *
     * @param array $grey_data two-dimensional grey-scale matrix
     * @param int|null $max upper threshold; null falls back to $this->max_grey
     * @param int|null $min lower threshold; null falls back to $this->min_grey
     * @throws \Exception when there is no grey data or a threshold is missing
     * @author mohuishou<1@lailin.xyz>
     * @return array $data binarised matrix of 0/1 values
     */
    public function imageHash($grey_data,$max,$min){

        if ($grey_data==null)
            throw new \Exception("请先将图片灰度化!",1);

        // Strict null checks: 0 is a legitimate threshold and must not be
        // mistaken for "not provided".
        if ($max===null) $max=$this->max_grey;
        if ($max===null)
            throw new \Exception("请输入最大灰度值!",1);

        if ($min===null) $min=$this->min_grey;
        if ($min===null)
            throw new \Exception("请输入最小灰度值!",1);

        $rows=count($grey_data);
        $cols=count($grey_data[0]);
        $data=[];
        for($i = 0; $i < $rows; $i++) {
            for ($j = 0; $j < $cols; $j++) {
                $grey=$grey_data[$i][$j];
                $data[$i][$j]=($grey>$min&&$grey<$max) ? 1 : 0;
            }
        }
        $this->_hash_data=$data;
        return $data;
    }

    /**
     * Convert the loaded image into a grey-scale matrix. Each entry is the
     * integer average of the pixel's red, green and blue components.
     *
     * @return array $data matrix indexed as [row][column]
     * @author mohuishou<1@lailin.xyz>
     */
    public function rgb2grey(){
        $data=[];
        for($i = 0; $i < $this->_image_h; $i++) {
            for ($j = 0; $j < $this->_image_w; $j++) {
                $rgb = imagecolorat($this->in_img,$j,$i);
                $rgb_array = imagecolorsforindex($this->in_img, $rgb);
                // grey value = mean of the three colour channels
                $data[$i][$j]=intval(($rgb_array['red']+$rgb_array['green']+$rgb_array['blue'])/3);
            }
        }
        return $data;
    }

    /**
     * Estimate the grey value of the background.
     *
     * For every row the most frequent grey value is determined and its
     * number of occurrences is added to that value's tally; the value with
     * the highest tally over all rows is returned.
     *
     * @param array $data grey-scale matrix
     * @return int
     * @throws \Exception when $data holds no pixels
     * @author mohuishou<1@lailin.xyz>
     */
    public function getBgGrey($data){
        if (empty($data))
            throw new \Exception("请先将图片灰度化!",1);

        $tmp=[];
        foreach($data as $v){
            if (empty($v)) continue; // max() cannot handle an empty row
            $a=array_count_values($v);
            $max_a=max($a);
            $k=array_keys($a, $max_a);
            if(!empty($k)){
                if(isset($tmp[$k[0]])){
                    $tmp[$k[0]]+=$max_a;

                }else{
                    $tmp[$k[0]]=$max_a;
                }
            }
        }
        if (empty($tmp))
            throw new \Exception("请先将图片灰度化!",1);

        return array_keys($tmp, max($tmp))[0];
    }

    /**
     * Remove isolated noise pixels: every set pixel that isHotSpots()
     * classifies as noise is cleared.
     *
     * @author mohuishou<1@lailin.xyz>
     * @param array $hash_data binarised matrix with the dimensions of the loaded image
     * @return mixed the cleaned matrix
     */
    public function removeHotSpots($hash_data){
        for($i = 0; $i < $this->_image_h; $i++) {
            for ($j = 0; $j < $this->_image_w; $j++) {
                if($hash_data[$i][$j]){
                    if($this->isHotSpots($i,$j,$hash_data)) $hash_data[$i][$j]=0;
                }
            }
        }
        return $hash_data;
    }

    /**
     * Decide whether the pixel at row $i, column $j is an isolated point.
     *
     * Pixels on the image border are always reported as noise. Otherwise
     * the set pixels of the 3x3 neighbourhood (the pixel itself included)
     * are counted and the pixel is noise when fewer than 4 are set.
     *
     * @author mohuishou<1@lailin.xyz>
     * @param int $i row index
     * @param int $j column index
     * @param array $hash_data binarised matrix
     * @return bool true: noise, false: not noise
     */
    public function isHotSpots($i,$j,$hash_data){
        if($i == 0 || $j == 0 || $i == ($this->_image_h - 1) || $j == ($this->_image_w - 1)) return true;


        // the nine points centred on the pixel under test
        $points[0]=$hash_data[$i-1][$j-1];
        $points[1]=$hash_data[$i-1][$j];
        $points[2]=$hash_data[$i-1][$j+1];
        $points[3]=$hash_data[$i][$j-1];
        $points[4]=$hash_data[$i][$j];// the pixel under test
        $points[5]=$hash_data[$i][$j+1];
        $points[6]=$hash_data[$i+1][$j-1];
        $points[7]=$hash_data[$i+1][$j];
        $points[8]=$hash_data[$i+1][$j+1];

        $count=0;

        foreach ($points as $v){
            if($v){
                $count++;
            }
        }

        return $count<4;
    }

    /**
     * Remove all-zero rows.
     *
     * @author mohuishou<1@lailin.xyz>
     * @param null $data when left at null the method works on (a reference to) $this->_hash_data
     * @return array $data the matrix without its all-zero rows; remaining keys are preserved
     */
    public function removeZero($data=null){
        $data==null && $data=&$this->_hash_data;
        foreach ($data as $k=>$v) {
            if (implode("", $v) == 0) unset($data[$k]);
        }
        return $data;
    }

    /**
     * Equal-width segmentation.
     * @author mohuishou<1@lailin.xyz>
239 | * @param int $n 图片的第几个字符 240 | * @return array $hash_data 标准化之后的二值化图像字符串 241 | */ 242 | public function splitByEqualWidth($n){ 243 | $data=[]; 244 | $a=$this->_image_w/self::CHAR_NUM; 245 | for($i=$n*$a;$i<($n+1)*$a;$i++){ 246 | $column=array_column($this->_hash_data,$i); 247 | if(implode("",$column)!=0){ 248 | $data[]=$column; 249 | } 250 | } 251 | } 252 | 253 | 254 | /** 255 | * 标准化,返回标准化之后的二值数组 256 | * @param array $hash_data 尚未标准化的二值数组 257 | * @param int $angle 旋转角度,默认30度 258 | * @param int $width 标准化图像宽度 259 | * @param int $height 标准化图像高度 260 | * @author mohuishou<1@lailin.xyz> 261 | * @return array $standard_data 标准化之后的二值数组 262 | */ 263 | public function standard($hash_data,$angle=30,$width=null,$height=null){ 264 | //初始化标准化图像的相关设置 265 | $width==null && $width=$this->standard_width; 266 | $height==null && $height=$this->standard_height; 267 | 268 | //hash 转 img 269 | $img=ImageTool::hash2img($hash_data,2); 270 | 271 | //图片旋转,取最小的字符宽度 272 | //最小的宽度 273 | $min_w=999; 274 | $out_hash_data=[]; 275 | $white=imagecolorallocate($img, 255, 255, 255); 276 | for($i=-$angle;$i<$angle;$i++){ 277 | $tmp_img=imagerotate($img,$i,$white); 278 | //计算字符宽度 279 | $tmp_img_hash_data=$this->imgTranspose($tmp_img); 280 | $w=count($tmp_img_hash_data); 281 | if($w<$min_w) { 282 | $out_hash_data = $tmp_img_hash_data; 283 | $min_w = $w; 284 | } 285 | } 286 | 287 | $out_hash_data=ImageTool::hashTranspose($out_hash_data); 288 | 289 | //最小宽度字符的高度与宽度 290 | $out_img_w=count($out_hash_data[0]); 291 | $out_img_h=count($out_hash_data); 292 | 293 | //最小字符图片 294 | $out_img=ImageTool::hash2img($out_hash_data); 295 | 296 | //图像标准化,宽度和高度进行标准化 297 | $standard_img = imagecreatetruecolor($width, $height); 298 | imagecopyresized($standard_img, $out_img, 0, 0, 0, 0,$width,$height,$out_img_w,$out_img_h); 299 | 300 | 301 | return ImageTool::img2hash($standard_img); 302 | } 303 | 304 | /** 305 | * 图像去除零行、零列、转置之后的二值数组 306 | * @param resource $img 图像资源句柄 307 | * @author mohuishou<1@lailin.xyz> 308 | * 
@return array
     */
    public function imgTranspose($img){
        // image -> binary matrix
        $hash_data=ImageTool::img2hash($img);

        // drop all-zero rows
        $hash_data=ImageTool::removeZero($hash_data);

        // transpose and drop the rows that are all-zero afterwards
        $hash_data=ImageTool::transposeAndRemoveZero($hash_data);

        return $hash_data;
    }

}
-------------------------------------------------------------------------------- /src/ImageConnect.php: -------------------------------------------------------------------------------- 1 | 9 | */
class ImageConnect
{
    // Connectivity setting; setIsConnected() accepts values from 0 to 8.
    protected $is_connected=2;

    /**
     * Initial value of the region tags.
     */
    const TAG=10;

    /**
     * Minimum size a tagged region needs in order to count as a character.
     * @var integer
     */
    const MIN=40;

    private $_len_w;
    private $_len_h;

    /**
     * Binarised image matrix.
     * @var array
     */
    private $_hash_data=[];

    /**
     * Size of every tagged region, keyed by tag.
     */
    private $_tags=[];

    /**
     * Seeds that still have to be explored.
     * @var array
     */
    private $_seeds=[];

    /**
     * Store the matrix and its dimensions, then tag the connected regions.
     * @param array $hash_data binarised image matrix
     * @author mohushou<1@lailin.xyz>
     */
    public function __construct($hash_data)
    {
        $this->_hash_data=$hash_data;
        $this->_len_h=count($hash_data);
        $this->_len_w=count($hash_data[0]);
        $this->addTag();
    }


    /**
     * Set the connectivity setting.
     *
     * @param int $val value between 0 and 8 (inclusive)
     * @throws \Exception when $val is outside that range
     */
    public function setIsConnected($val)
    {
        if ($val<0||$val>8) {
            throw new \Exception("值设置错误!");
        }
        $this->is_connected=$val;
    }

    /**
     * Split the image into character regions.
     * @author mohuishou<1@lailin.xyz>
     * @return array
     */
    public function split()
    {
        $strs=[];
        $str_data=[];

        // collect the tags of the regions that are large enough to keep
        foreach ($this->_tags as $key => $value) {
            if ($value>self::MIN) {
                $strs[]=$key;
            }
        }

        // build one 0/1 matrix per kept region
        foreach ($strs as $k => $v) {
            $str_data[$k]=[];
            foreach ($this->_hash_data as $i => $value) {
                foreach ($value as $j => $val) {
                    if ($val == $v) {
                        $str_data[$k][$i][$j]=1;
                    } else {
                        $str_data[$k][$i][$j]=0;
93 | } 94 | } 95 | } 96 | } 97 | 98 | //去除零行零列 99 | foreach ($str_data as $k => $v) { 100 | //去除零列 101 | $str_data[$k]=ImageTool::removeZeroColumn($v); 102 | 103 | //去除零行 104 | $str_data[$k]=ImageTool::removeZero($str_data[$k]); 105 | 106 | //重建索引 107 | $str_data[$k]=array_values($str_data[$k]); 108 | } 109 | 110 | return $str_data; 111 | } 112 | 113 | /** 114 | * 去除噪点 115 | * @return array $hash_data 去噪之后的数组 116 | * @author mohushou<1@lailin.xyz> 117 | */ 118 | public function removeHotSpots() 119 | { 120 | $remove=[]; 121 | foreach ($this->_tags as $key => $value) { 122 | if ($value_hash_data as $i => $value) { 127 | foreach ($value as $j => $val) { 128 | if (in_array($val, $remove)) { 129 | $this->_hash_data[$i][$j]=0; 130 | } else { 131 | if ($val>self::TAG) { 132 | $this->_hash_data[$i][$j]=1; 133 | } 134 | } 135 | } 136 | } 137 | return $this->_hash_data; 138 | } 139 | 140 | /** 141 | * 添加标记 142 | * 143 | * @return void 144 | * @author mohuishou<1@lailn.xyz> 145 | */ 146 | public function addTag() 147 | { 148 | $data=&$this->_hash_data; 149 | $data=ImageTool::hashTranspose($data); 150 | $tag=self::TAG; 151 | for ($i=0; $i < $this->_len_w; $i++) { 152 | for ($j=0; $j < $this->_len_h; $j++) { 153 | if ($data[$i][$j]>0&&$data[$i][$j]_tags[$tag]=1; 157 | $this->connectPoint($i, $j, $tag); 158 | while (!empty($this->_seeds)) { 159 | list($x,$y)=array_shift($this->_seeds); 160 | $this->connectPoint($x, $y, $tag); 161 | } 162 | } 163 | } 164 | } 165 | $this->_hash_data=ImageTool::hashTranspose($data); 166 | } 167 | 168 | /** 169 | * 查找连通的点 170 | * @param int $i 横坐标 171 | * @param int $j 纵坐标 172 | * @return void 173 | */ 174 | public function connectPoint($i, $j, $tag) 175 | { 176 | if ($i == 0 || $j == 0 || $i == ($this->_len_h - 1) || $j == ($this->_len_w - 1)) { 177 | return true; 178 | } 179 | if ($this->is_connected>0) { 180 | $count=0; 181 | for ($m=-1; $m < 2; $m++) { 182 | for ($n=-1; $n < 2; $n++) { 183 | if ($i+$m<$this->_len_w - 1 && $j+$n < 
$this->_len_h-1 && $this->_hash_data[$i+$m][$j+$n]>0 && $this->_hash_data[$i+$m][$j+$n]is_connected) { 189 | return; 190 | } 191 | } 192 | 193 | for ($m=-1; $m < 2; $m++) { 194 | for ($n=-1; $n < 2; $n++) { 195 | if ($this->_hash_data[$i+$m][$j+$n]>0&&$this->_hash_data[$i+$m][$j+$n]_hash_data[$i+$m][$j+$n]=$tag; 197 | $this->_tags[$tag]++; 198 | array_push($this->_seeds, [$i+$m,$j+$n]); 199 | } 200 | } 201 | } 202 | } 203 | } 204 | -------------------------------------------------------------------------------- /src/ImageOCR.php: -------------------------------------------------------------------------------- 1 | 5 | * Date: 2017/3/20 6 | * Time: 0:35 7 | */ 8 | 9 | namespace Mohuishou\ImageOCR; 10 | 11 | class ImageOCR 12 | { 13 | 14 | /** 15 | * 标准化字符 16 | * @var array 17 | * @author mohuishou<1@lailin.xyz> 18 | */ 19 | protected $standard_data=[]; 20 | 21 | protected $is_debug = false; 22 | 23 | /** 24 | * @var Image 25 | * @author mohuishou<1@lailin.xyz> 26 | */ 27 | protected $image; 28 | 29 | /** 30 | * 最大灰度值 31 | * @var int 32 | * @author mohuishou<1@lailin.xyz> 33 | */ 34 | protected $max_grey=null; 35 | 36 | /** 37 | * 最小灰度值 38 | * @var int 39 | * @author mohuishou<1@lailin.xyz> 40 | */ 41 | protected $min_grey=null; 42 | 43 | /** 44 | * 灰度化图像 45 | * @var array 46 | * @author mohuishou<1@lailin.xyz>\ 47 | */ 48 | protected $grey_data; 49 | 50 | /** 51 | * 二值化图像 52 | * @var array 53 | * @author mohuishou<1@lailin.xyz> 54 | */ 55 | protected $hash_data; 56 | 57 | /** 58 | * @var ImageConnect 59 | * @author mohuishou<1@lailin.xyz> 60 | */ 61 | protected $image_connect; 62 | 63 | /** 64 | * 背景除噪的三种模式 65 | */ 66 | const MAX_MODEL=1; 67 | const MIN_MODEL=0; 68 | const BG_MODEL=2; 69 | 70 | /** 71 | * 对象初始化,需要分割得到标准化数组 72 | * 在构造函数内,需要设置 $this->standard_data & $this->image 73 | * BaseOCR constructor. 
74 | * @param Image $image 75 | */ 76 | public function __construct(Image $image) 77 | { 78 | $this->image=$image; 79 | } 80 | 81 | /** 82 | * 保存输入的图片 83 | * @param string $path 需要保存的图片路径 84 | * @author mohuishou<1@lailin.xyz> 85 | */ 86 | public function saveImage($path) 87 | { 88 | imagepng($this->image->in_img, $path); 89 | } 90 | 91 | /** 92 | * 图片灰度化 93 | * @author mohuishou<1@lailin.xyz> 94 | * @return array 95 | */ 96 | public function grey() 97 | { 98 | return $this->grey_data=$this->image->rgb2grey(); 99 | } 100 | 101 | /** 102 | * 二值化,固定阈值 103 | * @param null|int $max_grey 最大灰度阈值 104 | * @param null|int $min_grey 最小灰度阈值 105 | * @author mohuishou<1@lailin.xyz> 106 | */ 107 | public function hash($max_grey = null, $min_grey = null) 108 | { 109 | $max_grey==null && $max_grey=$this->max_grey; 110 | $min_grey==null && $min_grey=$this->min_grey; 111 | $this->hash_data=$this->image->imageHash($this->grey_data, $max_grey, $min_grey); 112 | } 113 | 114 | /** 115 | * 二值化,背景动态阈值法 116 | * @param int $model 0:最大灰度值模式,1:最小灰度值模式,2:唯一模式 117 | * @param null|int $max_grey 最大灰度阈值 118 | * @param null|int $min_grey 最小灰度阈值 119 | * @author mohuishou<1@lailin.xyz> 120 | * @return array $hash_data 二值化图像数组 121 | * @throws \Exception 122 | */ 123 | public function hashByBackground($model = self::MAX_MODEL, $max_grey = null, $min_grey = null) 124 | { 125 | $bg_grey=$this->image->getBgGrey($this->grey_data); 126 | switch ($model) { 127 | case 0: 128 | $min_grey=$bg_grey; 129 | break; 130 | case 1: 131 | $max_grey=$bg_grey; 132 | break; 133 | case 2: 134 | $max_grey=$min_grey=$bg_grey; 135 | break; 136 | default: 137 | throw new \Exception("请选择正确模式!"); 138 | } 139 | return $this->hash($max_grey, $min_grey); 140 | } 141 | 142 | 143 | /** 144 | * 孤立点除噪法 145 | * @author mohuishou<1@lailin.xyz> 146 | * @throws \Exception 147 | */ 148 | public function removeSpots() 149 | { 150 | $this->checkHashData(); 151 | $this->hash_data=$this->image->removeHotSpots($this->hash_data); 152 | } 153 | 
154 | /** 155 | * 连通域去噪 156 | * @author mohuishou<1@lailin.xyz> 157 | */ 158 | public function removeSpotsByConnect() 159 | { 160 | $this->checkImageConnect(); 161 | $this->hash_data=$this->image_connect->removeHotSpots(); 162 | } 163 | 164 | /** 165 | * 连通域分割 166 | * @author mohuishou<1@lailin.xyz> 167 | */ 168 | public function splitByConnect() 169 | { 170 | $this->checkImageConnect(); 171 | $this->standard_data=$this->image_connect->split(); 172 | } 173 | 174 | /** 175 | * 滴水算法分割 176 | * @author mohuishou<1@lailin.xyz> 177 | */ 178 | public function splitByWater() 179 | { 180 | } 181 | 182 | 183 | /** 184 | * 图像标准化 185 | * @return array 标准化的数组对象,从左到右的顺序 186 | * @author mohuishou<1@lailin.xyz> 187 | * @throws \Exception 188 | */ 189 | public function standard() 190 | { 191 | if ($this->standard_data==null) { 192 | $this->is_debug && ImageTool::drawBrowser($this->hash_data); 193 | throw new \Exception("请先获取分割之后的图片", 5); 194 | } 195 | $data=[]; 196 | foreach ($this->standard_data as $item) { 197 | $data[]=$this->image->standard($item); 198 | } 199 | return $this->standard_data=$data; 200 | } 201 | 202 | /** 203 | * 检查ImageConnect是否初始化 204 | * @author mohuishou<1@lailin.xyz> 205 | * @throws \Exception 206 | */ 207 | protected function checkImageConnect() 208 | { 209 | if (!($this->image_connect instanceof ImageConnect)) { 210 | $this->is_debug && ImageTool::drawBrowser($this->hash_data); 211 | throw new \Exception("请先调用setImageConnect初始化ImageConnect类", 4); 212 | } 213 | } 214 | 215 | /** 216 | * 检查图像是否已经二值化 217 | * @author mohuishou<1@lailin.xyz> 218 | * @throws \Exception 219 | */ 220 | protected function checkHashData() 221 | { 222 | if ($this->hash_data==null) { 223 | throw new \Exception("请先将图片二值化", 4); 224 | } 225 | } 226 | 227 | /** 228 | * 初始化ImageConnect 229 | * @author mohuishou<1@lailin.xyz> 230 | */ 231 | public function setImageConnect() 232 | { 233 | $this->checkHashData(); 234 | $this->image_connect=new ImageConnect($this->hash_data); 235 | } 236 | 237 
| /** 238 | * 设置image对象 239 | * @param Image $image 240 | * @author mohuishou<1@lailin.xyz> 241 | */ 242 | public function setImage(Image $image) 243 | { 244 | $this->image=$image; 245 | } 246 | 247 | /** 248 | * 获取标准化的二值化数组 249 | * @author mohuishou<1@lailin.xyz> 250 | * @return array 251 | */ 252 | public function getStandardData() 253 | { 254 | return $this->standard_data; 255 | } 256 | 257 | /** 258 | * @param $max_grey 259 | * @author mohuishou<1@lailin.xyz> 260 | * @return mixed 261 | */ 262 | public function setMaxGrey($max_grey) 263 | { 264 | return $this->max_grey=$max_grey; 265 | } 266 | 267 | /** 268 | * @param $min_grey 269 | * @author mohuishou<1@lailin.xyz> 270 | * @return mixed 271 | */ 272 | public function setMinGrey($min_grey) 273 | { 274 | return $this->min_grey=$min_grey; 275 | } 276 | 277 | /** 278 | * @param $standard_width 279 | * @author mohuishou<1@lailin.xyz> 280 | * @return mixed 281 | */ 282 | public function setStandardWidth($standard_width) 283 | { 284 | return $this->image->standard_width=$standard_width; 285 | } 286 | 287 | /** 288 | * @param $standard_height 289 | * @author mohuishou<1@lailin.xyz> 290 | * @return mixed 291 | */ 292 | public function setStandardHeight($standard_height) 293 | { 294 | return $this->image->standard_height=$standard_height; 295 | } 296 | 297 | /** 298 | * 设置debug模式 299 | * @param $debug 300 | * @author mohuishou<1@lailin.xyz> 301 | * @return mixed 302 | */ 303 | public function setDebug($debug = false) 304 | { 305 | $this->is_debug = $debug; 306 | } 307 | } 308 | -------------------------------------------------------------------------------- /src/ImageTool.php: -------------------------------------------------------------------------------- 1 | 5 | * Date: 2017/3/19 6 | * Time: 16:28 7 | */ 8 | 9 | namespace Mohuishou\ImageOCR; 10 | 11 | /** 12 | * Class ImageTool 13 | * 一些工具函数 14 | * @author mohuishou<1@lailin.xyz> 15 | * @package Mohuishou\ImageOCR 16 | */ 17 | class ImageTool{ 18 | 19 | /** 20 | * 
去除零行 21 | * @author mohuishou<1@lailin.xyz> 22 | * @param array $data 23 | * @return array $data 去掉零行之后的二值化数组 24 | */ 25 | static public function removeZero($data){ 26 | foreach ($data as $k=>$v) { 27 | if (implode("", $v) == 0) unset($data[$k]); 28 | } 29 | return $data; 30 | } 31 | 32 | /** 33 | * 去除零列 34 | * @param array $hash_data 35 | * @author mohuishou<1@lailin.xyz> 36 | * @return array 37 | */ 38 | static public function removeZeroColumn($hash_data){ 39 | $data=[]; 40 | for ($i=0;$i 58 | */ 59 | static public function drawBrowser($data){ 60 | foreach ($data as $v){ 61 | foreach ($v as $val){ 62 | if($val){ 63 | echo ""; 64 | }else{ 65 | echo ""; 66 | } 67 | } 68 | echo "
"; 69 | } 70 | } 71 | 72 | 73 | static public function transposeAndRemoveZero($hash_data){ 74 | $data=[]; 75 | for ($i=0;$i 88 | * @return array $data 转置之后的数组 89 | */ 90 | static public function hashTranspose($hash_data){ 91 | $data=[]; 92 | for ($i=0;$i 104 | * @return array $hash_data 二值化数组 105 | */ 106 | static public function img2hash($img){ 107 | 108 | //二值化数组初始化 109 | $hash_data=[]; 110 | 111 | //根据资源句柄获取整个图像的高与宽 112 | $img_w=imagesx($img); 113 | $img_h=imagesy($img); 114 | 115 | //图像二值化 116 | for($i = 0; $i <$img_h; $i++) { 117 | for ($j = 0; $j <$img_w; $j++) { 118 | $rgb = imagecolorat($img,$j,$i); 119 | if($rgb==0){ 120 | $hash_data[$i][$j]=1; 121 | }else{ 122 | $hash_data[$i][$j]=0; 123 | } 124 | } 125 | } 126 | 127 | return $hash_data; 128 | } 129 | 130 | /** 131 | * 二值化数组转图像 132 | * @param array $hash_data 二值化数组 133 | * @param int $padding 边距 134 | * @author mohuishou<1@lailin.xyz> 135 | * @return resource 图像的资源句柄 136 | */ 137 | static public function hash2img($hash_data,$padding=0){ 138 | 139 | //计算图片的宽度与高度 140 | $img_w=count($hash_data[0])+2*$padding; 141 | $img_h=count($hash_data)+2*$padding; 142 | 143 | //图像初始化 144 | $img = imagecreatetruecolor($img_w,$img_h);//创建一幅真彩色图像 145 | $white=imagecolorallocate($img, 255, 255, 255);//白色 146 | $black=imagecolorallocate($img, 0, 0, 0);//黑色 147 | 148 | //背景填充为白色 149 | imagefill($img, 0,0, $white); 150 | 151 | //进行画图 152 | foreach ($hash_data as $k=>$v){ 153 | foreach ($v as $key=> $val){ 154 | if ($val){ 155 | imagesetpixel($img, $key+$padding,$k+$padding, $black); 156 | } 157 | } 158 | } 159 | 160 | return $img; 161 | } 162 | } -------------------------------------------------------------------------------- /src/ImageWater.php: -------------------------------------------------------------------------------- 1 | 5 | * Date: 2017/3/22 6 | * Time: 14:32 7 | */ 8 | 9 | namespace Mohuishou\ImageOCR; 10 | 11 | /** 12 | * 滴水算法 13 | * Class ImageWater 14 | * @author mohuishou<1@lailin.xyz> 15 | * @package 
/**
 * Drop-fall ("water drop") segmentation of a binarized image.
 *
 * A drop is released at the top of the image in each split column and
 * rolls downwards around the strokes; the path of each drop is used as
 * the boundary between two neighbouring characters.
 *
 * Class ImageWater
 * @author mohuishou<1@lailin.xyz>
 * @package Mohuishou\ImageOCR
 */
class ImageWater
{
    /**
     * Binarized image, indexed as $hash_data[row][column]; 1 marks a
     * foreground (black) pixel.
     * @var array
     * @author mohuishou<1@lailin.xyz>
     */
    protected $hash_data = [];

    /**
     * Vertical projection: number of foreground pixels in each column.
     * Filled by projectionStatistics().
     * @var array
     * @author mohuishou<1@lailin.xyz>
     */
    protected $data = [];

    /**
     * Column indexes that are local minima of the vertical projection.
     * Filled by setMinPoints().
     * @var array
     * @author mohuishou<1@lailin.xyz>
     */
    protected $min_points = [];

    /**
     * Maximum width of a single character.
     * @var int
     * @author mohuishou<1@lailin.xyz>
     */
    protected $max_len;

    /**
     * Minimum width of a single character.
     * @var int
     * @author mohuishou<1@lailin.xyz>
     */
    protected $min_len;

    /**
     * Average width of a single character.
     * @var int
     * @author mohuishou<1@lailin.xyz>
     */
    protected $avg_len;

    /**
     * Set to 1 right after the drop moved to the right; used by
     * getWaterStatus() to stop the drop from swinging left/right forever.
     * @var int
     * @author mohuishou<1@lailin.xyz>
     */
    protected $right_flag = 0;

    /**
     * Separator marker value. Kept for backward compatibility; water()
     * no longer writes markers into the pixel data.
     */
    const Tag = 100;


    /**
     * ImageWater constructor.
     * @param array $hash_data binarized image, $hash_data[row][column]
     */
    public function __construct($hash_data)
    {
        $this->hash_data = $hash_data;
    }

    /**
     * Set the maximum width of a single character.
     * @param int $max_len
     * @return mixed the value that was set
     */
    public function setMaxLen($max_len)
    {
        return $this->max_len = $max_len;
    }

    /**
     * Set the minimum width of a single character.
     * @param int $min_len
     * @return mixed the value that was set
     */
    public function setMinLen($min_len)
    {
        return $this->min_len = $min_len;
    }

    /**
     * Set the average width of a single character.
     * @param int $avg_len
     * @return mixed the value that was set
     */
    public function setAvgLen($avg_len)
    {
        return $this->avg_len = $avg_len;
    }

    /**
     * Split the image with the drop-fall algorithm.
     *
     * One drop is traced per entry of $sp; count($sp) + 1 segments are
     * returned. In every row, the pixel where a drop leaves that row
     * belongs to the segment on its left, and keeps its original value.
     * Each segment is then stripped of empty columns and rows and its
     * rows are re-indexed.
     *
     * @param array $sp start columns of the drops, expected left to right
     * @author mohuishou<1@lailin.xyz>
     * @return array list of segments, each a binarized array
     */
    public function water($sp)
    {
        $hash_data = $this->hash_data;
        $len_h = count($hash_data);
        if ($len_h == 0) {
            return [];
        }
        $len_w = count(current($hash_data));

        // Trace one separator path per split point.
        $cuts = [];
        foreach ($sp as $j) {
            $cuts[] = $this->traceDrop($j, $len_h, $len_w);
        }

        // Cut the image: segment $k lies between path $k-1 and path $k.
        $segments = count($cuts) + 1;
        $data = [];
        for ($k = 0; $k < $segments; $k++) {
            $piece = [];
            foreach ($hash_data as $x => $row) {
                // A path that never reached this row is treated as lying on
                // the right edge, so the pixels stay with the left segment.
                $left = -1;
                if ($k > 0) {
                    $left = isset($cuts[$k - 1][$x]) ? $cuts[$k - 1][$x] : $len_w - 1;
                }
                $right = $len_w - 1;
                if ($k < $segments - 1 && isset($cuts[$k][$x])) {
                    $right = $cuts[$k][$x];
                }
                foreach ($row as $y => $val) {
                    $piece[$x][$y] = ($y > $left && $y <= $right) ? $val : 0;
                }
            }
            $data[] = $piece;
        }

        // Trim every segment.
        foreach ($data as $k => $v) {
            // drop empty columns
            $data[$k] = ImageTool::removeZeroColumn($v);

            // drop empty rows
            $data[$k] = ImageTool::removeZero($data[$k]);

            // rebuild the row indexes
            $data[$k] = array_values($data[$k]);
        }

        return $data;
    }

    /**
     * Follow a single drop from the top row down to the bottom.
     *
     * @param int $start column in which the drop is released
     * @param int $len_h image height in rows
     * @param int $len_w image width in columns
     * @return array map row => column at which the drop left that row
     */
    protected function traceDrop($start, $len_h, $len_w)
    {
        // Every drop starts without "just moved right" history.
        $this->right_flag = 0;

        $cut = [];
        $x = 0;
        $y = $start;
        while ($x < $len_h && $y < $len_w) {
            // Later visits to the same row overwrite earlier ones, so the
            // stored value is the column where the drop finally leaves.
            $cut[$x] = $y;

            list($nx, $ny) = $this->getWaterStatus($x, $y);

            // Never let the drop roll to the right of its start column.
            $ny = min($start, $ny);

            // If that clamp cancelled the move, the drop would sit on the
            // same pixel forever: force it down instead.
            if ($nx == $x && $ny == $y) {
                $nx = $x + 1;
                $this->right_flag = 0;
            }

            $x = $nx;
            $y = $ny;
        }

        return $cut;
    }

    /**
     * Read one pixel, treating everything outside the image as background.
     *
     * @param int $x row
     * @param int $y column
     * @return mixed the stored pixel value, or 0 outside the image
     */
    protected function pixel($x, $y)
    {
        return isset($this->hash_data[$x][$y]) ? $this->hash_data[$x][$y] : 0;
    }

    /**
     * Decide where the drop at ($i, $j) moves next.
     *
     * Looks at five neighbours: n1 below-left, n2 below, n3 below-right,
     * n4 right, n5 left. Neighbours outside the image count as white.
     *
     * @param int $i current row
     * @param int $j current column
     * @author mohuishou<1@lailin.xyz>
     * @return array [row, column] of the next position
     */
    public function getWaterStatus($i, $j)
    {
        $n = [
            1 => $this->pixel($i + 1, $j - 1),
            2 => $this->pixel($i + 1, $j),
            3 => $this->pixel($i + 1, $j + 1),
            4 => $this->pixel($i, $j + 1),
            5 => $this->pixel($i, $j - 1),
        ];

        $count = 0;
        foreach ($n as $v) {
            if ($v == 1) {
                $count++;
            }
        }

        // Case 1: all neighbours black or all white - fall straight down.
        if ($count == 0 || $count == 5) {
            $this->right_flag = 0;
            return [$i + 1, $j];
        }

        // Case 2: only below-left is white - slide down-left.
        if ($count == 4 && $n[1] == 0) {
            $this->right_flag = 0;
            return [$i + 1, $j - 1];
        }

        // Case 3: below-left black, below white - fall straight down.
        if ($n[1] == 1 && $n[2] == 0) {
            $this->right_flag = 0;
            return [$i + 1, $j];
        }

        // Case 4: below-left and below black, below-right white - down-right.
        if ($n[1] == 1 && $n[2] == 1 && $n[3] == 0) {
            $this->right_flag = 0;
            return [$i + 1, $j + 1];
        }

        // Case 5: whole lower row black, right white - roll right.
        if ($n[1] == 1 && $n[2] == 1 && $n[3] == 1 && $n[4] == 0) {
            $this->right_flag = 1;
            return [$i, $j + 1];
        }

        // Case 6: only the left neighbour is white - roll left.
        if ($count == 4 && $n[5] == 0) {
            // If the previous move went right, rolling left would swing
            // back and forth; drop straight down instead.
            if ($this->right_flag) {
                $this->right_flag = 0;
                return [$i + 1, $j];
            }
            return [$i, $j - 1];
        }

        return [$i + 1, $j];
    }

    /**
     * Estimate the number of characters: image width / average width.
     *
     * @author mohuishou<1@lailin.xyz>
     * @return float
     * @throws \Exception when the average character width is not set
     */
    public function getStrNum()
    {
        if (!$this->avg_len) {
            throw new \Exception("avg_len must be set to a positive value before counting characters");
        }

        // The width is the length of a row, not the number of rows.
        $first = reset($this->hash_data);
        $width = is_array($first) ? count($first) : 0;

        return round($width / $this->avg_len);
    }

    /**
     * Pick the split columns, which are also the drop start columns.
     *
     * For the p-th cut a projection minimum is accepted when its distance
     * to the previous cut fits one character and the remaining width fits
     * the remaining (count - p) characters. When several minima qualify
     * the last one wins; when none qualifies the cut stays at column 0.
     *
     * @author mohuishou<1@lailin.xyz>
     * @return array split columns, keyed 1 .. (character count - 1)
     * @throws \Exception when the average character width is not set
     */
    public function getSplitPoints()
    {
        $str_num = $this->getStrNum();

        $this->setMinPoints();

        // $sp[0] is the left edge; $sp[1..$str_num-1] are the real cuts.
        $sp = [];
        for ($p = 0; $p < $str_num; $p++) {
            $sp[$p] = 0;
        }

        // index of the last column
        $end = count($this->data) - 1;

        // $str_num characters need $str_num - 1 cuts.
        for ($p = 1; $p < $str_num; $p++) {
            $remaining = $str_num - $p;
            foreach ($this->min_points as $i) {
                $len = abs($i - $sp[$p - 1]);
                if ($len > $this->max_len || $len < $this->min_len) {
                    continue;
                }
                $len = abs($end - $i);
                if ($len >= $this->min_len * $remaining && $len <= $this->max_len * $remaining) {
                    $sp[$p] = $i;
                }
            }
        }

        // The cuts start at $sp[1].
        unset($sp[0]);

        return $sp;
    }

    /**
     * Compute the vertical projection (foreground pixels per column).
     *
     * The result is stored in $this->data and also returned.
     *
     * @author mohuishou<1@lailin.xyz>
     * @return array one count per column, left to right
     */
    public function projectionStatistics()
    {
        $first = reset($this->hash_data);
        $width = is_array($first) ? count($first) : 0;

        // Start from zero so that empty columns are counted as 0.
        $data = $width > 0 ? array_fill(0, $width, 0) : [];
        foreach ($this->hash_data as $row) {
            for ($y = 0; $y < $width; $y++) {
                if (isset($row[$y]) && $row[$y] == 1) {
                    $data[$y]++;
                }
            }
        }

        return $this->data = $data;
    }

    /**
     * Collect the local minima of the vertical projection.
     *
     * A flat valley contributes a single point, its middle column.
     *
     * @author mohuishou<1@lailin.xyz>
     */
    public function setMinPoints()
    {
        // The projection is needed first.
        $this->projectionStatistics();

        // Start clean so repeated calls do not accumulate points.
        $this->min_points = [];

        // The first and the last column are never candidates.
        $last = count($this->data) - 1;
        for ($i = 1; $i < $last; $i++) {
            $res = $this->isMinPoint($i);
            if (is_array($res)) {
                $this->min_points[] = (int) (($res[0] + $res[1]) / 2);
                // Skip the rest of the flat valley.
                $i = $res[1];
            } elseif ($res === true) {
                $this->min_points[] = $i;
            }
        }
    }

    /**
     * Tell whether column $i is a local minimum of the projection.
     *
     * @param int $i column index
     * @author mohuishou<1@lailin.xyz>
     * @return array|bool true for a sharp minimum, [$i, $j] for a flat
     *                    valley spanning columns $i..$j, false otherwise
     * @throws \Exception when $i is the last column
     */
    public function isMinPoint($i)
    {
        if ($i == 0) {
            return false;
        }

        $next = $this->H($i);
        $prev = $this->H($i - 1);

        // The projection must be falling into column $i.
        if ($prev >= 0) {
            return false;
        }
        if ($next > 0) {
            return true;
        }
        if ($next < 0) {
            return false;
        }

        // Flat valley: walk to its end without stepping onto the last
        // column, for which no difference can be computed.
        $last = count($this->data) - 1;
        $j = $i;
        while ($j < $last && $this->H($j) == 0) {
            $j++;
        }
        if ($j < $last && $this->H($j) > 0) {
            return [$i, $j];
        }

        return false;
    }

    /**
     * Forward difference of the projection: data[$i + 1] - data[$i].
     *
     * @param int $i column index
     * @author mohuishou<1@lailin.xyz>
     * @return mixed
     * @throws \Exception when $i is the last column
     */
    protected function H($i)
    {
        if ($i == count($this->data) - 1) {
            throw new \Exception("最后一点无需计算");
        }
        return $this->data[$i + 1] - $this->data[$i];
    }
}