├── .github └── workflows │ └── main.yml ├── .gitignore ├── LICENSE ├── README.md ├── setup.py └── winocr.py /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distributions 📦 to PyPI 2 | 3 | on: push 4 | 5 | jobs: 6 | build-n-publish: 7 | name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@main 11 | - name: Set up Python 3.10 12 | uses: actions/setup-python@v3 13 | with: 14 | python-version: "3.10" 15 | - name: Install pypa/build 16 | run: python -m pip install build --user 17 | - name: Build a binary wheel and a source tarball 18 | run: python -m build 19 | - name: Publish a Python distribution to PyPI 20 | uses: pypa/gh-action-pypi-publish@release/v1 21 | with: 22 | password: ${{ secrets.PYPI_API_TOKEN }} 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Distribution / packaging 2 | build/ 3 | dist/ 4 | *.egg-info/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Tomofumi Inoue 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # WinOCR 2 | [![Python](https://img.shields.io/pypi/pyversions/winocr.svg)](https://badge.fury.io/py/winocr) 3 | [![PyPI](https://badge.fury.io/py/winocr.svg)](https://badge.fury.io/py/winocr) 4 | 5 | # Installation 6 | ```powershell 7 | pip install winocr 8 | ``` 9 | 10 |
11 | Full install 12 | 13 | ```powershell 14 | pip install winocr[all] 15 | ``` 16 |
17 | 18 | # Usage 19 | 20 | ## Pillow 21 | 22 | The language to be recognized can be specified by the lang parameter (second argument). 23 | 24 | ```python 25 | import winocr 26 | from PIL import Image 27 | 28 | img = Image.open('test.jpg') 29 | (await winocr.recognize_pil(img, 'ja')).text 30 | ``` 31 | ![](https://camo.githubusercontent.com/4e68db4fc3106c03e9919eb4391ce7548c1321429f9dc1a95a6937f51f01d5f6/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f363337383562393633666135643637653966326265316163396534393533353739663463323538342f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663333333733303631333533343633333832643632333533363631326433353333363233383264333636363332333532643336333433333333363333313336333033383338363636313265373036653637) 32 | 33 | ## OpenCV 34 | 35 | ```python 36 | import winocr 37 | import cv2 38 | 39 | img = cv2.imread('test.jpg') 40 | (await winocr.recognize_cv2(img, 'ja')).text 41 | ``` 42 | ![](https://camo.githubusercontent.com/fbbc81dd9fb138032625585dd3cd41a4b14b14621be77c11a15ea8949a3cc8a3/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f616439313337366536316230653332613234336664633932613435383665383763386636383362612f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663635333833303331333336333338333632643631333833333332326433393338333736333264333533373633333832643331333533383335333633353636333433313330333433323265373036653637) 43 | 44 | ## Connect to local runtime on Colaboratory 45 | 46 | Create a local connection by following [these 
instructions](https://research.google.com/colaboratory/local-runtimes.html). 47 | 48 | ```powershell 49 | pip install jupyterlab jupyter_http_over_ws 50 | jupyter serverextension enable --py jupyter_http_over_ws 51 | jupyter notebook --NotebookApp.allow_origin='https://colab.research.google.com' --ip=0.0.0.0 --port=8888 --NotebookApp.port_retries=0 52 | ``` 53 | 54 | ![](https://i.imgur.com/gvj959U.png) 55 | 56 | ![](https://i.imgur.com/o9e0Fwk.png) 57 | 58 | Also available on Jupyter / Jupyter Lab. 59 | 60 | ## REPL 61 | 62 | ```python 63 | import cv2 64 | from winocr import recognize_cv2_sync 65 | 66 | img = cv2.imread('testocr.png') 67 | recognize_cv2_sync(img)['text'] 68 | 'This is a lot of 12 point text to test the ocr code and see if it works on all types of file format. The quick brown dog jumped over the lazy fox. The quick brown dog jumped over the lazy fox. The quick brown dog jumped over the lazy fox. The quick brown dog jumped over the lazy fox.' 69 | ``` 70 | 71 | ```python 72 | from PIL import Image 73 | from winocr import recognize_pil_sync 74 | 75 | img = Image.open('testocr.png') 76 | recognize_pil_sync(img)['text'] 77 | 'This is a lot of 12 point text to test the ocr code and see if it works on all types of file format. The quick brown dog jumped over the lazy fox. The quick brown dog jumped over the lazy fox. The quick brown dog jumped over the lazy fox. The quick brown dog jumped over the lazy fox.' 
78 | ``` 79 | 80 | ## Multi-Processing 81 | 82 | ```python 83 | from PIL import Image 84 | import concurrent.futures 85 | from winocr import recognize_pil_sync 86 | 87 | images = [Image.open('testocr.png') for i in range(1000)] 88 | 89 | with concurrent.futures.ProcessPoolExecutor() as executor: 90 | results = list(executor.map(recognize_pil_sync, images)) 91 | print(results) 92 | ``` 93 | 94 | ## Web API 95 | 96 | Run server 97 | ```powershell 98 | pip install winocr[api] 99 | winocr_serve 100 | ``` 101 | 102 | ### curl 103 | 104 | ```bash 105 | curl localhost:8000?lang=ja --data-binary @test.jpg 106 | ``` 107 | ![](https://camo.githubusercontent.com/658ff5e7ff505281fc464f642579ab8dac1a7e9120a0345c0eeaf0f46995c404/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f396463623138383330656665343832643962626231633861393064383032303566373131313265642f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663636363433313338333433353636363332643635333633343337326433303634333736363264333533333336363532643336333436353333363333303332363336333338363133313265373036653637) 108 | 109 | ### Python 110 | 111 | ```python 112 | import requests 113 | 114 | bytes = open('test.jpg', 'rb').read() 115 | requests.post('http://localhost:8000/?lang=ja', bytes).json()['text'] 116 | ``` 117 | 118 | 
![](https://camo.githubusercontent.com/fb338aadf3f057e14c4b6474f4802b6958f9264aff634fdf22d7d5b321747bd5/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f303438353362653766613263333839623339323161653461303938663165343161626162316136372f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663634333733313632333333353331333632643332363436363635326436313634333933313264363236363631333532643636333433373636333736323338333636333332363336333265373036653637) 119 | 120 | You can run OCR with the Colaboratory runtime with `./ngrok http 8000` 121 | 122 | ```python 123 | from PIL import Image 124 | from io import BytesIO 125 | 126 | img = Image.open('test.jpg') 127 | # Preprocessing 128 | buf = BytesIO() 129 | img.save(buf, format='JPEG') 130 | requests.post('https://15a5fabf0d78.ngrok.io/?lang=ja', buf.getvalue()).json()['text'] 131 | ``` 132 | ![](https://camo.githubusercontent.com/61adc7eb41c54bedfd19ab3ce2e55dd7b0c865a22c0ab787439296a0afc75d7a/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f656538343938663932656566303336333262623064336162623236646531323639393730393030632f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663333333933303634333933353339333732643331363433353335326433353634333136333264333833353332333032643331333536333334363133383331333133383634363436343265373036653637) 133 | 134 | ```python 135 | import cv2 136 | import requests 137 | 138 | img = cv2.imread('test.jpg') 139 | # Preprocessing 140 | requests.post('https://15a5fabf0d78.ngrok.io/?lang=ja', cv2.imencode('.jpg', img)[1].tobytes()).json()['text'] 
141 | ``` 142 | ![](https://camo.githubusercontent.com/a303dc95a4df7dbef67143a983b7792172b3d1b1837b0be7e7fa3c8a92b728d7/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f653566346530626630353338623835316464643532353837393630306137313261336365393738612f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663339363133333634363133353334363132643336333636343336326433393631333833323264333733303338363632643634363433343337363333323338333233313339333033303265373036653637) 143 | 144 | ### JavaScript 145 | 146 | If you only need to recognize Chrome and English, you can also consider the Text Detection API. 147 | 148 | ```javascript 149 | // File 150 | const file = document.querySelector('[type=file]').files[0] 151 | await fetch('http://localhost:8000/', {method: 'POST', body: file}).then(r => r.json()) 152 | 153 | // Blob 154 | const blob = await fetch('https://image.itmedia.co.jp/ait/articles/1706/15/news015_16.jpg').then(r=>r.blob()) 155 | await fetch('http://localhost:8000/?lang=ja', {method: 'POST', body: blob}).then(r => r.json()) 156 | ``` 157 | 158 | It is also possible to run OCR Server on Windows Server. 159 | 160 | # Information that can be obtained 161 | You can get **angle**, **text**, **line**, **word**, **BoundingBox**. 
162 | 163 | ```python 164 | import pprint 165 | 166 | result = await winocr.recognize_pil(img, 'ja') 167 | pprint.pprint({ 168 | 'text_angle': result.text_angle, 169 | 'text': result.text, 170 | 'lines': [{ 171 | 'text': line.text, 172 | 'words': [{ 173 | 'bounding_rect': {'x': word.bounding_rect.x, 'y': word.bounding_rect.y, 'width': word.bounding_rect.width, 'height': word.bounding_rect.height}, 174 | 'text': word.text 175 | } for word in line.words] 176 | } for line in result.lines] 177 | }) 178 | ``` 179 | ![](https://camo.githubusercontent.com/c0715ad500369e6b1b498293335bd8844e38baee7ead335a7047128947f0b9b6/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f636561393234303738393733346663323734383663363265666563373936623633393764376433352f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663633363633353334333736323331333132643331333033383634326436333633333533333264363533383633333332643331333636363333333736353634333233383631363333353265373036653637) 180 | 181 | # Language installation 182 | ```powershell 183 | # Run as Administrator 184 | Add-WindowsCapability -Online -Name "Language.OCR~~~en-US~0.0.1.0" 185 | Add-WindowsCapability -Online -Name "Language.OCR~~~ja-JP~0.0.1.0" 186 | 187 | # Search for installed languages 188 | Get-WindowsCapability -Online -Name "Language.OCR*" 189 | # State: Not Present language is not installed, so please install it if necessary. 
190 | Name : Language.OCR~~~hu-HU~0.0.1.0 191 | State : NotPresent 192 | DisplayName : ハンガリー語の光学式文字認識 193 | Description : ハンガリー語の光学式文字認識 194 | DownloadSize : 194407 195 | InstallSize : 535714 196 | 197 | Name : Language.OCR~~~it-IT~0.0.1.0 198 | State : NotPresent 199 | DisplayName : イタリア語の光学式文字認識 200 | Description : イタリア語の光学式文字認識 201 | DownloadSize : 159875 202 | InstallSize : 485922 203 | 204 | Name : Language.OCR~~~ja-JP~0.0.1.0 205 | State : Installed 206 | DisplayName : 日本語の光学式文字認識 207 | Description : 日本語の光学式文字認識 208 | DownloadSize : 1524589 209 | InstallSize : 3398536 210 | 211 | Name : Language.OCR~~~ko-KR~0.0.1.0 212 | State : NotPresent 213 | DisplayName : 韓国語の光学式文字認識 214 | Description : 韓国語の光学式文字認識 215 | DownloadSize : 3405683 216 | InstallSize : 7890408 217 | ``` 218 | 219 | If you would rather not use Python and just want to run OCR from PowerShell, see [this gist](https://gist.github.com/GitHub30/8bc1e784148e4f9801520c7e7ba191ea). 220 | 221 | # Multi-Processing 222 | 223 | Processing images in parallel is about 3 times faster. You can make it even faster by increasing the number of cores! 224 | 225 | ```python 226 | from PIL import Image 227 | 228 | images = [Image.open('testocr.png') for i in range(1000)] 229 | ``` 230 | 231 | ### 1 core (elapsed 48s) 232 | 233 | The CPU is not fully utilized. 
234 | ![](https://camo.githubusercontent.com/a9003bdc7db7d8c0524fd8f9ef2394eac4a7ad68ba618954f518ed81a12738e8/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f633963393931656231343733313337383636666238363933656231643462656637623661646466632f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663636363133323633333236363335333232643339363633383336326436343334333533323264363433323633333732643631363233333633333036353330363136363338333736343265373036653637) 235 | 236 | ```python 237 | import winocr 238 | 239 | [(await winocr.recognize_pil(img)).text for img in images] 240 | ``` 241 | ![](https://camo.githubusercontent.com/5e965ce96d5b3fdb5220c619ceb1597d09fea8d34df5f3a7a0b5388a8286a034/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f356261623862393830666565333764363632663733383933646632613463306234623439346464312f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663635363536353336363336333332333032643337363636333335326433373634363233373264333833343634363232643633363136353631363533323634363536363631333933393265373036653637) 242 | 243 | ### 4 cores(elapsed 16s) 244 | 245 | I'm using 100% CPU. 
246 | 247 | ![](https://camo.githubusercontent.com/9bc7fc8bbf5c1e5cc9a89e4fb2233900867b6f79019ee530fe36e5d36c896ad9/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f323732326136303261313930616335653534646637313634623965336366373134636234386434322f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663337363336353635363236363331363532643636333233323636326433353330363533353264333933363335363132643334333033323636363636333337333833363334333536323265373036653637) 248 | 249 | Create a worker module. 250 | ```python 251 | %%writefile worker.py 252 | import winocr 253 | import asyncio 254 | 255 | async def ensure_coroutine(awaitable): 256 | return await awaitable 257 | 258 | def recognize_pil_text(img): 259 | return asyncio.run(ensure_coroutine(winocr.recognize_pil(img))).text 260 | ``` 261 | 262 | ```python 263 | import worker 264 | import concurrent.futures 265 | 266 | with concurrent.futures.ProcessPoolExecutor() as executor: 267 | # https://stackoverflow.com/questions/62488423 268 | results = executor.map(worker.recognize_pil_text, images) 269 | list(results) 270 | ``` 271 | 272 | 
![](https://camo.githubusercontent.com/cd21e01dd05a064986c764e0b86aa98f3b25ad3b346ff5bdfee3d1dd7dbae132/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f653137323531336435386531306339616436646464313438656562373865316263313132663632342f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663631333733313336333733353337333132643631363133353634326436333632333133353264363136343631333132643631333236343332333033303635333533383635363233383265373036653637) -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | api = ["Pillow", "fastapi", "uvicorn"] 4 | cv2 = ["opencv-python"] 5 | all = api + cv2 6 | 7 | setuptools.setup( 8 | name="winocr", 9 | version="0.0.15", 10 | author="Tomofumi Inoue", 11 | author_email="funaox@gmail.com", 12 | description="Windows.Media.Ocr", 13 | long_description=open("README.md", encoding="utf-8").read(), 14 | long_description_content_type="text/markdown", 15 | url="https://github.com/GitHub30/winocr", 16 | project_urls={"Bug Tracker": "https://github.com/GitHub30/winocr/issues",}, 17 | classifiers=[ 18 | "Programming Language :: Python :: 3", 19 | "License :: OSI Approved :: MIT License", 20 | "Operating System :: Microsoft :: Windows :: Windows 10", 21 | ], 22 | install_requires=[ 23 | "winrt-windows-foundation-collections", 24 | "winrt-windows-foundation", 25 | "winrt-windows-globalization", 26 | "winrt-windows-graphics-imaging", 27 | "winrt-windows-media-ocr", 28 | "winrt-windows-storage-streams", 29 | ], 30 | extras_require={"all": all, "api": api, "cv2": cv2}, 31 | py_modules=["winocr"], 32 | entry_points={"console_scripts": ["winocr_serve = winocr:serve"]}, 33 | ) 34 | 
# Publish commands
# https://packaging.python.org/tutorials/packaging-projects/
# pip install --upgrade pip build twine
# python -m build
# python -m twine upload dist/*

# --------------------------------------------------------------------------------
# /winocr.py:
# --------------------------------------------------------------------------------
"""Python helpers around the WinRT ``Windows.Media.Ocr`` engine.

The ``recognize_*`` functions return the awaitable produced by
``OcrEngine.recognize_async``; the ``*_sync`` variants run it to completion
and convert the result into plain Python containers.  ``serve`` exposes the
same functionality over HTTP.
"""
import asyncio
from winrt.windows.media.ocr import OcrEngine
from winrt.windows.globalization import Language
from winrt.windows.storage.streams import DataWriter
from winrt.windows.graphics.imaging import SoftwareBitmap, BitmapPixelFormat


def recognize_bytes(bytes, width, height, lang='en'):
    """Start OCR on a raw RGBA8 pixel buffer.

    Args:
        bytes: Pixel data handed to ``SoftwareBitmap`` as
            ``BitmapPixelFormat.RGBA8``.  The name shadows the builtin but is
            kept so that existing keyword callers continue to work.
        width: Bitmap width in pixels.
        height: Bitmap height in pixels.
        lang: Language tag used to build the WinRT ``Language``.

    Returns:
        The awaitable returned by ``OcrEngine.recognize_async``.

    Raises:
        AssertionError: If ``OcrEngine`` reports ``lang`` as unsupported.
    """
    language = Language(lang)
    # Raise explicitly instead of using ``assert``: an assert statement is
    # removed under ``python -O``, which would let an unsupported language
    # through to the engine-creation call below.  The exception type is
    # unchanged for callers that already catch AssertionError.
    if not OcrEngine.is_language_supported(language):
        raise AssertionError(
            f'OCR language {lang!r} is not available. Install the matching '
            'Windows capability from an elevated PowerShell, for example: '
            'Add-WindowsCapability -Online -Name "Language.OCR~~~en-US~0.0.1.0" '
            '(replace en-US with the locale you need)'
        )
    writer = DataWriter()
    writer.write_bytes(bytes)
    sb = SoftwareBitmap.create_copy_from_buffer(
        writer.detach_buffer(), BitmapPixelFormat.RGBA8, width, height
    )
    return OcrEngine.try_create_from_language(language).recognize_async(sb)


def recognize_pil(img, lang='en'):
    """Start OCR on a Pillow image.

    The image is converted to RGBA mode when it is not already in it, then
    forwarded to ``recognize_bytes``.

    Args:
        img: Pillow image (uses ``mode``, ``convert``, ``tobytes``, ``width``
            and ``height``).
        lang: Language tag forwarded to ``recognize_bytes``.

    Returns:
        The awaitable returned by ``recognize_bytes``.
    """
    if img.mode != 'RGBA':
        img = img.convert('RGBA')
    return recognize_bytes(img.tobytes(), img.width, img.height, lang)


def recognize_cv2(img, lang='en'):
    """Start OCR on an OpenCV image array.

    Args:
        img: Image array as produced by ``cv2.imread``: BGR (3 channels),
            BGRA (4 channels) or grayscale (2-D or a single channel).
        lang: Language tag forwarded to ``recognize_bytes``.

    Returns:
        The awaitable returned by ``recognize_bytes``.
    """
    import cv2  # optional dependency, imported only when this entry point is used

    # Pick the conversion that matches the channel layout; a fixed
    # BGR -> RGBA conversion rejects grayscale and BGRA inputs.
    if img.ndim == 2 or img.shape[2] == 1:
        code = cv2.COLOR_GRAY2RGBA
    elif img.shape[2] == 4:
        code = cv2.COLOR_BGRA2RGBA
    else:
        code = cv2.COLOR_BGR2RGBA
    img = cv2.cvtColor(img, code)
    return recognize_bytes(img.tobytes(), img.shape[1], img.shape[0], lang)


def picklify(o):
    """Recursively convert an object into lists, dicts and leaf values.

    * Objects exposing a ``size`` attribute are iterated and become lists.
    * Other objects exposing ``__module__`` become dicts of their public
      (non-underscore) attributes.
    * Everything else is returned unchanged.
    """
    if hasattr(o, 'size'):
        return [picklify(e) for e in o]
    elif hasattr(o, '__module__'):
        return {n: picklify(getattr(o, n)) for n in dir(o) if not n.startswith('_')}
    else:
        return o


async def to_coroutine(awaitable):
    """Wrap an awaitable in a coroutine so it can be passed to ``asyncio.run``."""
    return await awaitable


def recognize_pil_sync(img, lang='en'):
    """Blocking variant of ``recognize_pil``; returns the ``picklify``-ed result."""
    return picklify(asyncio.run(to_coroutine(recognize_pil(img, lang))))


def recognize_cv2_sync(img, lang='en'):
    """Blocking variant of ``recognize_cv2``; returns the ``picklify``-ed result."""
    return picklify(asyncio.run(to_coroutine(recognize_cv2(img, lang))))


def serve():
    """Run an HTTP server that performs OCR on POSTed image bytes.

    ``POST /`` reads the request body as an image file, runs
    ``recognize_pil`` with the ``lang`` query parameter (default ``'en'``)
    and responds with the ``picklify``-ed result as JSON.
    """
    import json
    import uvicorn
    from PIL import Image
    from io import BytesIO
    from fastapi import FastAPI, Request, Response
    from fastapi.middleware.cors import CORSMiddleware

    app = FastAPI()
    # NOTE(review): CORS is fully open and the server binds to all interfaces
    # below; confirm this is intended before exposing it beyond a trusted
    # network.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=['*'],
        allow_credentials=True,
        allow_methods=['*'],
        allow_headers=['*'],
    )

    @app.post('/')
    async def recognize(request: Request, lang: str = 'en'):
        result = await recognize_pil(Image.open(BytesIO(await request.body())), lang)
        # Serialize manually so non-ASCII text is emitted verbatim rather
        # than as \u escapes.
        return Response(
            json.dumps(picklify(result), indent=2, ensure_ascii=False),
            media_type='application/json',
        )

    uvicorn.run(app, host='0.0.0.0')


if __name__ == '__main__':
    serve()