├── .github
│   └── workflows
│       └── main.yml
├── .gitignore
├── LICENSE
├── README.md
├── setup.py
└── winocr.py


/.github/workflows/main.yml:
--------------------------------------------------------------------------------
# Workflow that builds the package and uploads it with the PyPI publish action.
name: Publish Python 🐍 distributions 📦 to PyPI

# Triggered by every push event; no branch or tag filter is configured here.
on: push

jobs:
  build-n-publish:
    # NOTE(review): the job name mentions TestPyPI, but the only publish step
    # defined below is the PyPI one — confirm whether TestPyPI is intended.
    name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI
    runs-on: ubuntu-latest
    steps:
    # Check out the repository contents (tracks the action's `main` branch).
    - uses: actions/checkout@main
    - name: Set up Python 3.10
      uses: actions/setup-python@v3
      with:
        python-version: "3.10"
    # `build` is the PyPA front end invoked by the next step.
    - name: Install pypa/build
      run: python -m pip install build --user
    - name: Build a binary wheel and a source tarball
      run: python -m build
    - name: Publish a Python distribution to PyPI
      uses: pypa/gh-action-pypi-publish@release/v1
      with:
        # Upload credential taken from the repository secret PYPI_API_TOKEN.
        password: ${{ secrets.PYPI_API_TOKEN }}
23 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Distribution / packaging
2 | build/
3 | dist/
4 | *.egg-info/


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Tomofumi Inoue
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # WinOCR
  2 | [![Python](https://img.shields.io/pypi/pyversions/winocr.svg)](https://badge.fury.io/py/winocr)
  3 | [![PyPI](https://badge.fury.io/py/winocr.svg)](https://badge.fury.io/py/winocr)
  4 | 
  5 | # Installation
  6 | ```powershell
  7 | pip install winocr
  8 | ```
  9 | 
 10 | <details>
 11 |   <summary>Full install</summary>
 12 |   
 13 |   ```powershell
 14 |   pip install winocr[all]
 15 |   ```
 16 | </details>
 17 | 
 18 | # Usage
 19 | 
 20 | ## Pillow
 21 | 
 22 | The language to be recognized can be specified by the lang parameter (second argument).
 23 | 
 24 | ```python
 25 | import winocr
 26 | from PIL import Image
 27 | 
 28 | img = Image.open('test.jpg')
 29 | (await winocr.recognize_pil(img, 'ja')).text
 30 | ```
 31 | ![](https://camo.githubusercontent.com/4e68db4fc3106c03e9919eb4391ce7548c1321429f9dc1a95a6937f51f01d5f6/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f363337383562393633666135643637653966326265316163396534393533353739663463323538342f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663333333733303631333533343633333832643632333533363631326433353333363233383264333636363332333532643336333433333333363333313336333033383338363636313265373036653637)
 32 | 
 33 | ## OpenCV
 34 | 
 35 | ```python
 36 | import winocr
 37 | import cv2
 38 | 
 39 | img = cv2.imread('test.jpg')
 40 | (await winocr.recognize_cv2(img, 'ja')).text
 41 | ```
 42 | ![](https://camo.githubusercontent.com/fbbc81dd9fb138032625585dd3cd41a4b14b14621be77c11a15ea8949a3cc8a3/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f616439313337366536316230653332613234336664633932613435383665383763386636383362612f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663635333833303331333336333338333632643631333833333332326433393338333736333264333533373633333832643331333533383335333633353636333433313330333433323265373036653637)
 43 | 
 44 | ## Connect to local runtime on Colaboratory
 45 | 
 46 | Create a local connection by following [these instructions](https://research.google.com/colaboratory/local-runtimes.html).
 47 | 
 48 | ```powershell
 49 | pip install jupyterlab jupyter_http_over_ws
 50 | jupyter serverextension enable --py jupyter_http_over_ws
 51 | jupyter notebook --NotebookApp.allow_origin='https://colab.research.google.com' --ip=0.0.0.0 --port=8888 --NotebookApp.port_retries=0
 52 | ```
 53 | 
 54 | ![](https://i.imgur.com/gvj959U.png)
 55 | 
 56 | ![](https://i.imgur.com/o9e0Fwk.png)
 57 | 
 58 | Also available on Jupyter / Jupyter Lab.
 59 | 
 60 | ## REPL
 61 | 
 62 | ```python
 63 | import cv2
 64 | from winocr import recognize_cv2_sync
 65 | 
 66 | img = cv2.imread('testocr.png')
 67 | recognize_cv2_sync(img)['text']
 68 | 'This is a lot of 12 point text to test the ocr code and see if it works on all types of file format. The quick brown dog jumped over the lazy fox. The quick brown dog jumped over the lazy fox. The quick brown dog jumped over the lazy fox. The quick brown dog jumped over the lazy fox.'
 69 | ```
 70 | 
 71 | ```python
 72 | from PIL import Image
 73 | from winocr import recognize_pil_sync
 74 | 
 75 | img = Image.open('testocr.png')
 76 | recognize_pil_sync(img)['text']
 77 | 'This is a lot of 12 point text to test the ocr code and see if it works on all types of file format. The quick brown dog jumped over the lazy fox. The quick brown dog jumped over the lazy fox. The quick brown dog jumped over the lazy fox. The quick brown dog jumped over the lazy fox.'
 78 | ```
 79 | 
 80 | ## Multi-Processing
 81 | 
 82 | ```python
 83 | from PIL import Image
 84 | import concurrent.futures
 85 | from winocr import recognize_pil_sync
 86 | 
 87 | images = [Image.open('testocr.png') for i in range(1000)]
 88 | 
 89 | with concurrent.futures.ProcessPoolExecutor() as executor:
 90 |   results = list(executor.map(recognize_pil_sync, images))
 91 | print(results)
 92 | ```
 93 | 
 94 | ## Web API
 95 | 
 96 | Run server
 97 | ```powershell
 98 | pip install winocr[api]
 99 | winocr_serve
100 | ```
101 | 
102 | ### curl
103 | 
104 | ```bash
105 | curl localhost:8000?lang=ja --data-binary @test.jpg
106 | ```
107 | ![](https://camo.githubusercontent.com/658ff5e7ff505281fc464f642579ab8dac1a7e9120a0345c0eeaf0f46995c404/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f396463623138383330656665343832643962626231633861393064383032303566373131313265642f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663636363433313338333433353636363332643635333633343337326433303634333736363264333533333336363532643336333436353333363333303332363336333338363133313265373036653637)
108 | 
109 | ### Python
110 | 
111 | ```python
112 | import requests
113 | 
114 | bytes = open('test.jpg', 'rb').read()
115 | requests.post('http://localhost:8000/?lang=ja', bytes).json()['text']
116 | ```
117 | 
118 | ![](https://camo.githubusercontent.com/fb338aadf3f057e14c4b6474f4802b6958f9264aff634fdf22d7d5b321747bd5/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f303438353362653766613263333839623339323161653461303938663165343161626162316136372f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663634333733313632333333353331333632643332363436363635326436313634333933313264363236363631333532643636333433373636333736323338333636333332363336333265373036653637)
119 | 
120 | You can also call the OCR server from a Colaboratory runtime by exposing it with `./ngrok http 8000`
121 | 
122 | ```python
123 | from PIL import Image
124 | from io import BytesIO
125 | 
126 | img = Image.open('test.jpg')
127 | # Preprocessing
128 | buf = BytesIO()
129 | img.save(buf, format='JPEG')
130 | requests.post('https://15a5fabf0d78.ngrok.io/?lang=ja', buf.getvalue()).json()['text']
131 | ```
132 | ![](https://camo.githubusercontent.com/61adc7eb41c54bedfd19ab3ce2e55dd7b0c865a22c0ab787439296a0afc75d7a/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f656538343938663932656566303336333262623064336162623236646531323639393730393030632f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663333333933303634333933353339333732643331363433353335326433353634333136333264333833353332333032643331333536333334363133383331333133383634363436343265373036653637)
133 | 
134 | ```python
135 | import cv2
136 | import requests
137 | 
138 | img = cv2.imread('test.jpg')
139 | # Preprocessing
140 | requests.post('https://15a5fabf0d78.ngrok.io/?lang=ja', cv2.imencode('.jpg', img)[1].tobytes()).json()['text']
141 | ```
142 | ![](https://camo.githubusercontent.com/a303dc95a4df7dbef67143a983b7792172b3d1b1837b0be7e7fa3c8a92b728d7/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f653566346530626630353338623835316464643532353837393630306137313261336365393738612f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663339363133333634363133353334363132643336333636343336326433393631333833323264333733303338363632643634363433343337363333323338333233313339333033303265373036653637)
143 | 
144 | ### JavaScript
145 | 
146 | If you only need to support Chrome and only need to recognize English text, you can also consider the Text Detection API.
147 | 
148 | ```javascript
149 | // File
150 | const file = document.querySelector('[type=file]').files[0]
151 | await fetch('http://localhost:8000/', {method: 'POST', body: file}).then(r => r.json())
152 | 
153 | // Blob
154 | const blob = await fetch('https://image.itmedia.co.jp/ait/articles/1706/15/news015_16.jpg').then(r=>r.blob())
155 | await fetch('http://localhost:8000/?lang=ja', {method: 'POST', body: blob}).then(r => r.json())
156 | ```
157 | 
158 | It is also possible to run OCR Server on Windows Server.
159 | 
160 | # Information that can be obtained
161 | You can get **angle**, **text**, **line**, **word**, **BoundingBox**.
162 | 
163 | ```python
164 | import pprint
165 | 
166 | result = await winocr.recognize_pil(img, 'ja')
167 | pprint.pprint({
168 |     'text_angle': result.text_angle,
169 |     'text': result.text,
170 |     'lines': [{
171 |         'text': line.text,
172 |         'words': [{
173 |             'bounding_rect': {'x': word.bounding_rect.x, 'y': word.bounding_rect.y, 'width': word.bounding_rect.width, 'height': word.bounding_rect.height},
174 |             'text': word.text
175 |         } for word in line.words]
176 |     } for line in result.lines]
177 | })
178 | ```
179 | ![](https://camo.githubusercontent.com/c0715ad500369e6b1b498293335bd8844e38baee7ead335a7047128947f0b9b6/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f636561393234303738393733346663323734383663363265666563373936623633393764376433352f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663633363633353334333736323331333132643331333033383634326436333633333533333264363533383633333332643331333636363333333736353634333233383631363333353265373036653637)
180 | 
181 | # Language installation
182 | ```powershell
183 | # Run as Administrator
184 | Add-WindowsCapability -Online -Name "Language.OCR~~~en-US~0.0.1.0"
185 | Add-WindowsCapability -Online -Name "Language.OCR~~~ja-JP~0.0.1.0"
186 | 
187 | # Search for installed languages
188 | Get-WindowsCapability -Online -Name "Language.OCR*"
189 | # A language whose State is NotPresent is not installed; install it if necessary.
190 | Name         : Language.OCR~~~hu-HU~0.0.1.0
191 | State        : NotPresent
192 | DisplayName  : ハンガリー語の光学式文字認識
193 | Description  : ハンガリー語の光学式文字認識
194 | DownloadSize : 194407
195 | InstallSize  : 535714
196 | 
197 | Name         : Language.OCR~~~it-IT~0.0.1.0
198 | State        : NotPresent
199 | DisplayName  : イタリア語の光学式文字認識
200 | Description  : イタリア語の光学式文字認識
201 | DownloadSize : 159875
202 | InstallSize  : 485922
203 | 
204 | Name         : Language.OCR~~~ja-JP~0.0.1.0
205 | State        : Installed
206 | DisplayName  : 日本語の光学式文字認識
207 | Description  : 日本語の光学式文字認識
208 | DownloadSize : 1524589
209 | InstallSize  : 3398536
210 | 
211 | Name         : Language.OCR~~~ko-KR~0.0.1.0
212 | State        : NotPresent
213 | DisplayName  : 韓国語の光学式文字認識
214 | Description  : 韓国語の光学式文字認識
215 | DownloadSize : 3405683
216 | InstallSize  : 7890408
217 | ```
218 | 
219 | If you would rather not use Python and just want to run OCR from PowerShell, click [here](https://gist.github.com/GitHub30/8bc1e784148e4f9801520c7e7ba191ea)
220 | 
221 | # Multi-Processing
222 | 
223 | Processing the images in parallel is about 3 times faster. You can make it even faster by increasing the number of cores!
224 | 
225 | ```python
226 | from PIL import Image
227 | 
228 | images = [Image.open('testocr.png') for i in range(1000)]
229 | ```
230 | 
231 | ### 1 core(elapsed 48s)
232 | 
233 | The CPU is not fully utilized.
234 | ![](https://camo.githubusercontent.com/a9003bdc7db7d8c0524fd8f9ef2394eac4a7ad68ba618954f518ed81a12738e8/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f633963393931656231343733313337383636666238363933656231643462656637623661646466632f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663636363133323633333236363335333232643339363633383336326436343334333533323264363433323633333732643631363233333633333036353330363136363338333736343265373036653637)
235 | 
236 | ```python
237 | import winocr
238 | 
239 | [(await winocr.recognize_pil(img)).text for img in images]
240 | ```
241 | ![](https://camo.githubusercontent.com/5e965ce96d5b3fdb5220c619ceb1597d09fea8d34df5f3a7a0b5388a8286a034/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f356261623862393830666565333764363632663733383933646632613463306234623439346464312f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663635363536353336363336333332333032643337363636333335326433373634363233373264333833343634363232643633363136353631363533323634363536363631333933393265373036653637)
242 | 
243 | ### 4 cores(elapsed 16s)
244 | 
245 | CPU usage reaches 100%.
246 | 
247 | ![](https://camo.githubusercontent.com/9bc7fc8bbf5c1e5cc9a89e4fb2233900867b6f79019ee530fe36e5d36c896ad9/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f323732326136303261313930616335653534646637313634623965336366373134636234386434322f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663337363336353635363236363331363532643636333233323636326433353330363533353264333933363335363132643334333033323636363636333337333833363334333536323265373036653637)
248 | 
249 | Create a worker module.
250 | ```python
251 | %%writefile worker.py
252 | import winocr
253 | import asyncio
254 | 
255 | async def ensure_coroutine(awaitable):
256 |     return await awaitable
257 | 
258 | def recognize_pil_text(img):
259 |     return asyncio.run(ensure_coroutine(winocr.recognize_pil(img))).text
260 | ```
261 | 
262 | ```python
263 | import worker
264 | import concurrent.futures
265 | 
266 | with concurrent.futures.ProcessPoolExecutor() as executor:
267 |   # https://stackoverflow.com/questions/62488423
268 |   results = executor.map(worker.recognize_pil_text, images)
269 | list(results)
270 | ```
271 | 
272 | ![](https://camo.githubusercontent.com/cd21e01dd05a064986c764e0b86aa98f3b25ad3b346ff5bdfee3d1dd7dbae132/68747470733a2f2f63616d6f2e716969746175736572636f6e74656e742e636f6d2f653137323531336435386531306339616436646464313438656562373865316263313132663632342f36383734373437303733336132663266373136393639373436313264363936643631363736353264373337343666373236353265373333333265363137303264366536663732373436383635363137333734326433313265363136643631376136663665363137373733326536333666366432663330326633323330333833333336333332663631333733313336333733353337333132643631363133353634326436333632333133353264363136343631333132643631333236343332333033303635333533383635363233383265373036653637)


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import setuptools
 2 | 
 3 | api = ["Pillow", "fastapi", "uvicorn"]
 4 | cv2 = ["opencv-python"]
 5 | all = api + cv2
 6 | 
 7 | setuptools.setup(
 8 |     name="winocr",
 9 |     version="0.0.15",
10 |     author="Tomofumi Inoue",
11 |     author_email="funaox@gmail.com",
12 |     description="Windows.Media.Ocr",
13 |     long_description=open("README.md", encoding="utf-8").read(),
14 |     long_description_content_type="text/markdown",
15 |     url="https://github.com/GitHub30/winocr",
16 |     project_urls={"Bug Tracker": "https://github.com/GitHub30/winocr/issues",},
17 |     classifiers=[
18 |         "Programming Language :: Python :: 3",
19 |         "License :: OSI Approved :: MIT License",
20 |         "Operating System :: Microsoft :: Windows :: Windows 10",
21 |     ],
22 |     install_requires=[
23 |         "winrt-windows-foundation-collections",
24 |         "winrt-windows-foundation",
25 |         "winrt-windows-globalization",
26 |         "winrt-windows-graphics-imaging",
27 |         "winrt-windows-media-ocr",
28 |         "winrt-windows-storage-streams",
29 |     ],
30 |     extras_require={"all": all, "api": api, "cv2": cv2},
31 |     py_modules=["winocr"],
32 |     entry_points={"console_scripts": ["winocr_serve = winocr:serve"]},
33 | )
34 | # Publish commands
35 | # https://packaging.python.org/tutorials/packaging-projects/
36 | # pip install --upgrade pip build twine
37 | # python -m build
38 | # python -m twine upload dist/*
39 | 
40 | 


--------------------------------------------------------------------------------
/winocr.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | from winrt.windows.media.ocr import OcrEngine
 3 | from winrt.windows.globalization import Language
 4 | from winrt.windows.storage.streams import DataWriter
 5 | from winrt.windows.graphics.imaging import SoftwareBitmap, BitmapPixelFormat
 6 | 
 7 | def recognize_bytes(bytes, width, height, lang='en'):
 8 |     cmd = 'Add-WindowsCapability -Online -Name "Language.OCR~~~en-US~0.0.1.0"'
 9 |     assert OcrEngine.is_language_supported(Language(lang)), cmd
10 |     writer = DataWriter()
11 |     writer.write_bytes(bytes)
12 |     sb = SoftwareBitmap.create_copy_from_buffer(writer.detach_buffer(), BitmapPixelFormat.RGBA8, width, height)
13 |     return OcrEngine.try_create_from_language(Language(lang)).recognize_async(sb)
14 | 
def recognize_pil(img, lang='en'):
    """Start OCR on a Pillow image.

    The image is converted to mode ``'RGBA'`` when it is in any other mode,
    then its raw bytes and dimensions are forwarded to ``recognize_bytes``.

    Args:
        img: Image object providing ``mode``, ``convert``, ``tobytes``,
            ``width`` and ``height``.
        lang: Language tag forwarded to ``recognize_bytes``.

    Returns:
        Whatever ``recognize_bytes`` returns (an awaitable OCR operation).
    """
    rgba = img if img.mode == 'RGBA' else img.convert('RGBA')
    return recognize_bytes(rgba.tobytes(), rgba.width, rgba.height, lang)
19 | 
def recognize_cv2(img, lang='en'):
    """Start OCR on an OpenCV image array.

    The array is converted to RGBA and its raw bytes and dimensions are
    forwarded to ``recognize_bytes``.

    Args:
        img: Image array as used with ``cv2.cvtColor``. Colour input is
            converted with ``COLOR_BGR2RGBA`` (i.e. treated as BGR-ordered);
            single-channel input, shaped ``(h, w)`` or ``(h, w, 1)``, is
            converted with ``COLOR_GRAY2RGBA``.
        lang: Language tag forwarded to ``recognize_bytes``.

    Returns:
        Whatever ``recognize_bytes`` returns (an awaitable OCR operation).
    """
    # Imported lazily so OpenCV is only required when this function is used.
    import cv2

    # Grayscale images (common after OCR preprocessing such as thresholding)
    # are rejected by COLOR_BGR2RGBA, so pick the matching conversion code.
    is_gray = img.ndim == 2 or (img.ndim == 3 and img.shape[2] == 1)
    code = cv2.COLOR_GRAY2RGBA if is_gray else cv2.COLOR_BGR2RGBA
    rgba = cv2.cvtColor(img, code)
    # shape is (rows, cols, channels): width is shape[1], height is shape[0].
    return recognize_bytes(rgba.tobytes(), rgba.shape[1], rgba.shape[0], lang)
24 | 
def picklify(o):
    """Recursively convert an object graph into plain lists and dicts.

    * An object with a ``size`` attribute is iterated and becomes a list of
      converted elements.
    * Otherwise, an object with a ``__module__`` attribute becomes a dict
      mapping each name from ``dir(o)`` that does not start with ``_`` to its
      converted value.
    * Anything else is returned unchanged.
    """
    if hasattr(o, 'size'):
        return list(map(picklify, o))
    if not hasattr(o, '__module__'):
        return o
    public_names = (name for name in dir(o) if not name.startswith('_'))
    return {name: picklify(getattr(o, name)) for name in public_names}
32 | 
async def to_coroutine(awaitable):
    """Await *awaitable* from inside a coroutine and return its result.

    Wrapping an arbitrary awaitable this way yields a coroutine object, which
    is what the ``asyncio.run`` calls in this module are given.
    """
    outcome = await awaitable
    return outcome
35 | 
def recognize_pil_sync(img, lang='en'):
    """Blocking variant of ``recognize_pil``.

    Starts recognition, drives it to completion with ``asyncio.run`` and
    returns the result converted by ``picklify``.
    """
    pending = recognize_pil(img, lang)
    ocr_result = asyncio.run(to_coroutine(pending))
    return picklify(ocr_result)
38 | 
def recognize_cv2_sync(img, lang='en'):
    """Blocking variant of ``recognize_cv2``.

    Starts recognition, drives it to completion with ``asyncio.run`` and
    returns the result converted by ``picklify``.
    """
    pending = recognize_cv2(img, lang)
    ocr_result = asyncio.run(to_coroutine(pending))
    return picklify(ocr_result)
41 | 
def serve():
    """Run an HTTP server that exposes OCR as a single ``POST /`` endpoint.

    The request body is opened as an image with Pillow, recognized with
    ``recognize_pil`` using the ``lang`` query parameter (default ``'en'``),
    and the ``picklify``-converted result is returned as indented JSON.
    CORS is opened to all origins, methods and headers, and uvicorn is bound
    to host ``0.0.0.0``.
    """
    # Dependencies are imported lazily so the core module works without the
    # optional web packages installed.
    import json
    from io import BytesIO
    import uvicorn
    from PIL import Image
    from fastapi import FastAPI, Request, Response
    from fastapi.middleware.cors import CORSMiddleware

    cors_options = dict(
        allow_origins=['*'],
        allow_credentials=True,
        allow_methods=['*'],
        allow_headers=['*'],
    )
    app = FastAPI()
    app.add_middleware(CORSMiddleware, **cors_options)

    @app.post('/')
    async def recognize(request: Request, lang: str = 'en'):
        # The raw request body is the encoded image file itself.
        payload = await request.body()
        image = Image.open(BytesIO(payload))
        result = await recognize_pil(image, lang)
        # ensure_ascii=False keeps non-ASCII recognized text readable.
        body = json.dumps(picklify(result), indent=2, ensure_ascii=False)
        return Response(body, media_type='application/json')

    uvicorn.run(app, host='0.0.0.0')
57 | 
# Running this module as a script starts the HTTP server via serve().
if __name__ == '__main__':
    serve()
60 | 


--------------------------------------------------------------------------------