├── .codacy.yml ├── .coveragerc ├── .deepsource.toml ├── .env ├── .eslintrc ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md ├── config.yml └── stale.yml ├── .gitignore ├── .travis.yml ├── .travis ├── .dockerignore ├── .env ├── Dockerfile.alpine ├── Dockerfile.debian ├── Dockerfile.debian-slim ├── Dockerfile.test ├── docker-compose.mariadb.yml ├── docker-compose.postgres.yml ├── docker-nginx.conf └── docker_test.sh ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Dockerfile ├── Dockerfile.test ├── LICENSE ├── MANIFEST.in ├── README.md ├── docker-compose.yml ├── docs ├── README.md ├── _config.yml ├── assets │ └── .gitkeep └── user │ └── .gitkeep ├── erpnext_ocr ├── __init__.py ├── config │ ├── __init__.py │ ├── desktop.py │ ├── docs.py │ └── erpnext_ocr.py ├── docs │ ├── __init__.py │ ├── assets │ │ └── .gitkeep │ ├── index.md │ └── user │ │ └── .gitkeep ├── erpnext_ocr │ ├── __init__.py │ └── doctype │ │ ├── __init__.py │ │ ├── ocr_import │ │ ├── __init__.py │ │ ├── ocr_import.js │ │ ├── ocr_import.json │ │ ├── ocr_import.py │ │ ├── test_ocr_import.js │ │ ├── test_ocr_import.py │ │ └── test_records.json │ │ ├── ocr_import_mapping │ │ ├── __init__.py │ │ ├── ocr_import_mapping.json │ │ └── ocr_import_mapping.py │ │ ├── ocr_language │ │ ├── __init__.py │ │ ├── ocr_language.js │ │ ├── ocr_language.json │ │ ├── ocr_language.py │ │ ├── test_ocr_language.js │ │ └── test_ocr_language.py │ │ ├── ocr_read │ │ ├── __init__.py │ │ ├── ocr_read.js │ │ ├── ocr_read.json │ │ ├── ocr_read.py │ │ ├── test_ocr_read.js │ │ └── test_ocr_read.py │ │ └── ocr_settings │ │ ├── __init__.py │ │ ├── ocr_settings.js │ │ ├── ocr_settings.json │ │ ├── ocr_settings.py │ │ ├── test_ocr_settings.js │ │ └── test_ocr_settings.py ├── fixtures │ └── ocr_language.json ├── hooks.py ├── install.py ├── modules.txt ├── patches.txt ├── public │ ├── css │ │ └── treeview.min.css.map │ └── js │ │ └── treeview.min.js.map ├── templates │ ├── 
__init__.py │ └── pages │ │ ├── __init__.py │ │ └── __pycache__ │ │ └── __init__.py ├── tests │ ├── README.md │ ├── __init__.py │ ├── test_config_desktop.py │ ├── test_config_docs.py │ ├── test_data │ │ ├── Picture_010.png │ │ ├── Picture_010.tif │ │ ├── Picture_010_output.txt │ │ ├── Picture_010_screenshot.png │ │ ├── item.pdf │ │ ├── sample1.jpg │ │ ├── sample1_output.txt │ │ └── sample2.pdf │ ├── test_spell_checker.py │ └── test_tesseract.py └── translations │ ├── en.csv │ ├── fr.csv │ └── ru.csv ├── license.txt ├── manage.sh ├── package-lock.json ├── package.json ├── requirements.txt └── setup.py /.codacy.yml: -------------------------------------------------------------------------------- 1 | exclude_paths: 2 | - '**.sql' 3 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | omit = 3 | */python?.?/* 4 | */site-packages/nose/* 5 | -------------------------------------------------------------------------------- /.deepsource.toml: -------------------------------------------------------------------------------- 1 | version = 1 2 | 3 | test_patterns = [ 4 | "**/test_*.py" 5 | ] 6 | 7 | exclude_patterns = [ 8 | "erpnext_ocr/patches/**", 9 | "*.min.js" 10 | ] 11 | 12 | [[analyzers]] 13 | name = "python" 14 | enabled = true 15 | 16 | [analyzers.meta] 17 | runtime_version = "3.x.x" 18 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | 2 | ######################################## 3 | # ERPNext configuration 4 | ######################################## 5 | 6 | ERPNEXT_HOME=/srv/erpnext/frappe 7 | 8 | ERPNEXT_SITE=localhost 9 | 10 | ERPNEXT_ADMIN_PWD=erpnext_admin_password 11 | 12 | # Generated with openssl rand -base64 32 13 | ERPNEXT_ENCRYPTION_KEY=dcNdIKUHX/Vgl1sEc0eJIChyYx+2dQ/uASjWXs9hnic= 14 | 15 | 
ERPNEXT_DB_ROOT_LOGIN=root 16 | ERPNEXT_DB_ROOT_PWD=erpnext_db_root_password 17 | 18 | # DB name will be used as the DB user 19 | ERPNEXT_DB_NAME=erpnext 20 | ERPNEXT_DB_PWD=erpnext_password 21 | 22 | # Local development configuration 23 | IMAGE_NAME=docker-erpnext:erpnext_ocr-dev 24 | FRAPPE_APP_TO_TEST=erpnext_ocr 25 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "globals": { 3 | "frappe": true, 4 | "__": true, 5 | "_p": true, 6 | "_f": true, 7 | "repl": true, 8 | "Class": true, 9 | "locals": true, 10 | "cint": true, 11 | "cstr": true, 12 | "cur_frm": true, 13 | "cur_dialog": true, 14 | "cur_page": true, 15 | "cur_list": true, 16 | "cur_tree": true, 17 | "msg_dialog": true, 18 | "is_null": true, 19 | "in_list": true, 20 | "has_common": true, 21 | "has_words": true, 22 | "validate_email": true, 23 | "get_number_format": true, 24 | "format_number": true, 25 | "format_currency": true, 26 | "comment_when": true, 27 | "open_url_post": true, 28 | "toTitle": true, 29 | "lstrip": true, 30 | "strip": true, 31 | "strip_html": true, 32 | "replace_all": true, 33 | "flt": true, 34 | "precision": true, 35 | "CREATE": true, 36 | "AMEND": true, 37 | "CANCEL": true, 38 | "copy_dict": true, 39 | "get_number_format_info": true, 40 | "print_table": true, 41 | "Layout": true, 42 | "web_form_settings": true, 43 | "$c": true, 44 | "$a": true, 45 | "$i": true, 46 | "$bg": true, 47 | "$y": true, 48 | "$c_obj": true, 49 | "refresh_many": true, 50 | "refresh_field": true, 51 | "toggle_field": true, 52 | "get_field_obj": true, 53 | "get_query_params": true, 54 | "unhide_field": true, 55 | "hide_field": true, 56 | "set_field_options": true, 57 | "getCookie": true, 58 | "getCookies": true, 59 | "get_url_arg": true, 60 | "md5": true, 61 | "$": true, 62 | "jQuery": true, 63 | "moment": true, 64 | "hljs": true, 65 | "Awesomplete": true, 66 | "Sortable": true, 
67 | "Showdown": true, 68 | "Taggle": true, 69 | "Gantt": true, 70 | "Slick": true, 71 | "Webcam": true, 72 | "PhotoSwipe": true, 73 | "PhotoSwipeUI_Default": true, 74 | "fluxify": true, 75 | "io": true, 76 | "QUnit": true, 77 | "JsBarcode": true, 78 | "L": true, 79 | "Chart": true, 80 | "DataTable": true 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: Bug report 4 | about: Create a report to help us improve 5 | title: '' 6 | labels: bug 7 | assignees: 8 | 9 | --- 10 | 11 | **Describe the bug** 12 | A clear and concise description of what the bug is. 13 | 14 | **To Reproduce** 15 | Steps to reproduce the behavior: 16 | 1. Go to '...' 17 | 2. Click on '....' 18 | 3. Scroll down to '....' 19 | 4. See error 20 | 21 | Provide Docker commands or docker-compose file if possible. 22 | 23 | **Expected behavior** 24 | A clear and concise description of what you expected to happen. 25 | 26 | **Screenshots** 27 | If applicable, add screenshots to help explain your problem. 28 | 29 | **Desktop (please complete the following information):** 30 | 31 | - OS: (e.g. iOS) 32 | - Browser (e.g. chrome, safari) 33 | - Version (e.g. 22) 34 | 35 | **Smartphone (please complete the following information):** 36 | 37 | - Device: (e.g. iPhone6) 38 | - OS: (e.g. iOS8.1) 39 | - Browser (e.g. stock browser, safari) 40 | - Version (e.g. 22) 41 | 42 | **Additional context** 43 | Add any other context about the problem here. 
44 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | name: Feature request 4 | about: Suggest an idea for this project 5 | title: '' 6 | labels: enhancement 7 | assignees: 8 | 9 | --- 10 | 11 | **Is your feature request related to a problem? Please describe.** 12 | A clear and concise description of what the problem is. Ex. I'm always frustrated when (...) 13 | 14 | **Describe the solution you'd like** 15 | A clear and concise description of what you want to happen. 16 | 17 | **Describe alternatives you've considered** 18 | A clear and concise description of any alternative solutions or features you've considered. 19 | 20 | **Additional context** 21 | Add any other context or screenshots about the feature request here. 22 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | > Please provide enough information so that others can review your pull request: 20 | 21 | 22 | 23 | > Explain the **details** for making this change. What existing problem does the pull request solve? 24 | 25 | 26 | 27 | > Screenshots/GIFs 28 | 29 | 30 | -------------------------------------------------------------------------------- /.github/config.yml: -------------------------------------------------------------------------------- 1 | # Configuration for new-issue-welcome - https://github.com/behaviorbot/new-issue-welcome 2 | 3 | # Comment to be posted on first-time issues 4 | newIssueWelcomeComment: > 5 | Thanks for opening your first issue here! Be sure to follow the issue template! 
6 | 7 | # Configuration for new-pr-welcome - https://github.com/behaviorbot/new-pr-welcome 8 | 9 | # Comment to be posted on PRs from first-time contributors in your repository 10 | newPRWelcomeComment: > 11 | Thanks for opening this pull request! Please check out our contributing guidelines. 12 | 13 | # Configuration for first-pr-merge - https://github.com/behaviorbot/first-pr-merge 14 | 15 | # Comment to be posted on pull requests merged by a first-time user 16 | firstPRMergeComment: > 17 | :tada: Congrats on merging your first pull request! We here at behaviorbot are proud of you! 18 | 19 | # It is recommended to include as many gifs and emojis as possible 20 | 21 | # Configuration for request-info - https://github.com/behaviorbot/request-info 22 | 23 | # *Required* Comment to reply with 24 | requestInfoReplyComment: > 25 | We would appreciate it if you could provide us with more info about this issue/pr! 26 | 27 | # *OPTIONAL* default titles to check against for lack of descriptiveness 28 | # MUST BE ALL LOWERCASE 29 | requestInfoDefaultTitles: 30 | - update readme.md 31 | - updates 32 | 33 | 34 | # *OPTIONAL* Label to be added to Issues and Pull Requests with insufficient information given 35 | requestInfoLabelToAdd: needs-more-info 36 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Configuration for probot-stale - https://github.com/probot/stale 2 | 3 | # Number of days of inactivity before an issue becomes stale 4 | daysUntilStale: 60 5 | # Number of days of inactivity before a stale issue is closed 6 | daysUntilClose: 30 7 | 8 | # Issues with these labels will never be considered stale 9 | exemptLabels: 10 | - pinned 11 | - security 12 | 13 | # Label to use when marking an issue as stale 14 | staleLabel: wontfix 15 | 16 | # Comment to post when marking an issue as stale. 
Set to `false` to disable 17 | markComment: > 18 | This issue has been automatically marked as stale because it has not had 19 | recent activity. It will be closed if no further activity occurs. Thank you 20 | for your contributions. 21 | 22 | # Comment to post when closing a stale issue. Set to `false` to disable 23 | closeComment: false 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.py~ 3 | *.comp.js 4 | *.DS_Store 5 | Thumbs.db 6 | locale 7 | .wnf-lang-status 8 | *.swp 9 | *.egg-info 10 | dist/ 11 | build/ 12 | erpnext_ocr/docs/current 13 | .vscode 14 | .idea/ 15 | *.iml 16 | node_modules 17 | .kdev4/ 18 | *.kdev4 19 | *debug.log 20 | 21 | # Byte-compiled / optimized / DLL files 22 | __pycache__/ 23 | *.py[cod] 24 | *$py.class 25 | 26 | # C extensions 27 | *.so 28 | 29 | # Distribution / packaging 30 | .Python 31 | build/ 32 | develop-eggs/ 33 | dist/ 34 | downloads/ 35 | eggs/ 36 | .eggs/ 37 | lib/ 38 | lib64/ 39 | parts/ 40 | sdist/ 41 | var/ 42 | wheels/ 43 | *.egg-info/ 44 | .installed.cfg 45 | *.egg 46 | MANIFEST 47 | 48 | # PyInstaller 49 | # Usually these files are written by a python script from a template 50 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
51 | *.manifest 52 | *.spec 53 | 54 | # Installer logs 55 | pip-log.txt 56 | pip-delete-this-directory.txt 57 | 58 | # Unit test / coverage reports 59 | htmlcov/ 60 | .tox/ 61 | .coverage 62 | .coverage.* 63 | .cache 64 | nosetests.xml 65 | coverage.xml 66 | *.cover 67 | .hypothesis/ 68 | .pytest_cache/ 69 | 70 | # Codacy coverage 71 | .codacy-coverage 72 | get.sh 73 | 74 | # Translations 75 | *.mo 76 | *.pot 77 | 78 | # Django stuff: 79 | *.log 80 | .static_storage/ 81 | .media/ 82 | local_settings.py 83 | 84 | # Flask stuff: 85 | instance/ 86 | .webassets-cache 87 | 88 | # Scrapy stuff: 89 | .scrapy 90 | 91 | # Sphinx documentation 92 | docs/_build/ 93 | 94 | # PyBuilder 95 | target/ 96 | 97 | # Jupyter Notebook 98 | .ipynb_checkpoints 99 | 100 | # pyenv 101 | .python-version 102 | 103 | # celery beat schedule file 104 | celerybeat-schedule 105 | 106 | # SageMath parsed files 107 | *.sage.py 108 | 109 | # Environments 110 | #.env 111 | .venv 112 | env/ 113 | venv/ 114 | ENV/ 115 | env.bak/ 116 | venv.bak/ 117 | 118 | # Spyder project settings 119 | .spyderproject 120 | .spyproject 121 | 122 | # Rope project settings 123 | .ropeproject 124 | 125 | # mkdocs documentation 126 | /site 127 | 128 | # mypy 129 | .mypy_cache/ 130 | 131 | # Logs 132 | logs 133 | *.log 134 | npm-debug.log* 135 | yarn-debug.log* 136 | yarn-error.log* 137 | 138 | # Runtime data 139 | pids 140 | *.pid 141 | *.seed 142 | *.pid.lock 143 | 144 | # Directory for instrumented libs generated by jscoverage/JSCover 145 | lib-cov 146 | 147 | # Coverage directory used by tools like istanbul 148 | coverage 149 | 150 | # nyc test coverage 151 | .nyc_output 152 | 153 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 154 | .grunt 155 | 156 | # Bower dependency directory (https://bower.io/) 157 | bower_components 158 | 159 | # node-waf configuration 160 | .lock-wscript 161 | 162 | # Compiled binary addons (https://nodejs.org/api/addons.html) 163 | build/Release 164 | 
165 | # Dependency directories 166 | node_modules/ 167 | jspm_packages/ 168 | 169 | # Typescript v1 declaration files 170 | typings/ 171 | 172 | # Optional npm cache directory 173 | .npm 174 | 175 | # Optional eslint cache 176 | .eslintcache 177 | 178 | # Optional REPL history 179 | .node_repl_history 180 | 181 | # Output of 'npm pack' 182 | *.tgz 183 | 184 | # Yarn Integrity file 185 | .yarn-integrity 186 | 187 | # dotenv environment variables file 188 | #.env 189 | 190 | # next.js build output 191 | .next 192 | 193 | # Frappe 194 | public/ 195 | services/ 196 | 197 | erpnext_ocr/www/ 198 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | os: linux 2 | dist: bionic 3 | 4 | services: docker 5 | 6 | language: shell 7 | 8 | branches: 9 | only: 10 | - master 11 | 12 | before_install: 13 | - env | sort 14 | - export TAG=travis 15 | - export VCS_REF=`git rev-parse --short HEAD` 16 | - export BUILD_DATE=`date -u +"%Y-%m-%dT%H:%M:%SZ"` 17 | - export home=$(pwd) 18 | - export travis_dir="${home}/.travis" 19 | - export IMAGE_NAME="docker-erpnext_ocr:${TAG}" 20 | - export BUILD_BRANCH=${TRAVIS_PULL_REQUEST_BRANCH:-${TRAVIS_BRANCH}} 21 | - export BUILD_URL=https://github.com/${TRAVIS_PULL_REQUEST_SLUG:-${TRAVIS_REPO_SLUG}} 22 | 23 | install: 24 | - echo "Changing to travis test directory ($travis_dir)" 25 | - cd "$travis_dir" 26 | # Prepare base image for build version and variant 27 | - sed -i -e "s/%%VERSION%%/${VERSION}/g" Dockerfile.${VARIANT} 28 | - sed -i -e "s/%%IMAGE_NAME%%/${IMAGE_NAME}/g" Dockerfile.test 29 | # Test container build 30 | - travis_retry travis_wait 60 docker-compose -f docker-compose.${DATABASE}.yml build 31 | 32 | before_script: 33 | - docker images 34 | 35 | script: 36 | - echo "Changing to travis test directory ($travis_dir)" 37 | - cd "$travis_dir" 38 | # Test container run 39 | - travis_retry docker-compose -f 
docker-compose.${DATABASE}.yml up -d && sleep 60 40 | - docker-compose -f docker-compose.${DATABASE}.yml ps 41 | - docker-compose -f docker-compose.${DATABASE}.yml logs "erpnext_db" 42 | - docker-compose -f docker-compose.${DATABASE}.yml ps "erpnext_db" | grep "Up" || exit 1 43 | - docker-compose -f docker-compose.${DATABASE}.yml logs "erpnext_app" 44 | - docker-compose -f docker-compose.${DATABASE}.yml ps "erpnext_app" | grep "Up" || exit 1 45 | - docker-compose -f docker-compose.${DATABASE}.yml logs "erpnext_web" 46 | - docker-compose -f docker-compose.${DATABASE}.yml ps "erpnext_web" | grep "Up" || exit 1 47 | - echo 'Wait until sites and apps database installed (9-10 minutes)' && travis_wait 15 sleep 720 48 | - docker-compose -f docker-compose.${DATABASE}.yml ps 49 | - docker-compose -f docker-compose.${DATABASE}.yml logs "erpnext_db" 50 | - docker-compose -f docker-compose.${DATABASE}.yml ps "erpnext_db" | grep "Up" 51 | - docker-compose -f docker-compose.${DATABASE}.yml logs "erpnext_app" 52 | - docker-compose -f docker-compose.${DATABASE}.yml ps "erpnext_app" | grep "Up" 53 | - docker-compose -f docker-compose.${DATABASE}.yml logs "erpnext_web" 54 | - docker-compose -f docker-compose.${DATABASE}.yml ps "erpnext_web" | grep "Up" 55 | - echo 'Wait until test finished (5 minutes)' && sleep 300 56 | - docker-compose -f docker-compose.${DATABASE}.yml logs "sut" 57 | - docker-compose -f docker-compose.${DATABASE}.yml ps "sut" | grep "Exit 0" 58 | # Test container restart 59 | - docker-compose -f docker-compose.${DATABASE}.yml down 60 | - travis_retry docker-compose -f docker-compose.${DATABASE}.yml up -d && sleep 60 61 | - docker-compose -f docker-compose.${DATABASE}.yml ps 62 | - docker-compose -f docker-compose.${DATABASE}.yml logs "erpnext_db" 63 | - docker-compose -f docker-compose.${DATABASE}.yml ps "erpnext_db" | grep "Up" 64 | - docker-compose -f docker-compose.${DATABASE}.yml logs "erpnext_app" 65 | - docker-compose -f docker-compose.${DATABASE}.yml ps 
"erpnext_app" | grep "Up" 66 | - docker-compose -f docker-compose.${DATABASE}.yml logs "erpnext_web" 67 | - docker-compose -f docker-compose.${DATABASE}.yml ps "erpnext_web" | grep "Up" 68 | - echo 'Wait until test finished (5 minutes)' && sleep 300 69 | - docker-compose -f docker-compose.${DATABASE}.yml logs "sut" 70 | - docker-compose -f docker-compose.${DATABASE}.yml ps "sut" | grep "Exit 0" 71 | 72 | after_script: 73 | - echo "Changing to project directory ($home)" 74 | - cd "$home" 75 | - sudo pip install coverage==4.5.4 76 | - sudo pip install python-coveralls 77 | - curl https://deepsource.io/cli | sudo sh 78 | # Create same directory path as docker test execution 79 | - sudo mkdir -p '/home/frappe/frappe-bench/apps' 80 | - sudo ln -sf "$home" '/home/frappe/frappe-bench/apps/erpnext_ocr' 81 | # Copy and convert coverage to XML format 82 | - cp '/srv/erpnext/frappe/sites/.coverage' .coverage 83 | - coverage xml 84 | - coverage report -m 85 | # Publish coverage to Coveralls 86 | - coveralls -b "$home" -d "$home/.coverage" 87 | # Publish coverage to Codacy 88 | - test -z "$CODACY_PROJECT_TOKEN" || bash <(curl -Ls https://coverage.codacy.com/get.sh) report -l Python -r "./coverage.xml" 89 | # Publish coverage to Codecov 90 | - test -z "$CODECOV_TOKEN" || bash <(curl -s https://codecov.io/bash) 91 | # Publish coverage to DeepSource 92 | - export DEEPSOURCE_DSN=https://sampledsn@deepsource.io 93 | - ./bin/deepsource report --analyzer test-coverage --key python --value-file ./coverage.xml 94 | 95 | notifications: 96 | email: false 97 | 98 | jobs: 99 | allow_failures: 100 | - env: VERSION=develop VARIANT=alpine DATABASE=mariadb 101 | - env: VERSION=develop VARIANT=alpine DATABASE=postgres 102 | - env: VERSION=develop VARIANT=debian DATABASE=mariadb 103 | - env: VERSION=develop VARIANT=debian DATABASE=postgres 104 | - env: VERSION=develop VARIANT=debian-slim DATABASE=mariadb 105 | - env: VERSION=develop VARIANT=debian-slim DATABASE=postgres 106 | - env: VERSION=13 
VARIANT=alpine DATABASE=mariadb 107 | - env: VERSION=13 VARIANT=alpine DATABASE=postgres 108 | - env: VERSION=13 VARIANT=debian DATABASE=mariadb 109 | - env: VERSION=13 VARIANT=debian DATABASE=postgres 110 | - env: VERSION=13 VARIANT=debian-slim DATABASE=mariadb 111 | - env: VERSION=13 VARIANT=debian-slim DATABASE=postgres 112 | - env: VERSION=12 VARIANT=alpine DATABASE=postgres 113 | - env: VERSION=12 VARIANT=debian DATABASE=postgres 114 | - env: VERSION=12 VARIANT=debian-slim DATABASE=postgres 115 | # Stop supporting Frappe 10 and Python 2 116 | - env: VERSION=10 VARIANT=alpine DATABASE=mariadb 117 | - env: VERSION=10 VARIANT=debian DATABASE=mariadb 118 | - env: VERSION=10 VARIANT=debian-slim DATABASE=mariadb 119 | 120 | env: # Environments 121 | - VERSION=develop VARIANT=alpine DATABASE=mariadb 122 | - VERSION=develop VARIANT=alpine DATABASE=postgres 123 | - VERSION=develop VARIANT=debian DATABASE=mariadb 124 | - VERSION=develop VARIANT=debian DATABASE=postgres 125 | - VERSION=develop VARIANT=debian-slim DATABASE=mariadb 126 | - VERSION=develop VARIANT=debian-slim DATABASE=postgres 127 | - VERSION=10 VARIANT=alpine DATABASE=mariadb 128 | - VERSION=10 VARIANT=debian DATABASE=mariadb 129 | - VERSION=10 VARIANT=debian-slim DATABASE=mariadb 130 | - VERSION=11 VARIANT=alpine DATABASE=mariadb 131 | - VERSION=11 VARIANT=debian DATABASE=mariadb 132 | - VERSION=11 VARIANT=debian-slim DATABASE=mariadb 133 | - VERSION=12 VARIANT=alpine DATABASE=mariadb 134 | - VERSION=12 VARIANT=alpine DATABASE=postgres 135 | - VERSION=12 VARIANT=debian DATABASE=mariadb 136 | - VERSION=12 VARIANT=debian DATABASE=postgres 137 | - VERSION=12 VARIANT=debian-slim DATABASE=mariadb 138 | - VERSION=12 VARIANT=debian-slim DATABASE=postgres 139 | - VERSION=13 VARIANT=alpine DATABASE=mariadb 140 | - VERSION=13 VARIANT=alpine DATABASE=postgres 141 | - VERSION=13 VARIANT=debian DATABASE=mariadb 142 | - VERSION=13 VARIANT=debian DATABASE=postgres 143 | - VERSION=13 VARIANT=debian-slim DATABASE=mariadb 
144 | - VERSION=13 VARIANT=debian-slim DATABASE=postgres 145 | -------------------------------------------------------------------------------- /.travis/.dockerignore: -------------------------------------------------------------------------------- 1 | # Ignore files during docker build 2 | docker-compose*.yml 3 | .env 4 | Dockerfile* 5 | .travis.yml 6 | -------------------------------------------------------------------------------- /.travis/.env: -------------------------------------------------------------------------------- 1 | 2 | ######################################## 3 | # ERPNext configuration 4 | ######################################## 5 | 6 | ERPNEXT_HOME=/srv/erpnext/frappe 7 | 8 | ERPNEXT_SITE=localhost 9 | 10 | ERPNEXT_ADMIN_PWD=erpnext_admin_password 11 | 12 | # Generated with openssl rand -base64 32 13 | ERPNEXT_ENCRYPTION_KEY=dcNdIKUHX/Vgl1sEc0eJIChyYx+2dQ/uASjWXs9hnic= 14 | 15 | ERPNEXT_DB_ROOT_LOGIN=root 16 | ERPNEXT_DB_ROOT_PWD=erpnext_db_root_password 17 | 18 | # DB name will be used as the DB user 19 | ERPNEXT_DB_NAME=erpnext 20 | ERPNEXT_DB_PWD=erpnext_password 21 | 22 | # Tessdata links 23 | TESSDATA_BEST=https://github.com/tesseract-ocr/tessdata_best/blob/master/ara.traineddata?raw=true 24 | TESSDATA_FAST=https://github.com/tesseract-ocr/tessdata_fast/blob/master/ara.traineddata?raw=true 25 | TESSDATA_PATH=/usr/share/tesseract-ocr/tessdata -------------------------------------------------------------------------------- /.travis/Dockerfile.alpine: -------------------------------------------------------------------------------- 1 | FROM monogramm/docker-erpnext:%%VERSION%%-alpine 2 | 3 | RUN set -ex; \ 4 | sudo apk add --update \ 5 | chromium \ 6 | chromium-chromedriver \ 7 | ; 8 | 9 | # Build environment variables 10 | ENV DOCKER_TAG=travis \ 11 | DOCKER_VCS_REF=${TRAVIS_COMMIT} \ 12 | DOCKER_BUILD_DATE=${TRAVIS_BUILD_NUMBER} \ 13 | TESSDATA_PREFIX=/home/$FRAPPE_USER/tessdata \ 14 | LANG=C.UTF-8 \ 15 | LC_ALL=C 16 | 17 | # Copy the whole 
repository to app folder for manual install 18 | #COPY --chown=frappe:frappe . "/home/$FRAPPE_USER"/frappe-bench/apps/erpnext_ocr 19 | 20 | ARG BUILD_BRANCH 21 | ARG BUILD_URL 22 | 23 | RUN set -ex; \ 24 | sudo apk add --update \ 25 | ghostscript \ 26 | imagemagick \ 27 | imagemagick-dev \ 28 | tesseract-ocr \ 29 | tesseract-ocr-dev \ 30 | leptonica \ 31 | pkgconfig \ 32 | ; \ 33 | mkdir -p $TESSDATA_PREFIX; \ 34 | sudo chown -R $FRAPPE_USER:$FRAPPE_USER $TESSDATA_PREFIX ; \ 35 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/eng.traineddata -O $TESSDATA_PREFIX/eng.traineddata; \ 36 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/equ.traineddata -O $TESSDATA_PREFIX/equ.traineddata; \ 37 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/osd.traineddata -O $TESSDATA_PREFIX/osd.traineddata; \ 38 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/fra.traineddata -O $TESSDATA_PREFIX/fra.traineddata; \ 39 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/deu.traineddata -O $TESSDATA_PREFIX/deu.traineddata; \ 40 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/spa.traineddata -O $TESSDATA_PREFIX/spa.traineddata; \ 41 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/por.traineddata -O $TESSDATA_PREFIX/por.traineddata; \ 42 | sudo chmod -R 755 $TESSDATA_PREFIX ; \ 43 | sudo sed -i \ 44 | -e 's/rights="none" pattern="PDF"/rights="read" pattern="PDF"/g' \ 45 | /etc/ImageMagick*/policy.xml \ 46 | ; \ 47 | sudo mkdir -p "/home/$FRAPPE_USER"/frappe-bench/logs; \ 48 | sudo touch "/home/$FRAPPE_USER"/frappe-bench/logs/bench.log; \ 49 | sudo chmod 777 \ 50 | "/home/$FRAPPE_USER"/frappe-bench/logs \ 51 | "/home/$FRAPPE_USER"/frappe-bench/logs/* \ 52 | ; \ 53 | bench get-app --branch ${BUILD_BRANCH} ${BUILD_URL} 54 | -------------------------------------------------------------------------------- /.travis/Dockerfile.debian: 
-------------------------------------------------------------------------------- 1 | FROM monogramm/docker-erpnext:%%VERSION%%-debian 2 | 3 | # Install Google Chrome & Chrome WebDriver for UI tests 4 | RUN set -ex; \ 5 | sudo apt-get update -q; \ 6 | sudo apt-get install -y --no-install-recommends \ 7 | unzip \ 8 | ; \ 9 | CHROMEDRIVER_VERSION=`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`; \ 10 | sudo mkdir -p /opt/chromedriver-$CHROMEDRIVER_VERSION; \ 11 | sudo curl -sS -o /tmp/chromedriver_linux64.zip http://chromedriver.storage.googleapis.com/$CHROMEDRIVER_VERSION/chromedriver_linux64.zip; \ 12 | sudo unzip -qq /tmp/chromedriver_linux64.zip -d /opt/chromedriver-$CHROMEDRIVER_VERSION; \ 13 | sudo rm /tmp/chromedriver_linux64.zip; \ 14 | sudo chmod +x /opt/chromedriver-$CHROMEDRIVER_VERSION/chromedriver; \ 15 | sudo ln -fs /opt/chromedriver-$CHROMEDRIVER_VERSION/chromedriver /usr/local/bin/chromedriver; \ 16 | export PATH="$PATH;/usr/local/bin/chromedriver" 17 | 18 | # Build environment variables 19 | ENV DOCKER_TAG=travis \ 20 | DOCKER_VCS_REF=${TRAVIS_COMMIT} \ 21 | DOCKER_BUILD_DATE=${TRAVIS_BUILD_NUMBER} \ 22 | TESSDATA_PREFIX=/home/$FRAPPE_USER/tessdata \ 23 | LC_ALL=C 24 | 25 | # Copy the whole repository to app folder for manual install 26 | #COPY --chown=frappe:frappe . 
"/home/$FRAPPE_USER"/frappe-bench/apps/erpnext_ocr 27 | 28 | ARG BUILD_BRANCH 29 | ARG BUILD_URL 30 | 31 | RUN set -ex; \ 32 | sudo apt-get update -q; \ 33 | sudo apt-get install -y --no-install-recommends \ 34 | ghostscript \ 35 | imagemagick \ 36 | libmagickwand-dev \ 37 | tesseract-ocr \ 38 | libtesseract-dev \ 39 | libleptonica-dev \ 40 | pkg-config \ 41 | ; \ 42 | sudo rm -rf /var/lib/apt/lists/*; \ 43 | mkdir -p $TESSDATA_PREFIX; \ 44 | sudo chown -R $FRAPPE_USER:$FRAPPE_USER $TESSDATA_PREFIX ; \ 45 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/eng.traineddata -O $TESSDATA_PREFIX/eng.traineddata; \ 46 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/equ.traineddata -O $TESSDATA_PREFIX/equ.traineddata; \ 47 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/osd.traineddata -O $TESSDATA_PREFIX/osd.traineddata; \ 48 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/fra.traineddata -O $TESSDATA_PREFIX/fra.traineddata; \ 49 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/deu.traineddata -O $TESSDATA_PREFIX/deu.traineddata; \ 50 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/spa.traineddata -O $TESSDATA_PREFIX/spa.traineddata; \ 51 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/por.traineddata -O $TESSDATA_PREFIX/por.traineddata; \ 52 | sudo chmod -R 755 $TESSDATA_PREFIX ; \ 53 | sudo sed -i \ 54 | -e 's/rights="none" pattern="PDF"/rights="read" pattern="PDF"/g' \ 55 | /etc/ImageMagick*/policy.xml \ 56 | ; \ 57 | sudo mkdir -p "/home/$FRAPPE_USER"/frappe-bench/logs; \ 58 | sudo touch "/home/$FRAPPE_USER"/frappe-bench/logs/bench.log; \ 59 | sudo chmod 777 \ 60 | "/home/$FRAPPE_USER"/frappe-bench/logs \ 61 | "/home/$FRAPPE_USER"/frappe-bench/logs/* \ 62 | ; \ 63 | bench get-app --branch ${BUILD_BRANCH} ${BUILD_URL} 64 | -------------------------------------------------------------------------------- /.travis/Dockerfile.debian-slim: 
-------------------------------------------------------------------------------- 1 | FROM monogramm/docker-erpnext:%%VERSION%%-debian-slim 2 | 3 | # Install Google Chrome & Chrome WebDriver for UI tests 4 | RUN set -ex; \ 5 | sudo apt-get update -q; \ 6 | sudo apt-get install -y --no-install-recommends \ 7 | iputils-ping \ 8 | unzip \ 9 | ; \ 10 | CHROMEDRIVER_VERSION=`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`; \ 11 | sudo mkdir -p /opt/chromedriver-$CHROMEDRIVER_VERSION; \ 12 | sudo curl -sS -o /tmp/chromedriver_linux64.zip http://chromedriver.storage.googleapis.com/$CHROMEDRIVER_VERSION/chromedriver_linux64.zip; \ 13 | sudo unzip -qq /tmp/chromedriver_linux64.zip -d /opt/chromedriver-$CHROMEDRIVER_VERSION; \ 14 | sudo rm /tmp/chromedriver_linux64.zip; \ 15 | sudo chmod +x /opt/chromedriver-$CHROMEDRIVER_VERSION/chromedriver; \ 16 | sudo ln -fs /opt/chromedriver-$CHROMEDRIVER_VERSION/chromedriver /usr/local/bin/chromedriver; \ 17 | export PATH="$PATH;/usr/local/bin/chromedriver" 18 | 19 | # Build environment variables 20 | ENV DOCKER_TAG=travis \ 21 | DOCKER_VCS_REF=${TRAVIS_COMMIT} \ 22 | DOCKER_BUILD_DATE=${TRAVIS_BUILD_NUMBER} \ 23 | TESSDATA_PREFIX=/home/$FRAPPE_USER/tessdata \ 24 | LC_ALL=C 25 | 26 | # Copy the whole repository to app folder for manual install 27 | #COPY --chown=frappe:frappe . 
"/home/$FRAPPE_USER"/frappe-bench/apps/erpnext_ocr 28 | 29 | ARG BUILD_BRANCH 30 | ARG BUILD_URL 31 | 32 | RUN set -ex; \ 33 | sudo apt-get update -q; \ 34 | sudo apt-get install -y --no-install-recommends \ 35 | ghostscript \ 36 | imagemagick \ 37 | libmagickwand-dev \ 38 | tesseract-ocr \ 39 | libtesseract-dev \ 40 | libleptonica-dev \ 41 | pkg-config \ 42 | ; \ 43 | sudo rm -rf /var/lib/apt/lists/*; \ 44 | mkdir -p $TESSDATA_PREFIX; \ 45 | sudo chown -R $FRAPPE_USER:$FRAPPE_USER $TESSDATA_PREFIX ; \ 46 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/eng.traineddata -O $TESSDATA_PREFIX/eng.traineddata; \ 47 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/equ.traineddata -O $TESSDATA_PREFIX/equ.traineddata; \ 48 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/osd.traineddata -O $TESSDATA_PREFIX/osd.traineddata; \ 49 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/fra.traineddata -O $TESSDATA_PREFIX/fra.traineddata; \ 50 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/deu.traineddata -O $TESSDATA_PREFIX/deu.traineddata; \ 51 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/spa.traineddata -O $TESSDATA_PREFIX/spa.traineddata; \ 52 | wget -q https://raw.github.com/tesseract-ocr/tessdata/master/por.traineddata -O $TESSDATA_PREFIX/por.traineddata; \ 53 | sudo chmod -R 755 $TESSDATA_PREFIX ; \ 54 | sudo sed -i \ 55 | -e 's/rights="none" pattern="PDF"/rights="read" pattern="PDF"/g' \ 56 | /etc/ImageMagick*/policy.xml \ 57 | ; \ 58 | sudo mkdir -p "/home/$FRAPPE_USER"/frappe-bench/logs; \ 59 | sudo touch "/home/$FRAPPE_USER"/frappe-bench/logs/bench.log; \ 60 | sudo chmod 777 \ 61 | "/home/$FRAPPE_USER"/frappe-bench/logs \ 62 | "/home/$FRAPPE_USER"/frappe-bench/logs/* \ 63 | ; \ 64 | bench get-app --branch ${BUILD_BRANCH} ${BUILD_URL} 65 | -------------------------------------------------------------------------------- /.travis/Dockerfile.test: 
-------------------------------------------------------------------------------- 1 | FROM %%IMAGE_NAME%% 2 | 3 | COPY docker_test.sh /docker_test.sh 4 | 5 | RUN set -ex; \ 6 | sudo chmod 755 /docker_test.sh; \ 7 | sudo pip install coverage==4.5.4; \ 8 | sudo pip install python-coveralls 9 | 10 | EXPOSE 4444 11 | 12 | # Default Chrome configuration 13 | ENV DISPLAY=:20.0 \ 14 | SCREEN_GEOMETRY="1440x900x24" \ 15 | CHROMEDRIVER_PORT=4444 \ 16 | CHROMEDRIVER_WHITELISTED_IPS="127.0.0.1" \ 17 | CHROMEDRIVER_URL_BASE='' \ 18 | CHROMEDRIVER_EXTRA_ARGS='' 19 | 20 | # Test environment variables 21 | ENV TEST_VERSION=${TEST_VERSION} 22 | 23 | CMD ["/docker_test.sh"] 24 | -------------------------------------------------------------------------------- /.travis/docker-compose.mariadb.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | 3 | services: 4 | # https://docs.docker.com/docker-hub/builds/automated-testing/ 5 | sut: 6 | build: 7 | context: ./ 8 | dockerfile: Dockerfile.test 9 | command: sh /docker_test.sh 10 | # Only for travis-ci, do not name test container in dockerhub 11 | container_name: sut 12 | depends_on: 13 | - erpnext_db 14 | - erpnext_app 15 | - erpnext_web 16 | - erpnext_scheduler 17 | - erpnext_worker_default 18 | - erpnext_worker_long 19 | - erpnext_worker_short 20 | - erpnext_socketio 21 | - erpnext_redis_cache 22 | - erpnext_redis_queue 23 | - erpnext_redis_socketio 24 | environment: 25 | # Container setup 26 | - NODE_TYPE=test 27 | # Docker setup 28 | - DOCKER_APPS_TIMEOUT=900 29 | - DOCKER_DEBUG=1 30 | # Test setup 31 | - TEST_VERSION=${VERSION} 32 | - TRAVIS_BUILD_ID=${TRAVIS_BUILD_ID} 33 | - TRAVIS_BUILD_NUMBER=${TRAVIS_BUILD_NUMBER} 34 | - TRAVIS_BUILD_WEB_URL=${TRAVIS_BUILD_WEB_URL} 35 | - TRAVIS_COMMIT=${TRAVIS_COMMIT} 36 | - TRAVIS_COMMIT_MESSAGE=${TRAVIS_COMMIT_MESSAGE} 37 | - TRAVIS_COMMIT_RANGE=${TRAVIS_COMMIT_RANGE} 38 | - TRAVIS_JOB_ID=${TRAVIS_JOB_ID} 39 | - 
TRAVIS_JOB_NAME=${TRAVIS_JOB_NAME} 40 | - TRAVIS_JOB_NUMBER=${TRAVIS_JOB_NUMBER} 41 | - TRAVIS_JOB_WEB_URL=${TRAVIS_JOB_WEB_URL} 42 | - TRAVIS_BRANCH=${TRAVIS_BRANCH} 43 | volumes_from: 44 | - erpnext_app 45 | volumes: 46 | - /etc/localtime:/etc/localtime:ro 47 | - /etc/timezone:/etc/timezone:ro 48 | 49 | erpnext_app: 50 | build: 51 | context: ./ 52 | dockerfile: Dockerfile.${VARIANT} 53 | args: 54 | - BUILD_BRANCH=${BUILD_BRANCH} 55 | - BUILD_URL=${BUILD_URL} 56 | image: ${IMAGE_NAME} 57 | container_name: erpnext_app 58 | command: app 59 | #restart: always 60 | ports: 61 | - 8000:8000 62 | depends_on: 63 | - erpnext_db 64 | links: 65 | - erpnext_db 66 | environment: 67 | # Docker setup 68 | - DOCKER_DB_ALLOWED_HOSTS= 69 | - DOCKER_APPS_TIMEOUT=900 70 | # Frappe setup 71 | - FRAPPE_APP_INIT=erpnext erpnext_ocr 72 | - FRAPPE_DEFAULT_PROTOCOL=http:// 73 | - FRAPPE_DEFAULT_SITE=${ERPNEXT_SITE} 74 | - FRAPPE_LOGGING=1 75 | - DEVELOPER_MODE=0 76 | - ALLOW_TESTS=1 77 | # Admin user setup 78 | - ADMIN_PASSWORD=${ERPNEXT_ADMIN_PWD} 79 | - ENCRYPTION_KEY=${ERPNEXT_ENCRYPTION_KEY} 80 | # Database setup 81 | - DB_TYPE=mariadb 82 | - DB_HOST=erpnext_db 83 | - DB_PORT=3306 84 | - DB_NAME=${ERPNEXT_DB_NAME} 85 | - DB_PASSWORD=${ERPNEXT_DB_PWD} 86 | - DB_ROOT_PASSWORD=${ERPNEXT_DB_ROOT_PWD} 87 | # Mail setup 88 | - MAIL_MUTED=true 89 | # Redis setup 90 | - REDIS_CACHE_HOST=erpnext_redis_cache 91 | - REDIS_QUEUE_HOST=erpnext_redis_queue 92 | - REDIS_SOCKETIO_HOST=erpnext_redis_socketio 93 | volumes: 94 | - ${ERPNEXT_HOME}/sites:/home/frappe/frappe-bench/sites 95 | - ${ERPNEXT_HOME}/logs:/home/frappe/frappe-bench/logs 96 | - /etc/localtime:/etc/localtime:ro 97 | - /etc/timezone:/etc/timezone:ro 98 | 99 | erpnext_web: 100 | image: nginx:alpine 101 | container_name: erpnext_web 102 | #restart: always 103 | volumes: 104 | - ./docker-nginx.conf:/etc/nginx/conf.d/default.conf:ro 105 | # If you need SSL connection, you can provide your own certificates 106 | # - ./certs:/etc/letsencrypt 
107 | # - ./certs-data:/data/letsencrypt 108 | volumes_from: 109 | - erpnext_app 110 | depends_on: 111 | - erpnext_app 112 | ports: 113 | - 80:80 114 | # If you need SSL connection 115 | # - '443:443' 116 | links: 117 | - erpnext_app 118 | - erpnext_socketio 119 | 120 | erpnext_db: 121 | image: mariadb:10 122 | container_name: erpnext_db 123 | #restart: always 124 | command: --character_set_client=utf8 --bind-address=0.0.0.0 --character-set-client-handshake=FALSE --character-set-server=utf8mb4 --collation-server=utf8mb4_unicode_ci --sql-mode="ALLOW_INVALID_DATES" 125 | environment: 126 | - MYSQL_ROOT_PASSWORD=${ERPNEXT_DB_ROOT_PWD} 127 | # Following parameters are not needed with Frappe 12 or higher since it will create DB itself 128 | - MYSQL_DATABASE=${ERPNEXT_DB_NAME} 129 | - MYSQL_USER=${ERPNEXT_DB_NAME} 130 | - MYSQL_PASSWORD=${ERPNEXT_DB_PWD} 131 | volumes: 132 | - /srv/erpnext/db:/var/lib/mysql 133 | - /etc/localtime:/etc/localtime:ro 134 | - /etc/timezone:/etc/timezone:ro 135 | 136 | erpnext_scheduler: 137 | #build: ./ 138 | image: ${IMAGE_NAME} 139 | container_name: erpnext_scheduler 140 | command: scheduler 141 | #restart: always 142 | depends_on: 143 | - erpnext_app 144 | environment: 145 | # Docker setup 146 | - DOCKER_APPS_TIMEOUT=900 147 | volumes: 148 | - ${ERPNEXT_HOME}/sites:/home/frappe/frappe-bench/sites 149 | - /etc/localtime:/etc/localtime:ro 150 | - /etc/timezone:/etc/timezone:ro 151 | 152 | erpnext_worker_default: 153 | #build: ./ 154 | image: ${IMAGE_NAME} 155 | container_name: erpnext_worker_default 156 | command: worker-default 157 | #restart: always 158 | depends_on: 159 | - erpnext_app 160 | environment: 161 | # Docker setup 162 | - DOCKER_APPS_TIMEOUT=900 163 | volumes: 164 | - ${ERPNEXT_HOME}/sites:/home/frappe/frappe-bench/sites 165 | - /etc/localtime:/etc/localtime:ro 166 | - /etc/timezone:/etc/timezone:ro 167 | 168 | erpnext_worker_long: 169 | #build: ./ 170 | image: ${IMAGE_NAME} 171 | container_name: erpnext_worker_long 172 | 
command: worker-long 173 | #restart: always 174 | depends_on: 175 | - erpnext_app 176 | environment: 177 | # Docker setup 178 | - DOCKER_APPS_TIMEOUT=900 179 | volumes: 180 | - ${ERPNEXT_HOME}/sites:/home/frappe/frappe-bench/sites 181 | - /etc/localtime:/etc/localtime:ro 182 | - /etc/timezone:/etc/timezone:ro 183 | 184 | erpnext_worker_short: 185 | #build: ./ 186 | image: ${IMAGE_NAME} 187 | container_name: erpnext_worker_short 188 | command: worker-short 189 | #restart: always 190 | depends_on: 191 | - erpnext_app 192 | environment: 193 | # Docker setup 194 | - DOCKER_APPS_TIMEOUT=900 195 | volumes: 196 | - ${ERPNEXT_HOME}/sites:/home/frappe/frappe-bench/sites 197 | - /etc/localtime:/etc/localtime:ro 198 | - /etc/timezone:/etc/timezone:ro 199 | 200 | erpnext_socketio: 201 | #build: ./ 202 | image: ${IMAGE_NAME} 203 | container_name: erpnext_socketio 204 | command: node-socketio 205 | #restart: always 206 | ports: 207 | - 3000:3000 208 | depends_on: 209 | - erpnext_app 210 | environment: 211 | # Docker setup 212 | - DOCKER_APPS_TIMEOUT=900 213 | volumes: 214 | - ${ERPNEXT_HOME}/sites:/home/frappe/frappe-bench/sites 215 | - /etc/localtime:/etc/localtime:ro 216 | - /etc/timezone:/etc/timezone:ro 217 | 218 | erpnext_redis_cache: 219 | image: redis:alpine 220 | container_name: erpnext_redis_cache 221 | #restart: always 222 | volumes: 223 | - ./services/erpnext/conf/redis_cache.conf:/etc/conf.d/redis.conf:ro 224 | command: ["redis-server","/etc/conf.d/redis.conf"] 225 | 226 | erpnext_redis_queue: 227 | image: redis:alpine 228 | container_name: erpnext_redis_queue 229 | #restart: always 230 | 231 | erpnext_redis_socketio: 232 | image: redis:alpine 233 | container_name: erpnext_redis_socketio 234 | #restart: always 235 | -------------------------------------------------------------------------------- /.travis/docker-compose.postgres.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | 3 | services: 4 | # 
https://docs.docker.com/docker-hub/builds/automated-testing/ 5 | sut: 6 | build: 7 | context: ./ 8 | dockerfile: Dockerfile.test 9 | command: sh /docker_test.sh 10 | # Only for travis-ci, do not name test container in dockerhub 11 | container_name: sut 12 | depends_on: 13 | - erpnext_db 14 | - erpnext_app 15 | - erpnext_web 16 | - erpnext_scheduler 17 | - erpnext_worker_default 18 | - erpnext_worker_long 19 | - erpnext_worker_short 20 | - erpnext_socketio 21 | - erpnext_redis_cache 22 | - erpnext_redis_queue 23 | - erpnext_redis_socketio 24 | environment: 25 | # Container setup 26 | - NODE_TYPE=test 27 | # Docker setup 28 | - DOCKER_APPS_TIMEOUT=900 29 | - DOCKER_DEBUG=1 30 | # Test setup 31 | - TEST_VERSION=${VERSION} 32 | - TRAVIS_BUILD_ID=${TRAVIS_BUILD_ID} 33 | - TRAVIS_BUILD_NUMBER=${TRAVIS_BUILD_NUMBER} 34 | - TRAVIS_BUILD_WEB_URL=${TRAVIS_BUILD_WEB_URL} 35 | - TRAVIS_COMMIT=${TRAVIS_COMMIT} 36 | - TRAVIS_COMMIT_MESSAGE=${TRAVIS_COMMIT_MESSAGE} 37 | - TRAVIS_COMMIT_RANGE=${TRAVIS_COMMIT_RANGE} 38 | - TRAVIS_JOB_ID=${TRAVIS_JOB_ID} 39 | - TRAVIS_JOB_NAME=${TRAVIS_JOB_NAME} 40 | - TRAVIS_JOB_NUMBER=${TRAVIS_JOB_NUMBER} 41 | - TRAVIS_JOB_WEB_URL=${TRAVIS_JOB_WEB_URL} 42 | - TRAVIS_BRANCH=${TRAVIS_BRANCH} 43 | volumes_from: 44 | - erpnext_app 45 | volumes: 46 | - /etc/localtime:/etc/localtime:ro 47 | - /etc/timezone:/etc/timezone:ro 48 | 49 | erpnext_app: 50 | build: 51 | context: ./ 52 | dockerfile: Dockerfile.${VARIANT} 53 | args: 54 | - BUILD_BRANCH=${BUILD_BRANCH} 55 | - BUILD_URL=${BUILD_URL} 56 | image: ${IMAGE_NAME} 57 | container_name: erpnext_app 58 | #restart: always 59 | ports: 60 | - 8000:8000 61 | depends_on: 62 | - erpnext_db 63 | links: 64 | - erpnext_db 65 | environment: 66 | # Container setup 67 | - NODE_TYPE=app 68 | # Frappe setup 69 | - FRAPPE_APP_INIT=erpnext erpnext_ocr 70 | - FRAPPE_DEFAULT_PROTOCOL=http:// 71 | - FRAPPE_DEFAULT_SITE=${ERPNEXT_SITE} 72 | - FRAPPE_LOGGING=1 73 | - DEVELOPER_MODE=0 74 | - ALLOW_TESTS=1 75 | # Admin user setup 
76 | - ADMIN_PASSWORD=${ERPNEXT_ADMIN_PWD} 77 | - ENCRYPTION_KEY=${ERPNEXT_ENCRYPTION_KEY} 78 | # Database setup 79 | - DB_TYPE=postgres 80 | - DB_HOST=erpnext_db 81 | - DB_PORT=5432 82 | - DB_NAME=${ERPNEXT_DB_NAME} 83 | - DB_PASSWORD=${ERPNEXT_DB_PWD} 84 | - DB_ROOT_LOGIN=${ERPNEXT_DB_ROOT_LOGIN} 85 | - DB_ROOT_PASSWORD=${ERPNEXT_DB_ROOT_PWD} 86 | # Mail setup 87 | - MAIL_MUTED=true 88 | # Redis setup 89 | - REDIS_CACHE_HOST=erpnext_redis_cache 90 | - REDIS_QUEUE_HOST=erpnext_redis_queue 91 | - REDIS_SOCKETIO_HOST=erpnext_redis_socketio 92 | volumes: 93 | - ${ERPNEXT_HOME}/sites:/home/frappe/frappe-bench/sites 94 | - ${ERPNEXT_HOME}/logs:/home/frappe/frappe-bench/logs 95 | - /etc/localtime:/etc/localtime:ro 96 | - /etc/timezone:/etc/timezone:ro 97 | 98 | erpnext_web: 99 | image: nginx:alpine 100 | container_name: erpnext_web 101 | #restart: always 102 | volumes: 103 | - ./docker-nginx.conf:/etc/nginx/conf.d/default.conf:ro 104 | # If you need SSL connection, you can provide your own certificates 105 | # - ./certs:/etc/letsencrypt 106 | # - ./certs-data:/data/letsencrypt 107 | volumes_from: 108 | - erpnext_app 109 | depends_on: 110 | - erpnext_app 111 | ports: 112 | - 80:80 113 | # If you need SSL connection 114 | # - '443:443' 115 | links: 116 | - erpnext_app 117 | - erpnext_socketio 118 | 119 | erpnext_db: 120 | image: postgres:10-alpine 121 | container_name: erpnext_db 122 | #restart: always 123 | stdin_open: true 124 | tty: true 125 | command: postgres -c 'max_connections=500' 126 | environment: 127 | - POSTGRES_USER=${ERPNEXT_DB_ROOT_LOGIN} 128 | - POSTGRES_PASSWORD=${ERPNEXT_DB_ROOT_PWD} 129 | volumes: 130 | - /srv/erpnext/db/data:/var/lib/postgresql/data 131 | - /etc/localtime:/etc/localtime:ro 132 | - /etc/timezone:/etc/timezone:ro 133 | 134 | erpnext_scheduler: 135 | #build: ./ 136 | image: ${IMAGE_NAME} 137 | container_name: erpnext_scheduler 138 | #restart: always 139 | depends_on: 140 | - erpnext_app 141 | environment: 142 | - NODE_TYPE=scheduler 143 | 
# Docker setup 144 | - DOCKER_APPS_TIMEOUT=900 145 | volumes: 146 | - ${ERPNEXT_HOME}/sites:/home/frappe/frappe-bench/sites 147 | - /etc/localtime:/etc/localtime:ro 148 | - /etc/timezone:/etc/timezone:ro 149 | 150 | erpnext_worker_default: 151 | #build: ./ 152 | image: ${IMAGE_NAME} 153 | container_name: erpnext_worker_default 154 | #restart: always 155 | depends_on: 156 | - erpnext_app 157 | environment: 158 | - NODE_TYPE=worker-default 159 | # Docker setup 160 | - DOCKER_APPS_TIMEOUT=900 161 | volumes: 162 | - ${ERPNEXT_HOME}/sites:/home/frappe/frappe-bench/sites 163 | - /etc/localtime:/etc/localtime:ro 164 | - /etc/timezone:/etc/timezone:ro 165 | 166 | erpnext_worker_long: 167 | #build: ./ 168 | image: ${IMAGE_NAME} 169 | container_name: erpnext_worker_long 170 | #restart: always 171 | depends_on: 172 | - erpnext_app 173 | environment: 174 | - NODE_TYPE=worker-long 175 | # Docker setup 176 | - DOCKER_APPS_TIMEOUT=900 177 | volumes: 178 | - ${ERPNEXT_HOME}/sites:/home/frappe/frappe-bench/sites 179 | - /etc/localtime:/etc/localtime:ro 180 | - /etc/timezone:/etc/timezone:ro 181 | 182 | erpnext_worker_short: 183 | #build: ./ 184 | image: ${IMAGE_NAME} 185 | container_name: erpnext_worker_short 186 | #restart: always 187 | depends_on: 188 | - erpnext_app 189 | environment: 190 | - NODE_TYPE=worker-short 191 | # Docker setup 192 | - DOCKER_APPS_TIMEOUT=900 193 | volumes: 194 | - ${ERPNEXT_HOME}/sites:/home/frappe/frappe-bench/sites 195 | - /etc/localtime:/etc/localtime:ro 196 | - /etc/timezone:/etc/timezone:ro 197 | 198 | erpnext_socketio: 199 | #build: ./ 200 | image: ${IMAGE_NAME} 201 | container_name: erpnext_socketio 202 | #restart: always 203 | ports: 204 | - 3000:3000 205 | depends_on: 206 | - erpnext_app 207 | environment: 208 | - NODE_TYPE=node-socketio 209 | # Docker setup 210 | - DOCKER_APPS_TIMEOUT=900 211 | volumes: 212 | - ${ERPNEXT_HOME}/sites:/home/frappe/frappe-bench/sites 213 | - /etc/localtime:/etc/localtime:ro 214 | - /etc/timezone:/etc/timezone:ro 
215 | 216 | erpnext_redis_cache: 217 | image: redis:alpine 218 | container_name: erpnext_redis_cache 219 | #restart: always 220 | volumes: 221 | - ./services/erpnext/conf/redis_cache.conf:/etc/conf.d/redis.conf:ro 222 | command: ["redis-server","/etc/conf.d/redis.conf"] 223 | 224 | erpnext_redis_queue: 225 | image: redis:alpine 226 | container_name: erpnext_redis_queue 227 | #restart: always 228 | 229 | erpnext_redis_socketio: 230 | image: redis:alpine 231 | container_name: erpnext_redis_socketio 232 | #restart: always 233 | -------------------------------------------------------------------------------- /.travis/docker-nginx.conf: -------------------------------------------------------------------------------- 1 | 2 | upstream bench-frappe { 3 | server erpnext_app:8000 fail_timeout=0; 4 | } 5 | 6 | upstream bench-socketio-server { 7 | server erpnext_socketio:3000 fail_timeout=0; 8 | } 9 | 10 | server { 11 | listen 80; 12 | 13 | server_name localhost; 14 | 15 | root /home/frappe/frappe-bench/sites; 16 | 17 | location /assets { 18 | try_files $uri =404; 19 | } 20 | 21 | location ~ ^/protected/(.*) { 22 | internal; 23 | try_files /$host/$1 =404; 24 | } 25 | 26 | location /socket.io { 27 | proxy_http_version 1.1; 28 | proxy_set_header Upgrade $http_upgrade; 29 | proxy_set_header Connection "upgrade"; 30 | proxy_set_header X-Frappe-Site-Name $host; 31 | proxy_set_header Origin $scheme://$http_host; 32 | proxy_set_header Host $host; 33 | 34 | proxy_pass http://bench-socketio-server; 35 | } 36 | 37 | location / { 38 | try_files /$host/public/$uri @webserver; 39 | } 40 | 41 | location @webserver { 42 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 43 | proxy_set_header X-Forwarded-Proto $scheme; 44 | proxy_set_header X-Frappe-Site-Name $host; 45 | proxy_set_header Host $host; 46 | proxy_set_header X-Use-X-Accel-Redirect True; 47 | proxy_read_timeout 300; 48 | proxy_redirect off; 49 | 50 | proxy_pass http://bench-frappe; 51 | } 52 | 53 | # error pages 54 | 
error_page 502 /502.html; 55 | location /502.html { 56 | root /home/frappe/bench-repo/bench/config/templates; 57 | internal; 58 | } 59 | 60 | # optimizations 61 | sendfile on; 62 | keepalive_timeout 15; 63 | client_max_body_size 50m; 64 | client_body_buffer_size 16K; 65 | client_header_buffer_size 1k; 66 | 67 | # enable gzip compression 68 | # based on https://mattstauffer.co/blog/enabling-gzip-on-nginx-servers-including-laravel-forge 69 | gzip on; 70 | gzip_http_version 1.1; 71 | gzip_comp_level 5; 72 | gzip_min_length 256; 73 | gzip_proxied any; 74 | gzip_vary on; 75 | gzip_types 76 | application/atom+xml 77 | application/javascript 78 | application/json 79 | application/rss+xml 80 | application/vnd.ms-fontobject 81 | application/x-font-ttf 82 | application/font-woff 83 | application/x-web-app-manifest+json 84 | application/xhtml+xml 85 | application/xml 86 | font/opentype 87 | image/svg+xml 88 | image/x-icon 89 | text/css 90 | text/plain 91 | text/x-component 92 | ; 93 | # text/html is always compressed by HttpGzipModule 94 | } 95 | -------------------------------------------------------------------------------- /.travis/docker_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | ################################################################################ 6 | # Testing docker containers 7 | 8 | echo "Waiting to ensure everything is fully ready for the tests..." 9 | sleep 60 10 | 11 | echo "Checking content of sites directory..." 12 | if [ ! -f "./sites/apps.txt" ] || [ ! -f "./sites/.docker-app-init" ] || [ ! -f "./sites/currentsite.txt" ] || [ ! -f "./sites/.docker-site-init" ] || [ ! -f "./sites/.docker-init" ]; then 13 | echo 'Apps and site are not initalized?!' 14 | ls -al "./sites" 15 | exit 1 16 | fi 17 | 18 | echo "Checking main containers are reachable..." 19 | if ! sudo ping -c 10 -q erpnext_db ; then 20 | echo 'Database container is not responding!' 
21 | echo 'Check the following logs for details:' 22 | tail -n 100 logs/*.log 23 | exit 2 24 | fi 25 | 26 | if ! sudo ping -c 10 -q erpnext_app ; then 27 | echo 'App container is not responding!' 28 | echo 'Check the following logs for details:' 29 | tail -n 100 logs/*.log 30 | exit 4 31 | fi 32 | 33 | if ! sudo ping -c 10 -q erpnext_web ; then 34 | echo 'Web container is not responding!' 35 | echo 'Check the following logs for details:' 36 | tail -n 100 logs/*.log 37 | exit 8 38 | fi 39 | 40 | 41 | ################################################################################ 42 | # Success 43 | echo 'Docker tests successful' 44 | 45 | 46 | ################################################################################ 47 | # Automated Unit tests 48 | # https://docs.docker.com/docker-hub/builds/automated-testing/ 49 | # https://frappe.io/docs/user/en/testing 50 | ################################################################################ 51 | 52 | FRAPPE_APP_TO_TEST=erpnext_ocr 53 | 54 | ################################################################################ 55 | # Frappe Unit tests 56 | # https://frappe.io/docs/user/en/guides/automated-testing/unit-testing 57 | 58 | FRAPPE_APP_UNIT_TEST_REPORT="$(pwd)/sites/.${FRAPPE_APP_TO_TEST}_unit_tests.xml" 59 | FRAPPE_APP_UNIT_TEST_PROFILE="$(pwd)/sites/.${FRAPPE_APP_TO_TEST}_unit_tests.prof" 60 | 61 | if [ -n "${FRAPPE_APP_TO_TEST}" ]; then 62 | 63 | echo "Preparing Frappe application '${FRAPPE_APP_TO_TEST}' tests..." 64 | 65 | bench set-config allow_tests true -g 66 | 67 | bench doctor 68 | bench enable-scheduler 69 | bench doctor 70 | 71 | #bench run-tests --help 72 | 73 | echo "Executing Unit Tests of '${FRAPPE_APP_TO_TEST}' app..." 
74 | if [ "${TEST_VERSION}" = "10" ]; then 75 | bench run-tests \ 76 | --app "${FRAPPE_APP_TO_TEST}" \ 77 | --junit-xml-output "${FRAPPE_APP_UNIT_TEST_REPORT}" \ 78 | --profile > "${FRAPPE_APP_UNIT_TEST_PROFILE}" 79 | else 80 | bench run-tests \ 81 | --app "${FRAPPE_APP_TO_TEST}" \ 82 | --coverage \ 83 | --junit-xml-output "${FRAPPE_APP_UNIT_TEST_REPORT}" \ 84 | --profile > "${FRAPPE_APP_UNIT_TEST_PROFILE}" 85 | fi 86 | 87 | fi 88 | 89 | ## Check result of tests 90 | if [ -f "${FRAPPE_APP_UNIT_TEST_REPORT}" ]; then 91 | echo "Checking Frappe application '${FRAPPE_APP_TO_TEST}' unit tests report..." 92 | 93 | if grep -E '(errors|failures)="[1-9][0-9]*"' "${FRAPPE_APP_UNIT_TEST_REPORT}"; then 94 | echo "Unit Tests of '${FRAPPE_APP_TO_TEST}' app failed! See logs for details." 95 | #cat "${FRAPPE_APP_UNIT_TEST_REPORT}" 96 | exit 1 97 | else 98 | echo "Unit Tests of '${FRAPPE_APP_TO_TEST}' app successful!" 99 | #cat "${FRAPPE_APP_UNIT_TEST_REPORT}" 100 | fi 101 | fi 102 | 103 | if [ -f ./sites/.coverage ]; then 104 | set +e 105 | cp ./sites/.coverage ./.coverage 106 | 107 | echo "Unit Tests coverage report of '${FRAPPE_APP_TO_TEST}' app:" 108 | coverage report -m 109 | 110 | echo "Sending Unit Tests coverage of '${FRAPPE_APP_TO_TEST}' app to Coveralls..." 111 | coveralls -b "$(pwd)/apps/${FRAPPE_APP_TO_TEST}" -d "$(pwd)/sites/.coverage" 112 | 113 | # TODO When frappe supports coverage report in XML format 114 | # https://github.com/frappe/frappe/issues/9696 115 | coverage xml 116 | 117 | echo "Sending Unit Tests coverage of '${FRAPPE_APP_TO_TEST}' app to Codacy..." 118 | wget -qO - https://coverage.codacy.com/get.sh | sh -s report -l Python -r "$(pwd)/coverage.xml" 119 | 120 | #echo "Sending Unit Tests coverage of '${FRAPPE_APP_TO_TEST}' app to CodeCov..." 
121 | #bash <(curl -s https://codecov.io/bash) 122 | 123 | rm ./.coverage 124 | set -e 125 | fi 126 | 127 | if [ -f "${FRAPPE_APP_UNIT_TEST_PROFILE}" ]; then 128 | echo "Checking Frappe application '${FRAPPE_APP_TO_TEST}' unit tests profile..." 129 | 130 | # XXX Are there any online services that could receive and display profiles? 131 | #cat "${FRAPPE_APP_UNIT_TEST_PROFILE}" 132 | fi 133 | 134 | 135 | ################################################################################ 136 | # TODO QUnit (JS) Unit tests 137 | # https://frappe.io/docs/user/en/guides/automated-testing/qunit-testing 138 | 139 | #bench run-ui-tests --help 140 | 141 | #echo "Executing UI Tests of '${FRAPPE_APP_TO_TEST}' app..." 142 | #if [ "${TEST_VERSION}" = "10" ] || [ "${TEST_VERSION}" = "11" ]; then 143 | # bench run-ui-tests --app ${FRAPPE_APP_TO_TEST} 144 | #else 145 | # bench run-ui-tests ${FRAPPE_APP_TO_TEST} 146 | #fi 147 | 148 | ## TODO Check result of UI tests 149 | 150 | 151 | 152 | ################################################################################ 153 | # TODO Generate docs 154 | 155 | #bench build-docs --help 156 | 157 | echo "Generating docs for '${FRAPPE_APP_TO_TEST}' app..." 158 | if [ "${TEST_VERSION}" = "10" ] || [ "${TEST_VERSION}" = "11" ]; then 159 | set +e 160 | bench build-docs \ 161 | --target "${FRAPPE_APP_TO_TEST}" \ 162 | --docs-version "${FRAPPE_APP_TO_TEST}" \ 163 | "${FRAPPE_APP_TO_TEST}" 164 | set -e 165 | else 166 | echo "Building docs is not available for this version of Frappe (${TEST_VERSION})" 167 | fi 168 | 169 | ## TODO Check docs generated properly 170 | 171 | 172 | ################################################################################ 173 | # Success 174 | echo "Frappe app '${FRAPPE_APP_TO_TEST}' tests finished" 175 | echo 'Check the CI reports and logs for details.' 
176 | exit 0 177 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 6 | 7 | ## Unreleased 8 | 9 | ### Added 10 | 11 | ### Changed 12 | 13 | ### Fixed 14 | 15 | ### Removed 16 | 17 | 18 | 19 | ## 1.1.1 - 2020-05-02 20 | 21 | ### Fixed 22 | 23 | - 🚑 Fixed setup.py install format 24 | 25 | ### Changed 26 | 27 | - 🎨 Improve format and quality warnings 28 | 29 | 30 | 31 | ## 1.1.0 - 2020-04-19 32 | 33 | ### Added 34 | 35 | - ✨ Add Read OCR to desktop 36 | - ✨ Implemented pyspellchecker on OCR results 37 | - 🌐 Add russian translation (#22) 38 | - ✨ Add default language in OCR Read (#20) 39 | - ✨ Add read time field (#25) 40 | - ⚡️ Add background job (#23) 41 | - ✨ Download new languages (#27) 42 | - ✨ Text based Doctype import (#26) for #15 43 | 44 | ### Fixed 45 | 46 | - 🔒 Upgrade minimist 47 | 48 | 49 | 50 | ## 1.0.1 - 2019-11-29 51 | 52 | Differences with previous release: [1.0.0...1.0.1](https://github.com/Monogramm/erpnext_ocr/compare/1.0.0...1.0.1) 53 | 54 | ### Fixed 55 | 56 | - :globe_with_meridians: Fix FR translation for PDF resolution 57 | 58 | 59 | 60 | ## 1.0.0 - 2019-11-27 61 | 62 | Differences with previous release: [0.9.0...1.0.0](https://github.com/Monogramm/erpnext_ocr/compare/0.9.0...1.0.0) 63 | 64 | ### Added 65 | 66 | - :sparkles: Progress bar during document read 67 | - :construction_worker: Add unit tests and coverage analysis to CI 68 | - :sparkles: Read only field to indicate Language available for OCR 69 | - :sparkles: Add OCR settings 70 | 71 | ### Changed 72 | 73 | - :wrench: Allow all users to read with OCR 74 | - :zap: Replace pytesseract by tesserocr 
75 | 76 | 77 | 78 | ## 0.9.0 - 2019-11-06 79 | 80 | ### Added 81 | 82 | - PDF management in `OCR Read` 83 | - `OCR Language` to manage available tesseract traindata files 84 | - French translations 85 | - GitHub issue and feature templates 86 | - GitHub bots config ([stale](https://github.com/apps/stale) and [behaviorbot](https://github.com/behaviorbot)) 87 | - [Travis-CI](https://travis-ci.org/) using [docker images](https://github.com/Monogramm/docker-erpnext) to setup ERPNext test environment 88 | - Contributing guidelines 89 | - This CHANGELOG file to hopefully help to track changes done to the project. 90 | 91 | ### Changed 92 | 93 | - PIP requirements for easier (auto) install 94 | - README documentation on requirements, installation and common issues 95 | - Desktop icon, color, name and docs 96 | - Repository name (changed case) 97 | - Author/maintainer info 98 | 99 | ### Fixed 100 | 101 | - Python 3 compatibility 102 | - Management of public/private files upload 103 | 104 | ### Removed 105 | 106 | - Sales Invoice custom fields and scripts 107 | - OCR Receipt for Sales Invoice 108 | - ABBYY OCR 109 | - Zapier webhook 110 | - Aimara / jstree / treeview 111 | 112 | ## Legacy - 2018-02-12 113 | 114 | ### Added 115 | 116 | - All the good work from [John Vincent Fiel](https://github.com/jvfiel) on the source of this project. 
117 | 118 | Source of fork: [jvfiel/ERPNext-OCR](https://github.com/jvfiel/ERPNext-OCR/tree/master) 119 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | - Using welcoming and inclusive language 18 | - Being respectful of differing viewpoints and experiences 19 | - Gracefully accepting constructive criticism 20 | - Focusing on what is best for the community 21 | - Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | - The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | - Trolling, insulting/derogatory comments, and personal or political attacks 28 | - Public or private harassment 29 | - Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | - Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable 
behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at opensource at monogramm dot io. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at <https://www.contributor-covenant.org/version/1/4/code-of-conduct.html> 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | <https://www.contributor-covenant.org/faq> 77 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Contributing 2 | ============ 3 | 4 | First of all, **thank you** for contributing, **you are awesome**! 5 | 6 | You can report bugs or request new features by creating an [issue](https://github.com/Monogramm/erpnext_ocr/issues), or submit a [pull request](https://github.com/Monogramm/erpnext_ocr/pulls) with your contribution. 7 | 8 | Here are a few rules to follow in order to ease code reviews and discussions before maintainers accept and merge your work. 9 | 10 | You MUST follow the [Best of the Best Practices (BOBP) Guide for Python](https://gist.github.com/sloria/7001839). If you don't know about any of them, you should really read the recommendations. 11 | 12 | You SHOULD write tests. 13 | 14 | You SHOULD write documentation. 15 | 16 | Please, write [commit messages that make sense](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html), and [rebase your branch](http://git-scm.com/book/en/Git-Branching-Rebasing) before submitting your Pull Request. 17 | 18 | If possible, use [gitmoji](https://gitmoji.carloscuesta.me/) in your commit message to ease code reviews. 19 | 20 | One may ask you to [squash your commits](http://gitready.com/advanced/2009/02/10/squashing-commits-with-rebase.html) too. This is used to "clean" your Pull Request before merging it (we don't want commits such as `fix tests`, `fix 2`, `fix 3`, etc.). 21 | 22 | Also, while creating your Pull Request on GitHub, you MUST write a description which gives the context and/or explains why you are creating it. 
If your Pull Request is related to one or several issues, you SHOULD reference them in your PR description. See GitHub documentation on [how to close issues using keywords](https://help.github.com/en/articles/closing-issues-using-keywords). 23 | 24 | Thank you! 25 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM monogramm/docker-erpnext:11-debian 2 | 3 | # Install Google Chrome & Chrome WebDriver for UI tests 4 | RUN set -ex; \ 5 | sudo apt-get update -q; \ 6 | sudo apt-get install -y --no-install-recommends \ 7 | unzip \ 8 | ; \ 9 | CHROMEDRIVER_VERSION=`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`; \ 10 | sudo mkdir -p "/opt/chromedriver-${CHROMEDRIVER_VERSION}"; \ 11 | sudo curl -sS -o /tmp/chromedriver_linux64.zip http://chromedriver.storage.googleapis.com/$CHROMEDRIVER_VERSION/chromedriver_linux64.zip; \ 12 | sudo unzip -qq /tmp/chromedriver_linux64.zip -d "/opt/chromedriver-${CHROMEDRIVER_VERSION}"; \ 13 | sudo rm /tmp/chromedriver_linux64.zip; \ 14 | sudo chmod +x "/opt/chromedriver-${CHROMEDRIVER_VERSION}/chromedriver"; \ 15 | sudo ln -fs "/opt/chromedriver-${CHROMEDRIVER_VERSION}/chromedriver" /usr/local/bin/chromedriver; \ 16 | export PATH="$PATH;/usr/local/bin/chromedriver" 17 | 18 | # Build environment variables 19 | ENV TESSDATA_PREFIX=/home/$FRAPPE_USER/tessdata 20 | 21 | # Install Tesseract dependencies 22 | RUN set -ex; \ 23 | sudo apt-get update -q; \ 24 | sudo apt-get install -y --no-install-recommends \ 25 | ghostscript \ 26 | imagemagick \ 27 | libmagickwand-dev \ 28 | tesseract-ocr \ 29 | libtesseract-dev \ 30 | libleptonica-dev \ 31 | pkg-config \ 32 | ; \ 33 | sudo rm -rf /var/lib/apt/lists/*; \ 34 | mkdir -p "$TESSDATA_PREFIX"; \ 35 | sudo chown -R "${FRAPPE_USER}:${FRAPPE_USER}" "${TESSDATA_PREFIX}" ; \ 36 | curl -sS -o "${TESSDATA_PREFIX}/eng.traineddata" 
https://raw.github.com/tesseract-ocr/tessdata/master/eng.traineddata; \ 37 | curl -sS -o "${TESSDATA_PREFIX}/equ.traineddata" https://raw.github.com/tesseract-ocr/tessdata/master/equ.traineddata; \ 38 | curl -sS -o "${TESSDATA_PREFIX}/osd.traineddata" https://raw.github.com/tesseract-ocr/tessdata/master/osd.traineddata; \ 39 | curl -sS -o "${TESSDATA_PREFIX}/fra.traineddata" https://raw.github.com/tesseract-ocr/tessdata/master/fra.traineddata; \ 40 | curl -sS -o "${TESSDATA_PREFIX}/deu.traineddata" https://raw.github.com/tesseract-ocr/tessdata/master/deu.traineddata; \ 41 | curl -sS -o "${TESSDATA_PREFIX}/spa.traineddata" https://raw.github.com/tesseract-ocr/tessdata/master/spa.traineddata; \ 42 | curl -sS -o "${TESSDATA_PREFIX}/por.traineddata" https://raw.github.com/tesseract-ocr/tessdata/master/por.traineddata; \ 43 | sudo chmod -R 755 "${TESSDATA_PREFIX}" ; \ 44 | sudo sed -i \ 45 | -e 's/rights="none" pattern="PDF"/rights="read" pattern="PDF"/g' \ 46 | /etc/ImageMagick*/policy.xml \ 47 | ; \ 48 | sudo mkdir -p "/home/${FRAPPE_USER}"/frappe-bench/logs; \ 49 | sudo touch "/home/${FRAPPE_USER}"/frappe-bench/logs/bench.log; \ 50 | sudo chmod 777 \ 51 | "/home/${FRAPPE_USER}"/frappe-bench/logs \ 52 | "/home/${FRAPPE_USER}"/frappe-bench/logs/* \ 53 | ; 54 | 55 | # Build environment variables 56 | ARG FRAPPE_APP_TO_TEST=${FRAPPE_APP_TO_TEST} 57 | 58 | # Copy the whole repository to app folder for manual install 59 | COPY --chown=frappe:frappe . 
"/home/${FRAPPE_USER}/frappe-bench/apps/${FRAPPE_APP_TO_TEST}" 60 | 61 | # Install current app 62 | RUN set -ex; \ 63 | ./env/bin/pip install -q -U -e "./apps/${FRAPPE_APP_TO_TEST}"; \ 64 | bench build --app "${FRAPPE_APP_TO_TEST}" 65 | 66 | VOLUME "/home/${FRAPPE_USER}/frappe-bench/apps/${FRAPPE_APP_TO_TEST}/public" 67 | -------------------------------------------------------------------------------- /Dockerfile.test: -------------------------------------------------------------------------------- 1 | FROM docker-erpnext:erpnext_ocr-dev 2 | 3 | ADD .travis/docker_test.sh /docker_test.sh 4 | 5 | RUN set -ex; \ 6 | sudo chmod 755 /docker_test.sh; \ 7 | sudo pip install coverage==4.5.4; \ 8 | sudo pip install python-coveralls 9 | 10 | EXPOSE 4444 11 | 12 | # Default Chrome configuration 13 | ENV DISPLAY=:20.0 \ 14 | SCREEN_GEOMETRY="1440x900x24" \ 15 | CHROMEDRIVER_PORT=4444 \ 16 | CHROMEDRIVER_WHITELISTED_IPS="127.0.0.1" \ 17 | CHROMEDRIVER_URL_BASE='' \ 18 | CHROMEDRIVER_EXTRA_ARGS='' 19 | 20 | # Test environment variables 21 | ENV TEST_VERSION=11 22 | 23 | CMD ["/docker_test.sh"] 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2020 Monogramm 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in 2 | include requirements.txt 3 | include *.json 4 | include *.md 5 | include *.py 6 | include *.txt 7 | recursive-include erpnext_ocr *.css 8 | recursive-include erpnext_ocr *.csv 9 | recursive-include erpnext_ocr *.html 10 | recursive-include erpnext_ocr *.ico 11 | recursive-include erpnext_ocr *.js 12 | recursive-include erpnext_ocr *.json 13 | recursive-include erpnext_ocr *.md 14 | recursive-include erpnext_ocr *.png 15 | recursive-include erpnext_ocr *.py 16 | recursive-include erpnext_ocr *.svg 17 | recursive-include erpnext_ocr *.txt 18 | recursive-exclude erpnext_ocr *.pyc -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![License: MIT][uri_license_image]][uri_license] 2 | [![Managed with Taiga.io](https://img.shields.io/badge/managed%20with-TAIGA.io-709f14.svg)](https://tree.taiga.io/project/monogrammbot-monogrammerpnext_ocr/ "Managed with Taiga.io") 3 | [![Build Status](https://travis-ci.org/Monogramm/erpnext_ocr.svg)](https://travis-ci.org/Monogramm/erpnext_ocr) 4 | [![Codacy 
Badge](https://api.codacy.com/project/badge/Grade/e154ec72926346d4ba4951c25d906d33)](https://www.codacy.com/gh/Monogramm/erpnext_ocr?utm_source=github.com&utm_medium=referral&utm_content=Monogramm/erpnext_ocr&utm_campaign=Badge_Grade) 5 | [![Coverage Status](https://coveralls.io/repos/github/Monogramm/erpnext_ocr/badge.svg?branch=master)](https://coveralls.io/github/Monogramm/erpnext_ocr?branch=master) 6 | 7 | ## ERPNext OCR 8 | 9 | > :alembic: **Experimental** Frappe OCR application with [tesseract](https://github.com/tesseract-ocr/tesseract). 10 | 11 | This project is a fork of [ERPNext-OCR](https://github.com/jvfiel/ERPNext-OCR) by [John Vincent Fiel](https://github.com/jvfiel). Its aim is to fix and cleanup the original source code and add some new features. 12 | 13 | Check out more on [ERPNext Discuss](https://discuss.erpnext.com/t/erpnext-ocr-app/33834/7). 14 | 15 | ## :chart_with_upwards_trend: Changes 16 | 17 | See [CHANGELOG](./CHANGELOG.md) 18 | 19 | ## :bookmark: Roadmap 20 | 21 | See [Taiga.io](https://tree.taiga.io/project/monogrammbot-monogrammerpnext_ocr/ "Taiga.io monogrammbot-monogrammerpnext_ocr") 22 | 23 | ## :construction: Install 24 | 25 | ### Pre-requisites: tesseract-python and imagemagick 26 | 27 | Install tesseract-ocr, plus imagemagick and ghostscript (to work with pdf files) using this command on Debian: 28 | 29 | ```sh 30 | sudo apt-get install tesseract-ocr imagemagick libmagickwand-dev ghostscript 31 | ``` 32 | 33 | ### Install Frappe application 34 | 35 | ```sh 36 | bench get-app --branch develop erpnext_ocr https://github.com/Monogramm/erpnext_ocr 37 | bench install-app erpnext_ocr 38 | ``` 39 | 40 | When installing Frappe app, the following python requirements will be installed: 41 | 42 | - python binding for tesseract, [tesserocr](https://pypi.org/project/tesserocr/) 43 | 44 | - image processing library in python, [pillow](https://pypi.org/project/Pillow/) 45 | 46 | - HTTP library in python, 
[requests](https://pypi.org/project/requests/) 47 | 48 | - python binding for imagemagick, [wand](https://pypi.org/project/Wand/) 49 | 50 | ## :rocket: Usage 51 | 52 | **File Being Read**: 53 | 54 | ![File Being Read](./erpnext_ocr/tests/test_data/Picture_010.png) 55 | 56 | **Sample Screenshot**: 57 | 58 | ![Sample Screenshot](./erpnext_ocr/tests/test_data/Picture_010_screenshot.png) 59 | 60 | ### Tesseract trained data 61 | 62 | In order to use OCR with different languages, you need to install the appropriate trained data files. 63 | Check tesseract Wiki for details: 64 | 65 | ### Development 66 | 67 | If you wish to develop or just test this application locally, you can use `docker-compose up -d` at the root of this repository. 68 | You can then access your ERPNext OCR dev env at `http://localhost:8080`. 69 | 70 | ### Known issues 71 | 72 | - `wand.exceptions.PolicyError: not authorized '/opt/sample.pdf' @ error/constitute.c/ReadImage/412` 73 | 74 | - This can happen due to security configuration in imagemagick preventing it from reading PDF files. 75 | 76 | - Reference: 77 | - 78 | - 79 | 80 | - `wand.exceptions.WandRuntimeError: MagickReadImage returns false, but did raise ImageMagick exception. This can occurs when a delegate is missing, or returns EXIT_SUCCESS without generating a raster.` 81 | 82 | - This might happen if you're missing a dependency to convert PDF, most of the time `ghostscript` 83 | 84 | - References: 85 | - 86 | 87 | - `OSError: encoder error -2 when writing image file` 88 | 89 | - This might happen when trying to open a TIFF image, but the real error is "_hidden_" and only displayed in console. 90 | - If the original error in console is `Fax3SetupState: Bits/sample must be 1 for Group 3/4 encoding/decoding.` that usually happens when TIFF image compression is not valid / recognized. 
91 | 92 | ## :white_check_mark: Run tests 93 | 94 | ```sh 95 | bench run-tests --app erpnext_ocr 96 | ``` 97 | 98 | ## :bust_in_silhouette: Authors 99 | 100 | **Monogramm** 101 | 102 | - Website: 103 | - Github: [@Monogramm](https://github.com/Monogramm) 104 | 105 | **John Vincent Fiel** 106 | 107 | - Github: [@jvfiel](https://github.com/jvfiel) 108 | 109 | ## :handshake: Contributing 110 | 111 | Contributions, issues and feature requests are welcome!
Feel free to check the [issues page](https://github.com/Monogramm/erpnext_ocr/issues). 112 | [Check the contributing guide](./CONTRIBUTING.md).
113 | 114 | ## :thumbsup: Show your support 115 | 116 | Give a :star: if this project helped you! 117 | 118 | ## :page_facing_up: License 119 | 120 | Copyright © 2019 [Monogramm](https://github.com/Monogramm).
121 | This project is [MIT][uri_license] licensed. 122 | 123 | * * * 124 | 125 | _This README was generated with :heart: by [readme-md-generator](https://github.com/kefranabg/readme-md-generator)_ 126 | 127 | [uri_license]: https://opensource.org/licenses/MIT 128 | 129 | [uri_license_image]: https://img.shields.io/badge/license-MIT-blue 130 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | 3 | services: 4 | # https://docs.docker.com/docker-hub/builds/automated-testing/ 5 | sut: 6 | build: 7 | context: ./ 8 | dockerfile: Dockerfile.test 9 | command: sh /docker_test.sh 10 | # Only for travis-ci, do not name test container in dockerhub 11 | container_name: sut 12 | depends_on: 13 | - erpnext_db 14 | - erpnext_app 15 | - erpnext_web 16 | - erpnext_scheduler 17 | - erpnext_worker_default 18 | - erpnext_worker_long 19 | - erpnext_worker_short 20 | - erpnext_socketio 21 | - erpnext_redis_cache 22 | - erpnext_redis_queue 23 | - erpnext_redis_socketio 24 | environment: 25 | # Container setup 26 | - NODE_TYPE=test 27 | # Docker setup 28 | - DOCKER_APPS_TIMEOUT=900 29 | - DOCKER_DEBUG=1 30 | volumes_from: 31 | - erpnext_app 32 | volumes: 33 | - /etc/localtime:/etc/localtime:ro 34 | - /etc/timezone:/etc/timezone:ro 35 | 36 | erpnext_app: 37 | build: 38 | context: ./ 39 | dockerfile: Dockerfile 40 | args: 41 | - FRAPPE_APP_TO_TEST=${FRAPPE_APP_TO_TEST} 42 | image: ${IMAGE_NAME} 43 | container_name: erpnext_app 44 | command: app 45 | #restart: always 46 | ports: 47 | - 8000:8000 48 | depends_on: 49 | - erpnext_db 50 | links: 51 | - erpnext_db 52 | environment: 53 | # Docker setup 54 | - DOCKER_DB_ALLOWED_HOSTS= 55 | - DOCKER_APPS_TIMEOUT=900 56 | # Frappe setup 57 | - FRAPPE_APP_INIT=frappe erpnext ${FRAPPE_APP_TO_TEST} 58 | - FRAPPE_DEFAULT_PROTOCOL=http:// 59 | - FRAPPE_DEFAULT_SITE=${ERPNEXT_SITE} 60 | - FRAPPE_LOGGING=1 
61 | - DEVELOPER_MODE=1 62 | - ALLOW_TESTS=1 63 | # Admin user setup 64 | - ADMIN_PASSWORD=${ERPNEXT_ADMIN_PWD} 65 | - ENCRYPTION_KEY=${ERPNEXT_ENCRYPTION_KEY} 66 | # Database setup 67 | - DB_TYPE=mariadb 68 | - DB_HOST=erpnext_db 69 | - DB_PORT=3306 70 | - DB_NAME=${ERPNEXT_DB_NAME} 71 | - DB_PASSWORD=${ERPNEXT_DB_PWD} 72 | - DB_ROOT_PASSWORD=${ERPNEXT_DB_ROOT_PWD} 73 | # Mail setup 74 | - MAIL_MUTED=true 75 | # Redis setup 76 | - REDIS_CACHE_HOST=erpnext_redis_cache 77 | - REDIS_QUEUE_HOST=erpnext_redis_queue 78 | - REDIS_SOCKETIO_HOST=erpnext_redis_socketio 79 | volumes: 80 | # Mount the whole repository to app folder for live developments (untested) 81 | #- .:/home/frappe/frappe-bench/apps/${FRAPPE_APP_TO_TEST} 82 | - ${ERPNEXT_HOME}/sites:/home/frappe/frappe-bench/sites 83 | - ${ERPNEXT_HOME}/logs:/home/frappe/frappe-bench/logs 84 | - /etc/localtime:/etc/localtime:ro 85 | - /etc/timezone:/etc/timezone:ro 86 | 87 | erpnext_web: 88 | image: nginx:alpine 89 | container_name: erpnext_web 90 | #restart: always 91 | volumes: 92 | - .travis/docker-nginx.conf:/etc/nginx/conf.d/default.conf:ro 93 | # If you need SSL connection, you can provide your own certificates 94 | # - ./certs:/etc/letsencrypt 95 | # - ./certs-data:/data/letsencrypt 96 | volumes_from: 97 | - erpnext_app 98 | depends_on: 99 | - erpnext_app 100 | ports: 101 | - 8080:80 102 | # If you need SSL connection 103 | # - '8443:443' 104 | links: 105 | - erpnext_app 106 | - erpnext_socketio 107 | 108 | erpnext_db: 109 | image: mariadb:10 110 | container_name: erpnext_db 111 | #restart: always 112 | command: --character_set_client=utf8 --bind-address=0.0.0.0 --character-set-client-handshake=FALSE --character-set-server=utf8mb4 --collation-server=utf8mb4_unicode_ci --sql-mode="ALLOW_INVALID_DATES" 113 | environment: 114 | - MYSQL_ROOT_PASSWORD=${ERPNEXT_DB_ROOT_PWD} 115 | # Following parameters are not needed with Frappe 12 or higher since it will create DB itself 116 | - MYSQL_DATABASE=${ERPNEXT_DB_NAME} 117 | 
- MYSQL_USER=${ERPNEXT_DB_NAME} 118 | - MYSQL_PASSWORD=${ERPNEXT_DB_PWD} 119 | volumes: 120 | - /srv/erpnext/db:/var/lib/mysql 121 | - /etc/localtime:/etc/localtime:ro 122 | - /etc/timezone:/etc/timezone:ro 123 | 124 | erpnext_scheduler: 125 | #build: ./ 126 | image: ${IMAGE_NAME} 127 | container_name: erpnext_scheduler 128 | command: scheduler 129 | #restart: always 130 | depends_on: 131 | - erpnext_app 132 | environment: 133 | # Docker setup 134 | - DOCKER_APPS_TIMEOUT=900 135 | volumes: 136 | - ${ERPNEXT_HOME}/sites:/home/frappe/frappe-bench/sites 137 | - /etc/localtime:/etc/localtime:ro 138 | - /etc/timezone:/etc/timezone:ro 139 | 140 | erpnext_worker_default: 141 | #build: ./ 142 | image: ${IMAGE_NAME} 143 | container_name: erpnext_worker_default 144 | command: worker-default 145 | #restart: always 146 | depends_on: 147 | - erpnext_app 148 | environment: 149 | # Docker setup 150 | - DOCKER_APPS_TIMEOUT=900 151 | volumes: 152 | - ${ERPNEXT_HOME}/sites:/home/frappe/frappe-bench/sites 153 | - /etc/localtime:/etc/localtime:ro 154 | - /etc/timezone:/etc/timezone:ro 155 | 156 | erpnext_worker_long: 157 | #build: ./ 158 | image: ${IMAGE_NAME} 159 | container_name: erpnext_worker_long 160 | command: worker-long 161 | #restart: always 162 | depends_on: 163 | - erpnext_app 164 | environment: 165 | # Docker setup 166 | - DOCKER_APPS_TIMEOUT=900 167 | volumes: 168 | - ${ERPNEXT_HOME}/sites:/home/frappe/frappe-bench/sites 169 | - /etc/localtime:/etc/localtime:ro 170 | - /etc/timezone:/etc/timezone:ro 171 | 172 | erpnext_worker_short: 173 | #build: ./ 174 | image: ${IMAGE_NAME} 175 | container_name: erpnext_worker_short 176 | command: worker-short 177 | #restart: always 178 | depends_on: 179 | - erpnext_app 180 | environment: 181 | # Docker setup 182 | - DOCKER_APPS_TIMEOUT=900 183 | volumes: 184 | - ${ERPNEXT_HOME}/sites:/home/frappe/frappe-bench/sites 185 | - /etc/localtime:/etc/localtime:ro 186 | - /etc/timezone:/etc/timezone:ro 187 | 188 | erpnext_socketio: 189 | 
#build: ./ 190 | image: ${IMAGE_NAME} 191 | container_name: erpnext_socketio 192 | command: node-socketio 193 | #restart: always 194 | ports: 195 | - 3000:3000 196 | depends_on: 197 | - erpnext_app 198 | environment: 199 | # Docker setup 200 | - DOCKER_APPS_TIMEOUT=900 201 | volumes: 202 | - ${ERPNEXT_HOME}/sites:/home/frappe/frappe-bench/sites 203 | - /etc/localtime:/etc/localtime:ro 204 | - /etc/timezone:/etc/timezone:ro 205 | 206 | erpnext_redis_cache: 207 | image: redis:alpine 208 | container_name: erpnext_redis_cache 209 | #restart: always 210 | volumes: 211 | - ./services/erpnext/conf/redis_cache.conf:/etc/conf.d/redis.conf:ro 212 | command: ["redis-server","/etc/conf.d/redis.conf"] 213 | 214 | erpnext_redis_queue: 215 | image: redis:alpine 216 | container_name: erpnext_redis_queue 217 | #restart: always 218 | 219 | erpnext_redis_socketio: 220 | image: redis:alpine 221 | container_name: erpnext_redis_socketio 222 | #restart: always 223 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # **ERPNext OCR** Documentation site 2 | 3 | This directory contains the code for the **ERPNext OCR** docs site, [monogramm.github.io/erpnext_ocr](https://monogramm.github.io/erpnext_ocr). 4 | 5 | For the user guide and technical documentation, check the Frappe [app documentation](https://github.com/Monogramm/erpnext_ocr/blob/master/erpnext_ocr/docs) or compile it locally using [recod_frappe_devtools](https://github.com/Monogramm/recod_frappe_devtools). 6 | 7 | ## Contributing 8 | 9 | For information about contributing, see the [Contributing page](https://github.com/Monogramm/erpnext_ocr/blob/master/CONTRIBUTING.md). 
10 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /docs/assets/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Monogramm/erpnext_ocr/c23ebc5a4124d9d2e98740e0edf7386e4621148c/docs/assets/.gitkeep -------------------------------------------------------------------------------- /docs/user/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Monogramm/erpnext_ocr/c23ebc5a4124d9d2e98740e0edf7386e4621148c/docs/user/.gitkeep -------------------------------------------------------------------------------- /erpnext_ocr/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | 4 | __version__ = '1.0.1' 5 | 6 | -------------------------------------------------------------------------------- /erpnext_ocr/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Monogramm/erpnext_ocr/c23ebc5a4124d9d2e98740e0edf7386e4621148c/erpnext_ocr/config/__init__.py -------------------------------------------------------------------------------- /erpnext_ocr/config/desktop.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2021, Monogramm and Contributors 3 | # See license.txt 4 | """Configuration for desktop.""" 5 | 6 | from __future__ import unicode_literals 7 | 8 | from frappe import _ 9 | 10 | 11 | def get_data(): 12 | """Returns the application desktop icons configuration.""" 13 | return [ 14 | { 15 | "module_name": "OCR Read", 
16 | "_doctype": "OCR Read", 17 | "color": "#00bcd4", 18 | "icon": "fa fa-eye", 19 | "type": "link", 20 | "link": "List/OCR Read" 21 | }, 22 | 23 | { 24 | "module_name": "ERPNext OCR", 25 | "color": "#00bcd4", 26 | "icon": "octicon octicon-eye", 27 | "type": "module", 28 | "label": _("ERPNext OCR"), 29 | "hidden": 1 30 | } 31 | ] 32 | -------------------------------------------------------------------------------- /erpnext_ocr/config/docs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2021, Monogramm and Contributors 3 | # See license.txt 4 | """Configuration for docs.""" 5 | 6 | from __future__ import unicode_literals 7 | 8 | 9 | source_link = "https://github.com/Monogramm/erpnext_ocr" 10 | docs_base_url = "https://monogramm.github.io/erpnext_ocr" 11 | headline = "ERPNext OCR Integration" 12 | sub_heading = "Optical Character Recognition using tesseract within ERPNext" 13 | 14 | 15 | def get_context(context): 16 | """Returns the application documentation context. 
17 | 18 | :param context: application documentation context""" 19 | context.brand_html = "ERPNext OCR" 20 | context.source_link = source_link 21 | context.docs_base_url = docs_base_url 22 | context.headline = headline 23 | context.sub_heading = sub_heading 24 | -------------------------------------------------------------------------------- /erpnext_ocr/config/erpnext_ocr.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2021, Monogramm and Contributors 3 | # See license.txt 4 | """Configuration for desktop.""" 5 | 6 | from __future__ import unicode_literals 7 | 8 | from frappe import _ 9 | 10 | 11 | def get_data(): 12 | """Returns the module desktop links configuration.""" 13 | return [ 14 | { 15 | "label": _("OCR Read"), 16 | "items": [ 17 | { 18 | "type": "doctype", 19 | "name": "OCR Read", 20 | "description": _("OCR Read"), 21 | } 22 | ] 23 | }, 24 | { 25 | "label": _("OCR Import"), 26 | "items": [ 27 | { 28 | "type": "doctype", 29 | "name": "OCR Import", 30 | "description": _("OCR Import"), 31 | } 32 | ] 33 | } 34 | ] 35 | -------------------------------------------------------------------------------- /erpnext_ocr/docs/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | -------------------------------------------------------------------------------- /erpnext_ocr/docs/assets/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Monogramm/erpnext_ocr/c23ebc5a4124d9d2e98740e0edf7386e4621148c/erpnext_ocr/docs/assets/.gitkeep -------------------------------------------------------------------------------- /erpnext_ocr/docs/index.md: -------------------------------------------------------------------------------- 1 | # **ERPNext OCR** Documentation 2 | 3 | This is the Documentation for **ERPNext 
OCR**. 4 | 5 | ## Docs 6 | 7 | See GitHub Pages at [monogramm.github.io/erpnext_ocr](https://monogramm.github.io/erpnext_ocr/). 8 | 9 | ## Contributing 10 | 11 | Contributions, issues and feature requests are welcome!
Feel free to check the [issues page](https://github.com/Monogramm/erpnext_ocr/issues). 12 | [Check the contributing guide](https://github.com/Monogramm/erpnext_ocr/blob/master/CONTRIBUTING.md).
13 | 14 | ## License 15 | 16 | Copyright © 2020 [Monogramm](https://github.com/Monogramm).
17 | This project is [MIT](https://opensource.org/licenses/MIT) licensed. 18 | -------------------------------------------------------------------------------- /erpnext_ocr/docs/user/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Monogramm/erpnext_ocr/c23ebc5a4124d9d2e98740e0edf7386e4621148c/erpnext_ocr/docs/user/.gitkeep -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/ocr_import/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/ocr_import/ocr_import.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, Monogramm and contributors 2 | // For license information, please see license.txt 3 | 4 | frappe.ui.form.on('OCR Import', { 5 | refresh: function(frm) { 6 | 7 | } 8 | }); 9 | -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/ocr_import/ocr_import.json: -------------------------------------------------------------------------------- 1 | { 2 | "allow_copy": 0, 3 | "allow_events_in_timeline": 0, 4 | "allow_guest_to_view": 0, 5 | 
"allow_import": 0, 6 | "allow_rename": 0, 7 | "autoname": "field:doctype_link", 8 | "beta": 0, 9 | "creation": "2019-11-25 16:37:26.611197", 10 | "custom": 0, 11 | "docstatus": 0, 12 | "doctype": "DocType", 13 | "document_type": "", 14 | "editable_grid": 1, 15 | "engine": "InnoDB", 16 | "fields": [ 17 | { 18 | "allow_bulk_edit": 0, 19 | "allow_in_quick_entry": 0, 20 | "allow_on_submit": 0, 21 | "bold": 0, 22 | "collapsible": 0, 23 | "columns": 0, 24 | "fetch_if_empty": 0, 25 | "fieldname": "doctype_link", 26 | "fieldtype": "Link", 27 | "hidden": 0, 28 | "ignore_user_permissions": 0, 29 | "ignore_xss_filter": 0, 30 | "in_filter": 0, 31 | "in_global_search": 0, 32 | "in_list_view": 1, 33 | "in_standard_filter": 1, 34 | "label": "Doctype", 35 | "length": 0, 36 | "no_copy": 0, 37 | "options": "DocType", 38 | "permlevel": 0, 39 | "precision": "", 40 | "print_hide": 0, 41 | "print_hide_if_no_value": 0, 42 | "read_only": 0, 43 | "remember_last_selected_value": 0, 44 | "report_hide": 0, 45 | "reqd": 0, 46 | "search_index": 0, 47 | "set_only_once": 0, 48 | "translatable": 0, 49 | "unique": 1 50 | }, 51 | { 52 | "allow_bulk_edit": 0, 53 | "allow_in_quick_entry": 0, 54 | "allow_on_submit": 0, 55 | "bold": 0, 56 | "collapsible": 0, 57 | "columns": 0, 58 | "fetch_if_empty": 0, 59 | "fieldname": "mappings", 60 | "fieldtype": "Table", 61 | "hidden": 0, 62 | "ignore_user_permissions": 0, 63 | "ignore_xss_filter": 0, 64 | "in_filter": 0, 65 | "in_global_search": 0, 66 | "in_list_view": 0, 67 | "in_standard_filter": 0, 68 | "label": "Mappings", 69 | "length": 0, 70 | "no_copy": 0, 71 | "options": "OCR Import Mapping", 72 | "permlevel": 0, 73 | "precision": "", 74 | "print_hide": 0, 75 | "print_hide_if_no_value": 0, 76 | "read_only": 0, 77 | "remember_last_selected_value": 0, 78 | "report_hide": 0, 79 | "reqd": 0, 80 | "search_index": 0, 81 | "set_only_once": 0, 82 | "translatable": 0, 83 | "unique": 0 84 | } 85 | ], 86 | "has_web_view": 0, 87 | "hide_heading": 0, 88 | 
"hide_toolbar": 0, 89 | "idx": 0, 90 | "image_view": 0, 91 | "in_create": 0, 92 | "is_submittable": 0, 93 | "issingle": 0, 94 | "istable": 0, 95 | "max_attachments": 0, 96 | "modified": "2020-01-24 06:38:56.285558", 97 | "modified_by": "Administrator", 98 | "module": "ERPNext OCR", 99 | "name": "OCR Import", 100 | "name_case": "", 101 | "owner": "Administrator", 102 | "permissions": [ 103 | { 104 | "amend": 0, 105 | "cancel": 0, 106 | "create": 1, 107 | "delete": 1, 108 | "email": 1, 109 | "export": 1, 110 | "if_owner": 0, 111 | "import": 0, 112 | "permlevel": 0, 113 | "print": 1, 114 | "read": 1, 115 | "report": 1, 116 | "role": "System Manager", 117 | "set_user_permissions": 0, 118 | "share": 1, 119 | "submit": 0, 120 | "write": 1 121 | }, 122 | { 123 | "amend": 0, 124 | "cancel": 0, 125 | "create": 1, 126 | "delete": 1, 127 | "email": 1, 128 | "export": 1, 129 | "if_owner": 0, 130 | "import": 0, 131 | "permlevel": 0, 132 | "print": 1, 133 | "read": 1, 134 | "report": 1, 135 | "role": "ERPNext OCR Manager", 136 | "set_user_permissions": 0, 137 | "share": 1, 138 | "submit": 0, 139 | "write": 1 140 | } 141 | ], 142 | "quick_entry": 1, 143 | "read_only": 0, 144 | "read_only_onload": 0, 145 | "show_name_in_global_search": 0, 146 | "sort_field": "modified", 147 | "sort_order": "DESC", 148 | "title_field": "doctype_link", 149 | "track_changes": 1, 150 | "track_seen": 0, 151 | "track_views": 0 152 | } -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/ocr_import/ocr_import.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2021, Monogramm and contributors 3 | # For license information, please see license.txt 4 | 5 | from __future__ import unicode_literals 6 | 7 | import re 8 | 9 | import frappe 10 | from frappe.model.document import Document 11 | 12 | from erpnext_ocr.erpnext_ocr.doctype.ocr_import_mapping.ocr_import_mapping 
class OCRImport(Document):
    """Controller for the "OCR Import" doctype; defines no behaviour of its own."""


@frappe.whitelist()
def generate_doctype(doctype_import_link, read_result, ignore_mandatory=False, ignore_validate=False):
    """
    Generate and insert a document from raw OCR text.

    The "OCR Import" record named ``doctype_import_link`` provides the field
    mappings. The same value is passed to ``frappe.new_doc`` as the doctype
    of the document being generated.

    :param doctype_import_link: name of the OCR Import record, also used as
        the doctype of the generated document
    :param read_result: text from document
    :param ignore_mandatory: Ignore mandatory fields
    :param ignore_validate: Ignore validation
    :return: generated doctype
    :raises: via ``frappe.throw`` when a mapped value cannot be found in the
        text, when mapping errors were collected, or when the generated
        document already exists
    """

    doctype_import_doc = frappe.get_doc("OCR Import", doctype_import_link)
    generated_doc = frappe.new_doc(doctype_import_link)

    if ignore_mandatory:
        # NOTE(review): these look like test fixtures leaking into production
        # code; kept unchanged because existing callers may rely on them.
        generated_doc.company = "_Test Company"
        generated_doc.price_list = "_Test Price List"
    generated_doc.flags.ignore_mandatory = ignore_mandatory
    generated_doc.flags.ignore_validate = ignore_validate

    list_with_errors = []

    for field in doctype_import_doc.mappings:
        try:
            found_field = find_field(field, read_result)
            if found_field is None:
                frappe.throw(
                    frappe._("Cannot find field '{0}' in text").format(field.field))
            if field.value_type == "Table":
                # One fresh list per table field, so rows of an earlier
                # "Table" mapping never end up in a later one.
                table_rows = []
                for item_match in re.finditer(field.regexp, read_result):
                    raw_table_doc = generated_doc.append(field.field)
                    raw_table_doc.flags.ignore_mandatory = ignore_mandatory
                    table_doc = generate_child_doctype(field.link_to_child_doc,
                                                       item_match.group(),
                                                       raw_table_doc)
                    table_rows.append(table_doc)
                generated_doc.__dict__[field.field] = table_rows
            elif field.value_type == "Date":
                generated_doc.__dict__[
                    field.field] = frappe.utils.get_datetime(found_field)
            else:
                generated_doc.__dict__[field.field] = found_field
        except KeyError:
            # Report the offending field name, not the whole OCR Import document.
            list_with_errors.append("Field {} doesn't exist in doctype".
                                    format(field.field))
    if list_with_errors:
        frappe.throw(list_with_errors)
    try:
        generated_doc.set_new_name()
        generated_doc.insert()
    except frappe.exceptions.DuplicateEntryError:
        frappe.throw("Generated doc already exists")
    return generated_doc
DVT-AX-345678\n\nPayment date: 03/12/2006\n\n \n\n 19 | \n\n \n\n \n\nReference Designation ty Unit price “Total CHF Sales\nWork\nSERVICE D COMPLETE OVERHAUL 1 5500.00 500,00 220\nSERVICE D REFRESHING COMPLETE CASE 1 380,00 380.00 220\nAND RHODIUM BATH\nExterior parts:\nJO.297.085.FP FLAT GASKET. 1 3.00 3.00 220\nJO.197.075.FP FLAT GASKET 1 4.00 4.00 220\nJO.199.059.08 FLAT ROUND GASKET 1 6.00 600 220\nVi.261.036.8C W.G.FIXATION SCREWS 10 4.00 40.00 220\nAL465.085.BC WHITE GOLD "FOIL" 1 70.00 70.00 220\nPAIR OF HAND\nLENGTH: 10/13.50MM\nCALIBRE 2868\nSPECIAL DISCOUNT -3003.00 — -3003.00\nDiscount “500.00 “800.00\nTotal CHF 2100.00\nRETURN AFTER REPAIR\n\nNO COMMERCIAL VALUE\n''' 20 | 21 | TEST_RESULT_FOR_ITEM = ''' 22 | Series STO-ITEM-.YYYY.-\n\nItem Code fdsa\n\nItem Name fdsa\n\nItem Group Consumable\nDefault Unit of Measure Nos\n\nfdsa\n\nShelf Life In Days\n\nEnd of Life 31-12-2099\nDefault Material Request Purchase\nType\n\nSr UOM\n\n1 Nos\n\nMax Sample Quantity\n\nVariant Based On\n\nSr Company\n\n1 fdsa\n\nIs Purchase Item\nMinimum Order Qty\n\nSafety Stock\nCountry of Origin Russian Federation\n\nIs Sales Item\n\nWeightage\n\nMaintain Stock\n\nInclude Item In Manufacturing\n\nOpening Stock\n\nValuation Rate\nStandard Selling Rate\n\nAllow over delivery or receipt\nupto this percent\n\n0 Weight Per Unit\n\nItem Attribute\n\nDefault Warehouse\n\nStores - F\n\nv Lead Time in days\n\n0 Last Purchase Rate\n\nv Max Discount (%)\nNo of Months\n\nNo of Months\n\nv\n\n0\nRUB 0,00\n\nRUB 123,00\n\n0\n\nConversion Factor\n\n‘l\n 23 | ''' 24 | 25 | 26 | class TestOCRImport(unittest.TestCase): 27 | def setUp(self): 28 | before_tests() 29 | frappe.set_user("Administrator") 30 | self.item_ocr_read = frappe.get_doc( 31 | {"doctype": "OCR Read", "file_to_read": os.path.join(os.path.dirname(__file__), 32 | os.path.pardir, os.path.pardir, 33 | os.path.pardir, 34 | "tests", "test_data", 35 | "item.pdf"), "language": "eng"}) 36 | self.item_ocr_read.ocr_import = "Item" 37 | # 
self.item_ocr_read.read_image() 38 | self.item_ocr_read.read_result = TEST_RESULT_FOR_ITEM 39 | 40 | # Creating OCR Read doctype for Sales Invoice 41 | self.sales_invoice_ocr_read = frappe.get_doc( 42 | {"doctype": "OCR Read", "file_to_read": os.path.join(os.path.dirname(__file__), 43 | os.path.pardir, os.path.pardir, 44 | os.path.pardir, 45 | "tests", "test_data", 46 | "Picture_010.png"), "language": "eng"}) 47 | # self.sales_invoice_ocr_read.read_image() 48 | self.sales_invoice_ocr_read.read_result = TEST_RESULT_FOR_SI 49 | if frappe.__version__[:2] != "10": 50 | comp = frappe.get_doc("Company", "_Test Company") 51 | comp.stock_adjustment_account = frappe.get_all("Account")[0]['name'] 52 | comp.save() 53 | global_default = frappe.get_doc("Global Defaults") 54 | global_default.default_company = "_Test Company" 55 | global_default.current_fiscal_year = '_Test Fiscal Year 2012' 56 | global_default.save() 57 | 58 | def tearDown(self): 59 | self.item_ocr_read.delete() 60 | self.sales_invoice_ocr_read.delete() 61 | stock_entries = frappe.get_all("Stock Entry", 62 | filters=[['total_amount', '=', '15129'], ['docstatus', '!=', '2']]) 63 | for entry in stock_entries: 64 | frappe.get_doc("Stock Entry", entry).cancel() 65 | 66 | def test_generate_doctype_item(self): 67 | item_ocr_import = frappe.get_doc("OCR Import", "Item") 68 | generated_item = generate_doctype(item_ocr_import.name, self.item_ocr_read.read_result, ignore_mandatory=True) 69 | self.assertEqual(generated_item.item_code, "fdsa") 70 | self.assertEqual(generated_item.item_group, "Consumable") 71 | generated_item.delete() 72 | 73 | def test_generating_sales_invoice(self): 74 | set_date_format("dd/mm/yyyy") 75 | sales_invoice_ocr_import = frappe.get_doc("OCR Import", "Sales Invoice") 76 | self.assertRaises(frappe.ValidationError, generate_doctype, sales_invoice_ocr_import.name, 77 | self.sales_invoice_ocr_read.read_result) # Due date before now 78 | # read_result = 
self.sales_invoice_ocr_read.read_result.encode('ascii', errors="ignore").replace("03/12/2006", 79 | # "03/12/2099") 80 | read_result = self.sales_invoice_ocr_read.read_result 81 | sales_invoice = generate_doctype(sales_invoice_ocr_import.name, read_result, ignore_mandatory=True, 82 | ignore_validate=True) 83 | 84 | self.assertEqual(sales_invoice.due_date, datetime.datetime(2006, 3, 12, 0, 0)) 85 | self.assertEqual(sales_invoice.party_account_currency, 86 | frappe.get_doc("Company", frappe.get_all("Company")[0]).default_currency) 87 | 88 | 89 | def set_date_format(date_format): 90 | settings = frappe.get_doc("System Settings") 91 | settings.date_format = date_format 92 | settings.save() 93 | 94 | 95 | def before_tests(): 96 | frappe.clear_cache() 97 | # complete setup if missing 98 | from frappe.desk.page.setup_wizard.setup_wizard import setup_complete 99 | if not frappe.get_list("Company"): 100 | setup_complete({ 101 | "currency": "USD", 102 | "full_name": "Test User", 103 | "company_name": "Wind Power LLC", 104 | "timezone": "America/New_York", 105 | "company_abbr": "WP", 106 | "industry": "Manufacturing", 107 | "country": "United States", 108 | "fy_start_date": "2020-01-01", 109 | "fy_end_date": "2020-12-31", 110 | "language": "english", 111 | "company_tagline": "Testing", 112 | "email": "test@erpnext.com", 113 | "password": "test", 114 | "chart_of_accounts": "Standard", 115 | "domains": ["Manufacturing"], 116 | }) 117 | 118 | frappe.db.sql("delete from `tabLeave Allocation`") 119 | frappe.db.sql("delete from `tabLeave Application`") 120 | frappe.db.sql("delete from `tabSalary Slip`") 121 | frappe.db.sql("delete from `tabItem Price`") 122 | 123 | frappe.db.set_value("Stock Settings", None, "auto_insert_price_list_rate_if_missing", 0) 124 | 125 | frappe.db.commit() 126 | 127 | 128 | -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/ocr_import/test_records.json: 
-------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "buying": 1, 4 | "currency": "INR", 5 | "doctype": "Price List", 6 | "enabled": 1, 7 | "price_list_name": "_Test Price List", 8 | "selling": 1 9 | }, 10 | { 11 | "doctype": "Fiscal Year", 12 | "year": "_Test Fiscal Year 2012", 13 | "year_end_date": "2020-12-31", 14 | "year_start_date": "2020-01-01" 15 | }, 16 | { 17 | "abbr": "_TC", 18 | "company_name": "_Test Company", 19 | "country": "India", 20 | "default_currency": "INR", 21 | "doctype": "Company", 22 | "chart_of_accounts": "Standard" 23 | }, 24 | { 25 | "doctype": "Item Group", 26 | "is_group": 0, 27 | "item_group_name": "All Item Groups" 28 | }, 29 | { 30 | "doctype": "UOM", 31 | "uom_name": "Nos" 32 | }, 33 | { 34 | "doctype": "Item Group", 35 | "is_group": 0, 36 | "item_group_name": "Consumable", 37 | "parent_item_group": "All Item Groups" 38 | }, 39 | { 40 | "name": "Item", 41 | "owner": "Administrator", 42 | "creation": "2020-02-12 12:31:08.827979", 43 | "modified": "2020-02-12 12:31:25.637464", 44 | "modified_by": "Administrator", 45 | "idx": 0, 46 | "docstatus": 0, 47 | "doctype_link": "Item", 48 | "doctype": "OCR Import", 49 | "mappings": [ 50 | { 51 | "name": "e069f720c4", 52 | "owner": "Administrator", 53 | "creation": "2020-02-12 12:31:25.584898", 54 | "modified": "2020-02-12 12:31:25.637464", 55 | "modified_by": "Administrator", 56 | "parent": "Item", 57 | "parentfield": "mappings", 58 | "parenttype": "OCR Import", 59 | "idx": 1, 60 | "docstatus": 0, 61 | "field": "item_code", 62 | "regexp": "Item Code (\\w+)", 63 | "value": "0", 64 | "value_type": "Regex group", 65 | "doctype": "OCR Import Mapping" 66 | }, 67 | { 68 | "name": "7869fbf9c1", 69 | "owner": "Administrator", 70 | "creation": "2020-02-12 12:31:25.622633", 71 | "modified": "2020-02-12 12:31:25.637464", 72 | "modified_by": "Administrator", 73 | "parent": "Item", 74 | "parentfield": "mappings", 75 | "parenttype": "OCR Import", 76 | "idx": 2, 77 
| "docstatus": 0, 78 | "field": "item_group", 79 | "regexp": "Item Group (\\w+)", 80 | "value": "0", 81 | "value_type": "Regex group", 82 | "doctype": "OCR Import Mapping" 83 | } 84 | ], 85 | "__last_sync_on": "2020-02-12T19:00:00.441Z" 86 | }, 87 | { 88 | "name": "Sales Invoice Item", 89 | "owner": "Administrator", 90 | "creation": "2020-02-15 13:28:43.222135", 91 | "modified": "2020-02-15 14:33:36.907480", 92 | "modified_by": "Administrator", 93 | "idx": 0, 94 | "docstatus": 0, 95 | "doctype_link": "Sales Invoice Item", 96 | "doctype": "OCR Import", 97 | "mappings": [ 98 | { 99 | "name": "9fdf7803ae", 100 | "owner": "Administrator", 101 | "creation": "2020-02-15 13:28:43.222135", 102 | "modified": "2020-02-15 14:33:36.907480", 103 | "modified_by": "Administrator", 104 | "parent": "Sales Invoice Item", 105 | "parentfield": "mappings", 106 | "parenttype": "OCR Import", 107 | "idx": 1, 108 | "docstatus": 0, 109 | "field": "item_code", 110 | "regexp": "(\\w+.\\w)", 111 | "value": "frappe.get_doc(\"Item\", pattern_result[0]).save().item_code if frappe.db.exists(\"Item\", pattern_result[0]) else frappe.get_doc({\"doctype\": \"Item\", \"item_code\": pattern_result[0],\"item_group\": \"Consumable\",\"stock_uom\":\"Nos\"}).insert().item_code", 112 | "value_type": "Python", 113 | "doctype": "OCR Import Mapping" 114 | }, 115 | { 116 | "name": "45b0d218c7", 117 | "owner": "Administrator", 118 | "creation": "2020-02-15 13:28:43.222135", 119 | "modified": "2020-02-15 14:33:36.907480", 120 | "modified_by": "Administrator", 121 | "parent": "Sales Invoice Item", 122 | "parentfield": "mappings", 123 | "parenttype": "OCR Import", 124 | "idx": 2, 125 | "docstatus": 0, 126 | "field": "item_name", 127 | "regexp": "(\\w+.\\w)", 128 | "value": "", 129 | "value_type": "Regex group", 130 | "doctype": "OCR Import Mapping" 131 | }, 132 | { 133 | "name": "87fa0af790", 134 | "owner": "Administrator", 135 | "creation": "2020-02-15 13:28:43.222135", 136 | "modified": "2020-02-15 
14:33:36.907480", 137 | "modified_by": "Administrator", 138 | "parent": "Sales Invoice Item", 139 | "parentfield": "mappings", 140 | "parenttype": "OCR Import", 141 | "idx": 3, 142 | "docstatus": 0, 143 | "field": "qty", 144 | "regexp": "1", 145 | "value": "0", 146 | "value_type": "Regex group", 147 | "doctype": "OCR Import Mapping" 148 | }, 149 | { 150 | "name": "8250d8fc0e", 151 | "owner": "Administrator", 152 | "creation": "2020-02-15 13:28:43.222135", 153 | "modified": "2020-02-15 14:33:36.907480", 154 | "modified_by": "Administrator", 155 | "parent": "Sales Invoice Item", 156 | "parentfield": "mappings", 157 | "parenttype": "OCR Import", 158 | "idx": 4, 159 | "docstatus": 0, 160 | "field": "rate", 161 | "regexp": "[0-9]+", 162 | "value": "0", 163 | "value_type": "Regex group", 164 | "doctype": "OCR Import Mapping" 165 | }, 166 | { 167 | "name": "497dba9553", 168 | "owner": "Administrator", 169 | "creation": "2020-02-15 13:28:43.222135", 170 | "modified": "2020-02-15 14:33:36.907480", 171 | "modified_by": "Administrator", 172 | "parent": "Sales Invoice Item", 173 | "parentfield": "mappings", 174 | "parenttype": "OCR Import", 175 | "idx": 5, 176 | "docstatus": 0, 177 | "field": "description", 178 | "value": "frappe.get_doc(\"Item\", \"SERVICE D\").description", 179 | "value_type": "Python", 180 | "doctype": "OCR Import Mapping" 181 | }, 182 | { 183 | "name": "ce0b6da05b", 184 | "owner": "Administrator", 185 | "creation": "2020-02-15 13:28:43.222135", 186 | "modified": "2020-02-15 14:33:36.907480", 187 | "modified_by": "Administrator", 188 | "parent": "Sales Invoice Item", 189 | "parentfield": "mappings", 190 | "parenttype": "OCR Import", 191 | "idx": 6, 192 | "docstatus": 0, 193 | "field": "margin_rate_or_amount", 194 | "regexp": "[0-9]+\\.[0-9]{2}", 195 | "value": "0", 196 | "value_type": "Regex group", 197 | "doctype": "OCR Import Mapping" 198 | }, 199 | { 200 | "name": "3e1ec628fa", 201 | "owner": "Administrator", 202 | "creation": "2020-02-15 
13:28:43.222135", 203 | "modified": "2020-02-15 14:33:36.907480", 204 | "modified_by": "Administrator", 205 | "parent": "Sales Invoice Item", 206 | "parentfield": "mappings", 207 | "parenttype": "OCR Import", 208 | "idx": 7, 209 | "docstatus": 0, 210 | "field": "uom", 211 | "value": "frappe.get_doc(\"Item\", \"SERVICE D\").uoms[0].uom", 212 | "value_type": "Python", 213 | "doctype": "OCR Import Mapping" 214 | }, 215 | { 216 | "name": "4a279903fc", 217 | "owner": "Administrator", 218 | "creation": "2020-02-15 13:28:43.222135", 219 | "modified": "2020-02-15 14:33:36.907480", 220 | "modified_by": "Administrator", 221 | "parent": "Sales Invoice Item", 222 | "parentfield": "mappings", 223 | "parenttype": "OCR Import", 224 | "idx": 8, 225 | "docstatus": 0, 226 | "field": "currency", 227 | "value": "frappe.get_doc(\"Company\", frappe.get_all(\"Company\")[0]).default_currency", 228 | "value_type": "Python", 229 | "doctype": "OCR Import Mapping" 230 | }, 231 | { 232 | "name": "e760c693f7", 233 | "owner": "Administrator", 234 | "creation": "2020-02-15 13:28:43.222135", 235 | "modified": "2020-02-15 14:33:36.907480", 236 | "modified_by": "Administrator", 237 | "parent": "Sales Invoice Item", 238 | "parentfield": "mappings", 239 | "parenttype": "OCR Import", 240 | "idx": 9, 241 | "docstatus": 0, 242 | "field": "amount", 243 | "regexp": "[0-9]+\\.[0-9]{2}", 244 | "value": "0", 245 | "value_type": "Regex group", 246 | "doctype": "OCR Import Mapping" 247 | }, 248 | { 249 | "name": "1c042f5542", 250 | "owner": "Administrator", 251 | "creation": "2020-02-15 13:28:43.222135", 252 | "modified": "2020-02-15 14:33:36.907480", 253 | "modified_by": "Administrator", 254 | "parent": "Sales Invoice Item", 255 | "parentfield": "mappings", 256 | "parenttype": "OCR Import", 257 | "idx": 10, 258 | "docstatus": 0, 259 | "field": "base_rate", 260 | "regexp": "[0-9]+", 261 | "value": "0", 262 | "value_type": "Regex group", 263 | "doctype": "OCR Import Mapping" 264 | }, 265 | { 266 | "name": 
"e189de4124", 267 | "owner": "Administrator", 268 | "creation": "2020-02-15 13:28:43.222135", 269 | "modified": "2020-02-15 14:33:36.907480", 270 | "modified_by": "Administrator", 271 | "parent": "Sales Invoice Item", 272 | "parentfield": "mappings", 273 | "parenttype": "OCR Import", 274 | "idx": 11, 275 | "docstatus": 0, 276 | "field": "income_account", 277 | "value": "frappe.get_doc(\"Company\", frappe.get_all(\"Company\")[0]).default_inventory_account", 278 | "value_type": "Python", 279 | "doctype": "OCR Import Mapping" 280 | }, 281 | { 282 | "name": "3c524e9633", 283 | "owner": "Administrator", 284 | "creation": "2020-02-15 13:28:43.222135", 285 | "modified": "2020-02-15 14:33:36.907480", 286 | "modified_by": "Administrator", 287 | "parent": "Sales Invoice Item", 288 | "parentfield": "mappings", 289 | "parenttype": "OCR Import", 290 | "idx": 12, 291 | "docstatus": 0, 292 | "field": "against_income_account", 293 | "value": "'0'", 294 | "value_type": "Python", 295 | "doctype": "OCR Import Mapping" 296 | }, 297 | { 298 | "name": "dcd6d8dc14", 299 | "owner": "Administrator", 300 | "creation": "2020-02-15 13:28:43.222135", 301 | "modified": "2020-02-15 14:33:36.907480", 302 | "modified_by": "Administrator", 303 | "parent": "Sales Invoice Item", 304 | "parentfield": "mappings", 305 | "parenttype": "OCR Import", 306 | "idx": 13, 307 | "docstatus": 0, 308 | "field": "base_amount", 309 | "regexp": "[0-9]+\\.[0-9]{2}", 310 | "value": "0", 311 | "value_type": "Regex group", 312 | "doctype": "OCR Import Mapping" 313 | }, 314 | { 315 | "name": "f64df0f1b8", 316 | "owner": "Administrator", 317 | "creation": "2020-02-15 13:28:43.222135", 318 | "modified": "2020-02-15 14:33:36.907480", 319 | "modified_by": "Administrator", 320 | "parent": "Sales Invoice Item", 321 | "parentfield": "mappings", 322 | "parenttype": "OCR Import", 323 | "idx": 14, 324 | "docstatus": 0, 325 | "field": "conversion_factor", 326 | "value": "'1'", 327 | "value_type": "Python", 328 | "doctype": "OCR 
Import Mapping" 329 | } 330 | ] 331 | }, 332 | { 333 | "name": "Sales Invoice", 334 | "owner": "Administrator", 335 | "creation": "2020-02-07 09:14: 39.429156", 336 | "modified": "2020-02-10 05: 27: 04.505324", 337 | "modified_by": "Administrator", 338 | "idx": 0, 339 | "docstatus": 0, 340 | "doctype_link": "Sales Invoice", 341 | "doctype": "OCR Import", 342 | "mappings": [ 343 | { 344 | "name": "5c88c4ab3e", 345 | "owner": "Administrator", 346 | "creation": "2020-02-07 09: 14: 39.429156", 347 | "modified": "2020-02-10 05: 27: 04.505324", 348 | "modified_by": "Administrator", 349 | "parent": "Sales Invoice", 350 | "parentfield": "mappings", 351 | "parenttype": "OCR Import", 352 | "idx": 1, 353 | "docstatus": 0, 354 | "field": "items", 355 | "regexp": ".*\\W\\d+\\W([-]?\\d+[\\.,]\\d{2})\\W([-]?\\d+[\\.,]\\d{2})", 356 | "value": "0", 357 | "value_type": "Table", 358 | "link_to_child_doc": "Sales Invoice Item", 359 | "doctype": "OCR Import Mapping" 360 | }, 361 | { 362 | "name": "38a9e868f4", 363 | "owner": "Administrator", 364 | "creation": "2020-02-07 09: 14: 39.429156", 365 | "modified": "2020-02-10 05:27: 04.505324", 366 | "modified_by": "Administrator", 367 | "parent": "Sales Invoice", 368 | "parentfield": "mappings", 369 | "parenttype": "OCR Import", 370 | "idx": 2, 371 | "docstatus": 0, 372 | "field": "name", 373 | "regexp": "qwert", 374 | "value": "0", 375 | "value_type": "Python", 376 | "doctype": "OCR Import Mapping" 377 | }, 378 | { 379 | "name": "196d7f562f", 380 | "owner": "Administrator", 381 | "creation": "2020-02-07 09: 14: 39.429156", 382 | "modified": "2020-02-10 05:27: 04.505324", 383 | "modified_by": "Administrator", 384 | "parent": "Sales Invoice", 385 | "parentfield": "mappings", 386 | "parenttype": "OCR Import", 387 | "idx": 3, 388 | "docstatus": 0, 389 | "field": "due_date", 390 | "regexp": "\\d{2}/\\d{2}/\\d{4}", 391 | "value": "0", 392 | "value_type": "Date", 393 | "doctype": "OCR Import Mapping" 394 | }, 395 | { 396 | "name": "5984501ce3", 
397 | "owner": "Administrator", 398 | "creation": "2020-02-07 09:14: 39.429156", 399 | "modified": "2020-02-10 05: 27: 04.505324", 400 | "modified_by": "Administrator", 401 | "parent": "Sales Invoice", 402 | "parentfield": "mappings", 403 | "parenttype": "OCR Import", 404 | "idx": 4, 405 | "docstatus": 0, 406 | "field": "against_income_account", 407 | "regexp": "", 408 | "value": "frappe.get_doc(\"Company\", frappe.get_all(\"Company\")[0]).default_receivable_account", 409 | "value_type": "Python", 410 | "doctype": "OCR Import Mapping" 411 | }, 412 | { 413 | "name": "56f7630967", 414 | "owner": "Administrator", 415 | "creation": "2020-02-07 09:14:39.429156", 416 | "modified": "2020-02-10 05:27:04.505324", 417 | "modified_by": "Administrator", 418 | "parent": "Sales Invoice", 419 | "parentfield": "mappings", 420 | "parenttype": "OCR Import", 421 | "idx": 5, 422 | "docstatus": 0, 423 | "field": "party_account_currency", 424 | "regexp": "", 425 | "value": "frappe.get_doc(\"Company\", frappe.get_all(\"Company\")[0]).default_currency", 426 | "value_type": "Python", 427 | "doctype": "OCR Import Mapping" 428 | }, 429 | { 430 | "name": "fc0b298eec", 431 | "owner": "Administrator", 432 | "creation": "2020-02-07 09:14:39.429156", 433 | "modified": "2020-02-10 05:27:04.505324", 434 | "modified_by": "Administrator", 435 | "parent": "Sales Invoice", 436 | "parentfield": "mappings", 437 | "parenttype": "OCR Import", 438 | "idx": 6, 439 | "docstatus": 0, 440 | "field": "debit_to", 441 | "value": "frappe.get_doc(\"Company\", frappe.get_all(\"Company\")[0]).default_receivable_account", 442 | "value_type": "Python", 443 | "doctype": "OCR Import Mapping" 444 | }, 445 | { 446 | "name": "3d0d328325", 447 | "owner": "Administrator", 448 | "creation": "2020-02-07 09:14:39.429156", 449 | "modified": "2020-02-10 05:27:04.505324", 450 | "modified_by": "Administrator", 451 | "parent": "Sales Invoice", 452 | "parentfield": "mappings", 453 | "parenttype": "OCR Import", 454 | "idx": 7, 455 | 
"docstatus": 0, 456 | "field": "conversion_factor", 457 | "value": "'1'", 458 | "value_type": "Python", 459 | "doctype": "OCR Import Mapping" 460 | } 461 | ], 462 | "__last_sync_on": "2020-02-10T11:04:00.038Z" 463 | } 464 | ] -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/ocr_import_mapping/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/ocr_import_mapping/ocr_import_mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "allow_copy": 0, 3 | "allow_events_in_timeline": 0, 4 | "allow_guest_to_view": 0, 5 | "allow_import": 0, 6 | "allow_rename": 0, 7 | "beta": 0, 8 | "creation": "2019-11-25 16:38:02.878014", 9 | "custom": 0, 10 | "docstatus": 0, 11 | "doctype": "DocType", 12 | "document_type": "", 13 | "editable_grid": 1, 14 | "engine": "InnoDB", 15 | "fields": [ 16 | { 17 | "allow_bulk_edit": 0, 18 | "allow_in_quick_entry": 0, 19 | "allow_on_submit": 0, 20 | "bold": 0, 21 | "collapsible": 0, 22 | "columns": 0, 23 | "fetch_if_empty": 0, 24 | "fieldname": "field", 25 | "fieldtype": "Data", 26 | "hidden": 0, 27 | "ignore_user_permissions": 0, 28 | "ignore_xss_filter": 0, 29 | "in_filter": 0, 30 | "in_global_search": 0, 31 | "in_list_view": 1, 32 | "in_standard_filter": 1, 33 | "label": "Field", 34 | "length": 0, 35 | "no_copy": 0, 36 | "permlevel": 0, 37 | "precision": "", 38 | "print_hide": 0, 39 | "print_hide_if_no_value": 0, 40 | "read_only": 0, 41 | "remember_last_selected_value": 0, 42 | "report_hide": 0, 43 | "reqd": 1, 44 | "search_index": 0, 45 | "set_only_once": 0, 46 | "translatable": 0, 47 | "unique": 0 48 | }, 49 | { 50 | "allow_bulk_edit": 0, 51 | "allow_in_quick_entry": 0, 52 | "allow_on_submit": 0, 53 | 
"bold": 0, 54 | "collapsible": 0, 55 | "columns": 0, 56 | "fetch_if_empty": 0, 57 | "fieldname": "regexp", 58 | "fieldtype": "Data", 59 | "hidden": 0, 60 | "ignore_user_permissions": 0, 61 | "ignore_xss_filter": 0, 62 | "in_filter": 0, 63 | "in_global_search": 0, 64 | "in_list_view": 1, 65 | "in_standard_filter": 1, 66 | "label": "Regexp", 67 | "length": 0, 68 | "no_copy": 0, 69 | "permlevel": 0, 70 | "precision": "", 71 | "print_hide": 0, 72 | "print_hide_if_no_value": 0, 73 | "read_only": 0, 74 | "remember_last_selected_value": 0, 75 | "report_hide": 0, 76 | "reqd": 0, 77 | "search_index": 0, 78 | "set_only_once": 0, 79 | "translatable": 0, 80 | "unique": 0 81 | }, 82 | { 83 | "allow_bulk_edit": 0, 84 | "allow_in_quick_entry": 0, 85 | "allow_on_submit": 0, 86 | "bold": 0, 87 | "collapsible": 0, 88 | "columns": 0, 89 | "default": "0", 90 | "fetch_if_empty": 0, 91 | "fieldname": "value", 92 | "fieldtype": "Long Text", 93 | "hidden": 0, 94 | "ignore_user_permissions": 0, 95 | "ignore_xss_filter": 0, 96 | "in_filter": 0, 97 | "in_global_search": 0, 98 | "in_list_view": 0, 99 | "in_standard_filter": 0, 100 | "label": "Value", 101 | "length": 0, 102 | "no_copy": 0, 103 | "permlevel": 0, 104 | "precision": "", 105 | "print_hide": 0, 106 | "print_hide_if_no_value": 0, 107 | "read_only": 0, 108 | "remember_last_selected_value": 0, 109 | "report_hide": 0, 110 | "reqd": 0, 111 | "search_index": 0, 112 | "set_only_once": 0, 113 | "translatable": 0, 114 | "unique": 0 115 | }, 116 | { 117 | "allow_bulk_edit": 0, 118 | "allow_in_quick_entry": 0, 119 | "allow_on_submit": 0, 120 | "bold": 0, 121 | "collapsible": 0, 122 | "columns": 0, 123 | "fetch_if_empty": 0, 124 | "fieldname": "value_type", 125 | "fieldtype": "Select", 126 | "hidden": 0, 127 | "ignore_user_permissions": 0, 128 | "ignore_xss_filter": 0, 129 | "in_filter": 0, 130 | "in_global_search": 0, 131 | "in_list_view": 0, 132 | "in_standard_filter": 0, 133 | "label": "Value type", 134 | "length": 0, 135 | "no_copy": 0, 
136 | "options": "Table\nPython\nRegex group\nDate", 137 | "permlevel": 0, 138 | "precision": "", 139 | "print_hide": 0, 140 | "print_hide_if_no_value": 0, 141 | "read_only": 0, 142 | "remember_last_selected_value": 0, 143 | "report_hide": 0, 144 | "reqd": 0, 145 | "search_index": 0, 146 | "set_only_once": 0, 147 | "translatable": 0, 148 | "unique": 0 149 | }, 150 | { 151 | "allow_bulk_edit": 0, 152 | "allow_in_quick_entry": 0, 153 | "allow_on_submit": 0, 154 | "bold": 0, 155 | "collapsible": 0, 156 | "columns": 0, 157 | "depends_on": "eval:doc.value_type == 'Table'", 158 | "fetch_if_empty": 0, 159 | "fieldname": "link_to_child_doc", 160 | "fieldtype": "Link", 161 | "hidden": 0, 162 | "ignore_user_permissions": 0, 163 | "ignore_xss_filter": 0, 164 | "in_filter": 0, 165 | "in_global_search": 0, 166 | "in_list_view": 0, 167 | "in_standard_filter": 0, 168 | "label": "Link to Import Mapping", 169 | "length": 0, 170 | "no_copy": 0, 171 | "options": "OCR Import", 172 | "permlevel": 0, 173 | "precision": "", 174 | "print_hide": 0, 175 | "print_hide_if_no_value": 0, 176 | "read_only": 0, 177 | "remember_last_selected_value": 0, 178 | "report_hide": 0, 179 | "reqd": 0, 180 | "search_index": 0, 181 | "set_only_once": 0, 182 | "translatable": 0, 183 | "unique": 0 184 | } 185 | ], 186 | "has_web_view": 0, 187 | "hide_heading": 0, 188 | "hide_toolbar": 0, 189 | "idx": 0, 190 | "image_view": 0, 191 | "in_create": 0, 192 | "is_submittable": 0, 193 | "issingle": 0, 194 | "istable": 1, 195 | "max_attachments": 0, 196 | "modified": "2020-02-07 06:51:55.329413", 197 | "modified_by": "Administrator", 198 | "module": "ERPNext OCR", 199 | "name": "OCR Import Mapping", 200 | "name_case": "", 201 | "owner": "Administrator", 202 | "permissions": [], 203 | "quick_entry": 1, 204 | "read_only": 0, 205 | "read_only_onload": 0, 206 | "show_name_in_global_search": 0, 207 | "sort_field": "modified", 208 | "sort_order": "DESC", 209 | "track_changes": 1, 210 | "track_seen": 0, 211 | "track_views": 
class OCRImportMapping(Document):
    """Controller for "OCR Import Mapping", the child-table rows of an OCR Import."""


@frappe.whitelist()
def generate_child_doctype(doctype_import_link, string_raw_table_value, table_doc):
    """
    Generate child for some doctype.

    Every mapping of the OCR Import record is evaluated against the raw text
    of one table row, and each value found is written onto ``table_doc``.

    :param doctype_import_link: link to OCR Import
    :param string_raw_table_value: String for future child
    :param table_doc: child document to fill; it is modified in place and saved
    :return: the same ``table_doc`` after it has been saved
    """
    ocr_import_table = frappe.get_doc("OCR Import",
                                      doctype_import_link)
    for table_field in ocr_import_table.mappings:
        found_field = find_field(table_field, string_raw_table_value)
        if found_field is None:
            continue
        # Compare the mapping's value_type: comparing the mapping row itself
        # to 'Date' is never true, so dates were never converted.
        if table_field.value_type == 'Date':
            found_field = frappe.utils.get_datetime(found_field)
        table_doc.__dict__[table_field.field] = found_field

    table_doc.parent = ocr_import_table.name
    table_doc.save()

    return table_doc


def find_field(field, read_result):
    """
    Resolve the value of one mapping against the OCR text.

    :param field: node from mapping (reads ``regexp``, ``value`` and ``value_type``)
    :param read_result: text from document
    :return: for "Python" mappings, the result of evaluating ``field.value``;
        otherwise the ``re.findall`` match at index ``cint(field.value)``, or
        ``None`` when there is no regexp, no match, or no match at that index
    """
    # Keep this local name: "Python" mapping expressions refer to it.
    pattern_result = None
    if field.regexp:
        pattern_result = re.findall(field.regexp, read_result)

    if field.value_type == "Python":
        # SECURITY: evaluates an expression stored on the mapping record, with
        # access to ``frappe`` and ``pattern_result``. Only trusted users
        # should be allowed to edit OCR Import mappings.
        return eval(field.value)  # skipcq: PYL-W0123

    # Callers test the result against None, so report "not found" as None
    # instead of failing on a missing or empty match list.
    if not pattern_result:
        return None
    try:
        return pattern_result.pop(cint(field.value))
    except IndexError:
        return None
"no_copy": 0, 37 | "permlevel": 0, 38 | "precision": "", 39 | "print_hide": 0, 40 | "print_hide_if_no_value": 0, 41 | "read_only": 0, 42 | "remember_last_selected_value": 0, 43 | "report_hide": 0, 44 | "reqd": 1, 45 | "search_index": 0, 46 | "set_only_once": 0, 47 | "translatable": 0, 48 | "unique": 1 49 | }, 50 | { 51 | "allow_bulk_edit": 0, 52 | "allow_in_quick_entry": 0, 53 | "allow_on_submit": 0, 54 | "bold": 0, 55 | "collapsible": 0, 56 | "columns": 0, 57 | "fetch_if_empty": 0, 58 | "fieldname": "lang", 59 | "fieldtype": "Link", 60 | "hidden": 0, 61 | "ignore_user_permissions": 0, 62 | "ignore_xss_filter": 0, 63 | "in_filter": 0, 64 | "in_global_search": 0, 65 | "in_list_view": 1, 66 | "in_standard_filter": 0, 67 | "label": "Language", 68 | "length": 0, 69 | "no_copy": 0, 70 | "options": "Language", 71 | "permlevel": 0, 72 | "precision": "", 73 | "print_hide": 0, 74 | "print_hide_if_no_value": 0, 75 | "read_only": 0, 76 | "remember_last_selected_value": 0, 77 | "report_hide": 0, 78 | "reqd": 1, 79 | "search_index": 0, 80 | "set_only_once": 0, 81 | "translatable": 0, 82 | "unique": 1 83 | }, 84 | { 85 | "allow_bulk_edit": 0, 86 | "allow_in_quick_entry": 0, 87 | "allow_on_submit": 0, 88 | "bold": 0, 89 | "collapsible": 0, 90 | "columns": 0, 91 | "fetch_if_empty": 0, 92 | "fieldname": "is_supported", 93 | "fieldtype": "Read Only", 94 | "hidden": 0, 95 | "ignore_user_permissions": 0, 96 | "ignore_xss_filter": 0, 97 | "in_filter": 0, 98 | "in_global_search": 0, 99 | "in_list_view": 0, 100 | "in_standard_filter": 0, 101 | "label": "Is supported", 102 | "length": 0, 103 | "no_copy": 0, 104 | "permlevel": 0, 105 | "precision": "", 106 | "print_hide": 0, 107 | "print_hide_if_no_value": 0, 108 | "read_only": 0, 109 | "remember_last_selected_value": 0, 110 | "report_hide": 0, 111 | "reqd": 0, 112 | "search_index": 0, 113 | "set_only_once": 0, 114 | "translatable": 0, 115 | "unique": 0 116 | }, 117 | { 118 | "allow_bulk_edit": 0, 119 | "allow_in_quick_entry": 0, 120 | 
"allow_on_submit": 0, 121 | "bold": 0, 122 | "collapsible": 0, 123 | "columns": 0, 124 | "default": "Default", 125 | "fetch_if_empty": 0, 126 | "fieldname": "type_of_ocr", 127 | "fieldtype": "Select", 128 | "hidden": 0, 129 | "ignore_user_permissions": 0, 130 | "ignore_xss_filter": 0, 131 | "in_filter": 0, 132 | "in_global_search": 0, 133 | "in_list_view": 0, 134 | "in_standard_filter": 0, 135 | "label": "Type of OCR", 136 | "length": 0, 137 | "no_copy": 0, 138 | "options": "Default\nFast\nBest\nCustom", 139 | "permlevel": 0, 140 | "precision": "", 141 | "print_hide": 0, 142 | "print_hide_if_no_value": 0, 143 | "read_only": 0, 144 | "remember_last_selected_value": 0, 145 | "report_hide": 0, 146 | "reqd": 0, 147 | "search_index": 0, 148 | "set_only_once": 0, 149 | "translatable": 0, 150 | "unique": 0 151 | }, 152 | { 153 | "allow_bulk_edit": 0, 154 | "allow_in_quick_entry": 0, 155 | "allow_on_submit": 0, 156 | "bold": 0, 157 | "collapsible": 0, 158 | "columns": 0, 159 | "depends_on": "eval:parent.type_of_ocr", 160 | "fetch_if_empty": 0, 161 | "fieldname": "download", 162 | "fieldtype": "Button", 163 | "hidden": 0, 164 | "ignore_user_permissions": 0, 165 | "ignore_xss_filter": 0, 166 | "in_filter": 0, 167 | "in_global_search": 0, 168 | "in_list_view": 0, 169 | "in_standard_filter": 0, 170 | "label": "Download OCR", 171 | "length": 0, 172 | "no_copy": 0, 173 | "permlevel": 0, 174 | "precision": "", 175 | "print_hide": 0, 176 | "print_hide_if_no_value": 0, 177 | "read_only": 0, 178 | "remember_last_selected_value": 0, 179 | "report_hide": 0, 180 | "reqd": 0, 181 | "search_index": 0, 182 | "set_only_once": 0, 183 | "translatable": 0, 184 | "unique": 0 185 | } 186 | ], 187 | "has_web_view": 0, 188 | "hide_heading": 0, 189 | "hide_toolbar": 0, 190 | "icon": "fa fa-globe", 191 | "idx": 0, 192 | "image_view": 0, 193 | "in_create": 0, 194 | "is_submittable": 0, 195 | "issingle": 0, 196 | "istable": 0, 197 | "max_attachments": 0, 198 | "modified": "2020-02-19 
05:32:09.910691", 199 | "modified_by": "Administrator", 200 | "module": "ERPNext OCR", 201 | "name": "OCR Language", 202 | "name_case": "", 203 | "owner": "Administrator", 204 | "permissions": [ 205 | { 206 | "amend": 0, 207 | "cancel": 0, 208 | "create": 1, 209 | "delete": 1, 210 | "email": 1, 211 | "export": 1, 212 | "if_owner": 0, 213 | "import": 0, 214 | "permlevel": 0, 215 | "print": 1, 216 | "read": 1, 217 | "report": 1, 218 | "role": "System Manager", 219 | "set_user_permissions": 0, 220 | "share": 1, 221 | "submit": 0, 222 | "write": 1 223 | } 224 | ], 225 | "quick_entry": 0, 226 | "read_only": 0, 227 | "read_only_onload": 0, 228 | "search_fields": "code,lang", 229 | "show_name_in_global_search": 0, 230 | "sort_field": "modified", 231 | "sort_order": "DESC", 232 | "title_field": "code", 233 | "track_changes": 1, 234 | "track_seen": 0, 235 | "track_views": 0 236 | } -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/ocr_language/ocr_language.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2021, Monogramm and contributors 3 | # For license information, please see license.txt 4 | 5 | from __future__ import unicode_literals 6 | 7 | import os 8 | import requests 9 | 10 | import frappe 11 | from frappe import _ 12 | from frappe.model.document import Document 13 | 14 | import tesserocr 15 | 16 | 17 | @frappe.whitelist() 18 | def check_language(lang): 19 | """Check a language availability. 
Returns a user friendly text.""" 20 | return _("Yes") if lang_available(lang) else _("No") 21 | 22 | 23 | @frappe.whitelist() 24 | def lang_available(lang): 25 | """Call Tesseract OCR to verify language is available.""" 26 | list_of_languages = tesserocr.get_languages()[1] 27 | if len(lang) == 2: 28 | return frappe.get_doc("OCR Language", {"lang": lang}).code in list_of_languages 29 | 30 | return lang in list_of_languages 31 | 32 | 33 | @frappe.whitelist() 34 | def get_current_language(user): 35 | """Get Tesseract language matching current user or system settings.""" 36 | user = frappe.get_doc("User", user) 37 | language = user.language 38 | if not language: 39 | settings = frappe.get_doc("System Settings") 40 | language = settings.language 41 | 42 | lang_code = frappe.get_doc("OCR Language", {"lang": language}).name 43 | return lang_code if lang_code is not None else "eng" 44 | 45 | 46 | class OCRLanguage(Document): 47 | def __init__(self, *args, **kwargs): 48 | super(OCRLanguage, self).__init__(*args, **kwargs) 49 | self.TESSDATA_LINK = "https://github.com/tesseract-ocr/tessdata{}/blob/master/{}.traineddata?raw=true" 50 | if self.code: 51 | self.is_supported = check_language(self.code) 52 | 53 | def download_tesseract(self): 54 | if self.type_of_ocr == 'Default': 55 | path = self.TESSDATA_LINK.format("", self.name) 56 | else: 57 | path = self.TESSDATA_LINK.format( 58 | "_" + self.type_of_ocr.lower(), self.name) 59 | 60 | res = requests.get(path) 61 | dest = os.getenv("TESSDATA_PREFIX", "/usr/share/tesseract-ocr/tessdata/") + \ 62 | "/" + self.name + ".traineddata" 63 | 64 | if self.type_of_ocr == 'Custom': 65 | frappe.throw( 66 | _("Download is not available for custom OCR data.")) 67 | with open(dest, "wb") as file: 68 | file.write(res.content) 69 | 70 | if os.path.exists(dest): 71 | self.is_supported = check_language(self.code) 72 | self.save() 73 | else: 74 | frappe.throw(_("File could not be downloaded.")) 75 | 
-------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/ocr_language/test_ocr_language.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable */ 2 | // rename this file from _test_[name] to test_[name] to activate 3 | // and remove above this line 4 | 5 | QUnit.test("test: OCR Language", function (assert) { 6 | let done = assert.async(); 7 | let random_code = frappe.utils.get_random(3); 8 | let random_lang = frappe.utils.get_random(2); 9 | 10 | // number of asserts 11 | assert.expect(1); 12 | 13 | frappe.run_serially([ 14 | // insert a new OCR Language 15 | () => frappe.tests.make('OCR Language', [ 16 | // values to be set 17 | {code: random_code, lang: random_lang} 18 | ]), 19 | () => { 20 | assert.equal(cur_frm.doc.code, random_code); 21 | assert.equal(cur_frm.doc.lang, random_lang); 22 | }, 23 | () => done() 24 | ]); 25 | 26 | }); 27 | -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/ocr_language/test_ocr_language.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2021, Monogramm and Contributors 3 | # For license information, please see license.txt 4 | 5 | from __future__ import unicode_literals 6 | 7 | import frappe 8 | import unittest 9 | 10 | from erpnext_ocr.erpnext_ocr.doctype.ocr_language.ocr_language import lang_available, check_language, \ 11 | get_current_language 12 | 13 | 14 | def create_test_data(): 15 | # Create test user 16 | if not frappe.db.exists("User", "test_user_ocr@example.com"): 17 | test_user = frappe.new_doc("User") 18 | test_user.name = 'test_user_ocr' 19 | test_user.first_name = 'test_user_ocr' 20 | test_user.email = 'test_user_ocr@example.com' 21 | test_user.language = "en" 22 | test_user.insert(ignore_permissions=True) 23 | 24 | if not frappe.db.exists("User", 
"test_admin_ocr@example.com"): 25 | test_user = frappe.new_doc("User") 26 | test_user.name = 'test_admin_ocr' 27 | test_user.first_name = 'test_admin_ocr' 28 | test_user.email = 'test_admin_ocr@example.com' 29 | test_user.insert(ignore_permissions=True) 30 | 31 | if not frappe.db.exists("OCR Language", "sin"): 32 | frappe.get_doc({ 33 | "doctype": "OCR Language", 34 | "code": "sin", 35 | "lang": "si" 36 | }).insert() 37 | 38 | if not frappe.db.exists("OCR Language", "uzb"): 39 | frappe.get_doc({ 40 | "doctype": "OCR Language", 41 | "code": "uzb", 42 | "lang": "uz" 43 | }).insert() 44 | 45 | if not frappe.db.exists("OCR Language", "ukr"): 46 | frappe.get_doc({ 47 | "doctype": "OCR Language", 48 | "code": "ukr", 49 | "lang": "uk" 50 | }).insert() 51 | 52 | frappe.flags.test_ocr_language_created = True 53 | 54 | 55 | def delete_test_data(): 56 | if frappe.db.exists("User", "test_user_ocr@example.com"): 57 | frappe.db.sql("""delete from `tabUser` where email='test_user_ocr@example.com'""") # ValidationError without SQL 58 | frappe.db.sql("""delete from `tabEmail Queue`""") 59 | 60 | if frappe.db.exists("User", "test_admin_ocr@example.com"): 61 | frappe.db.sql("""delete from `tabUser` where email='test_admin_ocr@example.com'""") # ValidationError without SQL 62 | frappe.db.sql("""delete from `tabEmail Queue`""") 63 | 64 | if frappe.flags.test_ocr_language_created: 65 | frappe.get_doc("OCR Language", "sin").delete() 66 | #frappe.db.sql("""delete from `tabOCR Language` where code='sin'""") 67 | 68 | if frappe.flags.test_ocr_language_created: 69 | frappe.get_doc("OCR Language", "uzb").delete() 70 | #frappe.db.sql("""delete from `tabOCR Language` where code='uzb'""") 71 | 72 | if frappe.flags.test_ocr_language_created: 73 | frappe.get_doc("OCR Language", "ukr").delete() 74 | #frappe.db.sql("""delete from `tabOCR Language` where code='sin_custom'""") 75 | 76 | frappe.flags.test_ocr_language_created = False 77 | 78 | class TestOCRLanguage(unittest.TestCase): 79 | def 
setUp(self): 80 | frappe.set_user("Administrator") 81 | create_test_data() 82 | 83 | def tearDown(self): 84 | frappe.set_user("Administrator") 85 | delete_test_data() 86 | 87 | def test_en_lang_available(self): 88 | self.assertTrue(lang_available("en")) 89 | 90 | def test_eng_lang_available(self): 91 | self.assertTrue(lang_available("eng")) 92 | 93 | def test_osd_lang_available(self): 94 | self.assertTrue(lang_available("osd")) 95 | 96 | def test_equ_lang_available(self): 97 | self.assertTrue(lang_available("equ")) 98 | 99 | def test_666_lang_available(self): 100 | self.assertFalse(lang_available("666")) 101 | 102 | def test_en_check_language(self): 103 | self.assertEqual(check_language("en"), frappe._("Yes")) 104 | 105 | def test_eng_check_language(self): 106 | self.assertEqual(check_language("eng"), frappe._("Yes")) 107 | 108 | def test_osd_check_language(self): 109 | self.assertEqual(check_language("osd"), frappe._("Yes")) 110 | 111 | def test_equ_check_language(self): 112 | self.assertEqual(check_language("equ"), frappe._("Yes")) 113 | 114 | def test_666_check_language(self): 115 | self.assertEqual(check_language("666"), frappe._("No")) 116 | 117 | def test_get_current_language(self): 118 | self.assertEqual("eng", get_current_language("test_user_ocr@example.com")) 119 | 120 | def test_get_current_language_admin(self): 121 | self.assertEqual("eng", get_current_language("test_admin_ocr@example.com")) 122 | 123 | def test_download_tesseract_default(self): 124 | sin_lang = frappe.get_doc("OCR Language", "sin") 125 | if sin_lang.is_supported == 'No': 126 | sin_lang.type_of_ocr = "Default" 127 | sin_lang.download_tesseract() 128 | self.assertEqual(sin_lang.is_supported, "Yes") 129 | 130 | def test_download_tesseract_best(self): 131 | uzb_lang = frappe.get_doc("OCR Language", "uzb") 132 | if uzb_lang.is_supported == 'No': 133 | uzb_lang.type_of_ocr = "Best" 134 | uzb_lang.download_tesseract() 135 | self.assertEqual(uzb_lang.is_supported, "Yes") 136 | 137 | def 
test_download_tesseract_custom(self): 138 | ukr_lang = frappe.get_doc("OCR Language", "ukr") 139 | if ukr_lang.is_supported == 'No': 140 | ukr_lang.type_of_ocr = "Custom" 141 | self.assertRaises(frappe.ValidationError, ukr_lang.download_tesseract) 142 | -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/ocr_read/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.js: -------------------------------------------------------------------------------- 1 | frappe.ui.form.on('OCR Read', { 2 | setup: function (frm) { 3 | frappe.call({ 4 | method: "erpnext_ocr.erpnext_ocr.doctype.ocr_language.ocr_language.get_current_language", 5 | args: { 6 | 'user': frappe.user['name'] 7 | }, 8 | callback: function (r) { 9 | cur_frm.set_value("language", r.message); 10 | } 11 | }) 12 | }, 13 | read_image: function (frm) { 14 | frappe.hide_msgprint(true); 15 | frappe.realtime.on("ocr_progress_bar", function (data) { 16 | frappe.hide_msgprint(true); 17 | frappe.show_progress(__("Reading the file"), data.progress[0], data.progress[1]); 18 | }); 19 | frappe.call({ 20 | method: "read_image", 21 | doc: cur_frm.doc, 22 | args: { 23 | "spell_checker": frm.doc.spell_checker 24 | }, 25 | callback: function (r) { 26 | cur_dialog.hide(); 27 | frappe.msgprint(r.message.message); 28 | cur_frm.refresh(); 29 | } 30 | }); 31 | }, 32 | import: function (frm) { 33 | if (typeof frm.doc.ocr_import != "undefined" && frm.doc.ocr_import !== '') { 34 | frappe.call({ 35 | method: "erpnext_ocr.erpnext_ocr.doctype.ocr_import.ocr_import.generate_doctype", 36 | args: { 37 | "doctype_import_link": frm.doc.ocr_import, 38 | "read_result": frm.doc.read_result 39 | }, 40 | callback: function (r) { 41 | 
console.log(r.message); 42 | frappe.show_alert({ 43 | message: __('Doctype {0} generated', 44 | ['' + r.message.name + '']), 45 | indicator: 'green' 46 | }); 47 | } 48 | }) 49 | } 50 | else { 51 | frappe.throw("Field Template is None"); 52 | } 53 | } 54 | }); 55 | -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.json: -------------------------------------------------------------------------------- 1 | { 2 | "allow_copy": 0, 3 | "allow_events_in_timeline": 0, 4 | "allow_guest_to_view": 0, 5 | "allow_import": 0, 6 | "allow_rename": 0, 7 | "autoname": "", 8 | "beta": 0, 9 | "creation": "2018-01-13 18:30:17.344511", 10 | "custom": 0, 11 | "docstatus": 0, 12 | "doctype": "DocType", 13 | "document_type": "Document", 14 | "editable_grid": 1, 15 | "engine": "InnoDB", 16 | "fields": [ 17 | { 18 | "allow_bulk_edit": 0, 19 | "allow_in_quick_entry": 0, 20 | "allow_on_submit": 0, 21 | "bold": 0, 22 | "collapsible": 0, 23 | "columns": 0, 24 | "fetch_if_empty": 0, 25 | "fieldname": "language", 26 | "fieldtype": "Link", 27 | "hidden": 0, 28 | "ignore_user_permissions": 0, 29 | "ignore_xss_filter": 0, 30 | "in_filter": 1, 31 | "in_global_search": 0, 32 | "in_list_view": 1, 33 | "in_standard_filter": 0, 34 | "label": "Language", 35 | "length": 0, 36 | "no_copy": 0, 37 | "options": "OCR Language", 38 | "permlevel": 0, 39 | "precision": "", 40 | "print_hide": 0, 41 | "print_hide_if_no_value": 0, 42 | "read_only": 0, 43 | "remember_last_selected_value": 0, 44 | "report_hide": 0, 45 | "reqd": 1, 46 | "search_index": 0, 47 | "set_only_once": 0, 48 | "translatable": 0, 49 | "unique": 0 50 | }, 51 | { 52 | "allow_bulk_edit": 0, 53 | "allow_in_quick_entry": 0, 54 | "allow_on_submit": 0, 55 | "bold": 0, 56 | "collapsible": 0, 57 | "columns": 0, 58 | "depends_on": "", 59 | "fetch_if_empty": 0, 60 | "fieldname": "file_to_read", 61 | "fieldtype": "Attach", 62 | "hidden": 0, 63 | "ignore_user_permissions": 0, 64 | 
"ignore_xss_filter": 0, 65 | "in_filter": 0, 66 | "in_global_search": 0, 67 | "in_list_view": 1, 68 | "in_standard_filter": 0, 69 | "label": "Image or PDF to Read", 70 | "length": 0, 71 | "no_copy": 0, 72 | "permlevel": 0, 73 | "precision": "", 74 | "print_hide": 0, 75 | "print_hide_if_no_value": 0, 76 | "read_only": 0, 77 | "remember_last_selected_value": 0, 78 | "report_hide": 0, 79 | "reqd": 0, 80 | "search_index": 0, 81 | "set_only_once": 0, 82 | "translatable": 0, 83 | "unique": 0 84 | }, 85 | { 86 | "allow_bulk_edit": 0, 87 | "allow_in_quick_entry": 0, 88 | "allow_on_submit": 0, 89 | "bold": 0, 90 | "collapsible": 0, 91 | "columns": 0, 92 | "fetch_if_empty": 0, 93 | "fieldname": "read_section_break", 94 | "fieldtype": "Section Break", 95 | "hidden": 0, 96 | "ignore_user_permissions": 0, 97 | "ignore_xss_filter": 0, 98 | "in_filter": 0, 99 | "in_global_search": 0, 100 | "in_list_view": 0, 101 | "in_standard_filter": 0, 102 | "length": 0, 103 | "no_copy": 0, 104 | "permlevel": 0, 105 | "precision": "", 106 | "print_hide": 0, 107 | "print_hide_if_no_value": 0, 108 | "read_only": 0, 109 | "remember_last_selected_value": 0, 110 | "report_hide": 0, 111 | "reqd": 0, 112 | "search_index": 0, 113 | "set_only_once": 0, 114 | "translatable": 0, 115 | "unique": 0 116 | }, 117 | { 118 | "allow_bulk_edit": 0, 119 | "allow_in_quick_entry": 0, 120 | "allow_on_submit": 0, 121 | "bold": 0, 122 | "collapsible": 0, 123 | "columns": 0, 124 | "depends_on": "eval:parent.file_to_read", 125 | "fetch_if_empty": 0, 126 | "fieldname": "read_image", 127 | "fieldtype": "Button", 128 | "hidden": 0, 129 | "ignore_user_permissions": 0, 130 | "ignore_xss_filter": 0, 131 | "in_filter": 0, 132 | "in_global_search": 0, 133 | "in_list_view": 0, 134 | "in_standard_filter": 0, 135 | "label": "Read file", 136 | "length": 0, 137 | "no_copy": 0, 138 | "permlevel": 0, 139 | "precision": "", 140 | "print_hide": 0, 141 | "print_hide_if_no_value": 0, 142 | "read_only": 0, 143 | 
"remember_last_selected_value": 0, 144 | "report_hide": 0, 145 | "reqd": 0, 146 | "search_index": 0, 147 | "set_only_once": 0, 148 | "translatable": 0, 149 | "unique": 0 150 | }, 151 | { 152 | "allow_bulk_edit": 0, 153 | "allow_in_quick_entry": 0, 154 | "allow_on_submit": 0, 155 | "bold": 0, 156 | "collapsible": 0, 157 | "columns": 0, 158 | "depends_on": "eval:parent.file_to_read", 159 | "fetch_if_empty": 0, 160 | "fieldname": "spell_checker", 161 | "fieldtype": "Check", 162 | "hidden": 0, 163 | "ignore_user_permissions": 0, 164 | "ignore_xss_filter": 0, 165 | "in_filter": 0, 166 | "in_global_search": 0, 167 | "in_list_view": 0, 168 | "in_standard_filter": 0, 169 | "label": "Enable Spell Checker", 170 | "length": 0, 171 | "no_copy": 0, 172 | "permlevel": 0, 173 | "precision": "", 174 | "print_hide": 0, 175 | "print_hide_if_no_value": 0, 176 | "read_only": 0, 177 | "remember_last_selected_value": 0, 178 | "report_hide": 0, 179 | "reqd": 0, 180 | "search_index": 0, 181 | "set_only_once": 0, 182 | "translatable": 0, 183 | "unique": 0 184 | }, 185 | { 186 | "allow_bulk_edit": 0, 187 | "allow_in_quick_entry": 0, 188 | "allow_on_submit": 0, 189 | "bold": 0, 190 | "collapsible": 0, 191 | "columns": 0, 192 | "depends_on": "eval:parent.file_to_read", 193 | "fetch_if_empty": 0, 194 | "fieldname": "result_section_break", 195 | "fieldtype": "Section Break", 196 | "hidden": 0, 197 | "ignore_user_permissions": 0, 198 | "ignore_xss_filter": 0, 199 | "in_filter": 0, 200 | "in_global_search": 0, 201 | "in_list_view": 0, 202 | "in_standard_filter": 0, 203 | "length": 0, 204 | "no_copy": 0, 205 | "permlevel": 0, 206 | "precision": "", 207 | "print_hide": 0, 208 | "print_hide_if_no_value": 0, 209 | "read_only": 0, 210 | "remember_last_selected_value": 0, 211 | "report_hide": 0, 212 | "reqd": 0, 213 | "search_index": 0, 214 | "set_only_once": 0, 215 | "translatable": 0, 216 | "unique": 0 217 | }, 218 | { 219 | "allow_bulk_edit": 0, 220 | "allow_in_quick_entry": 0, 221 | 
"allow_on_submit": 0, 222 | "bold": 0, 223 | "collapsible": 0, 224 | "columns": 0, 225 | "fetch_if_empty": 0, 226 | "fieldname": "read_result", 227 | "fieldtype": "Long Text", 228 | "hidden": 0, 229 | "ignore_user_permissions": 0, 230 | "ignore_xss_filter": 0, 231 | "in_filter": 0, 232 | "in_global_search": 0, 233 | "in_list_view": 0, 234 | "in_standard_filter": 0, 235 | "label": "Read Result", 236 | "length": 0, 237 | "no_copy": 0, 238 | "permlevel": 0, 239 | "precision": "", 240 | "print_hide": 0, 241 | "print_hide_if_no_value": 0, 242 | "read_only": 0, 243 | "remember_last_selected_value": 0, 244 | "report_hide": 0, 245 | "reqd": 0, 246 | "search_index": 0, 247 | "set_only_once": 0, 248 | "translatable": 0, 249 | "unique": 0 250 | }, 251 | { 252 | "allow_bulk_edit": 0, 253 | "allow_in_quick_entry": 0, 254 | "allow_on_submit": 0, 255 | "bold": 0, 256 | "collapsible": 0, 257 | "columns": 0, 258 | "depends_on": "eval:parent.read_result", 259 | "fetch_if_empty": 0, 260 | "fieldname": "read_time", 261 | "fieldtype": "Read Only", 262 | "hidden": 0, 263 | "ignore_user_permissions": 0, 264 | "ignore_xss_filter": 0, 265 | "in_filter": 0, 266 | "in_global_search": 0, 267 | "in_list_view": 0, 268 | "in_standard_filter": 0, 269 | "label": "Read Time", 270 | "length": 0, 271 | "no_copy": 0, 272 | "permlevel": 0, 273 | "precision": "", 274 | "print_hide": 0, 275 | "print_hide_if_no_value": 0, 276 | "read_only": 1, 277 | "remember_last_selected_value": 0, 278 | "report_hide": 0, 279 | "reqd": 0, 280 | "search_index": 0, 281 | "set_only_once": 0, 282 | "translatable": 0, 283 | "unique": 0 284 | }, 285 | { 286 | "allow_bulk_edit": 0, 287 | "allow_in_quick_entry": 0, 288 | "allow_on_submit": 0, 289 | "bold": 0, 290 | "collapsible": 0, 291 | "columns": 0, 292 | "depends_on": "eval:parent.read_result", 293 | "fetch_if_empty": 0, 294 | "fieldname": "ocr_import", 295 | "fieldtype": "Link", 296 | "hidden": 0, 297 | "ignore_user_permissions": 0, 298 | "ignore_xss_filter": 0, 299 | 
"in_filter": 0, 300 | "in_global_search": 0, 301 | "in_list_view": 0, 302 | "in_standard_filter": 0, 303 | "label": "Template", 304 | "length": 0, 305 | "no_copy": 0, 306 | "options": "OCR Import", 307 | "permlevel": 0, 308 | "precision": "", 309 | "print_hide": 0, 310 | "print_hide_if_no_value": 0, 311 | "read_only": 0, 312 | "remember_last_selected_value": 0, 313 | "report_hide": 0, 314 | "reqd": 0, 315 | "search_index": 0, 316 | "set_only_once": 0, 317 | "translatable": 0, 318 | "unique": 0 319 | }, 320 | { 321 | "allow_bulk_edit": 0, 322 | "allow_in_quick_entry": 0, 323 | "allow_on_submit": 0, 324 | "bold": 0, 325 | "collapsible": 0, 326 | "columns": 0, 327 | "depends_on": "eval:parent.read_result", 328 | "fetch_if_empty": 0, 329 | "fieldname": "import", 330 | "fieldtype": "Button", 331 | "hidden": 0, 332 | "ignore_user_permissions": 0, 333 | "ignore_xss_filter": 0, 334 | "in_filter": 0, 335 | "in_global_search": 0, 336 | "in_list_view": 0, 337 | "in_standard_filter": 0, 338 | "label": "Generate", 339 | "length": 0, 340 | "no_copy": 0, 341 | "permlevel": 0, 342 | "precision": "", 343 | "print_hide": 0, 344 | "print_hide_if_no_value": 0, 345 | "read_only": 0, 346 | "remember_last_selected_value": 0, 347 | "report_hide": 0, 348 | "reqd": 0, 349 | "search_index": 0, 350 | "set_only_once": 0, 351 | "translatable": 0, 352 | "unique": 0 353 | } 354 | ], 355 | "has_web_view": 0, 356 | "hide_heading": 0, 357 | "hide_toolbar": 0, 358 | "icon": "fa fa-eye", 359 | "idx": 0, 360 | "image_view": 0, 361 | "in_create": 0, 362 | "is_submittable": 0, 363 | "issingle": 0, 364 | "istable": 0, 365 | "max_attachments": 0, 366 | "modified": "2020-01-26 17:18:30.367446", 367 | "modified_by": "Administrator", 368 | "module": "ERPNext OCR", 369 | "name": "OCR Read", 370 | "name_case": "", 371 | "owner": "Administrator", 372 | "permissions": [ 373 | { 374 | "amend": 0, 375 | "cancel": 0, 376 | "create": 1, 377 | "delete": 1, 378 | "email": 1, 379 | "export": 1, 380 | "if_owner": 1, 381 
| "import": 0, 382 | "permlevel": 0, 383 | "print": 1, 384 | "read": 1, 385 | "report": 1, 386 | "role": "All", 387 | "set_user_permissions": 0, 388 | "share": 1, 389 | "submit": 0, 390 | "write": 1 391 | }, 392 | { 393 | "amend": 0, 394 | "cancel": 0, 395 | "create": 1, 396 | "delete": 1, 397 | "email": 1, 398 | "export": 1, 399 | "if_owner": 0, 400 | "import": 0, 401 | "permlevel": 0, 402 | "print": 1, 403 | "read": 1, 404 | "report": 1, 405 | "role": "System Manager", 406 | "set_user_permissions": 0, 407 | "share": 1, 408 | "submit": 0, 409 | "write": 1 410 | }, 411 | { 412 | "amend": 0, 413 | "cancel": 0, 414 | "create": 1, 415 | "delete": 1, 416 | "email": 1, 417 | "export": 1, 418 | "if_owner": 0, 419 | "import": 0, 420 | "permlevel": 0, 421 | "print": 1, 422 | "read": 1, 423 | "report": 1, 424 | "role": "ERPNext OCR Manager", 425 | "set_user_permissions": 0, 426 | "share": 1, 427 | "submit": 0, 428 | "write": 1 429 | } 430 | ], 431 | "quick_entry": 0, 432 | "read_only": 0, 433 | "read_only_onload": 0, 434 | "search_fields": "file_to_read,language", 435 | "show_name_in_global_search": 0, 436 | "sort_field": "modified", 437 | "sort_order": "DESC", 438 | "title_field": "file_to_read", 439 | "track_changes": 0, 440 | "track_seen": 0, 441 | "track_views": 0 442 | } -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2018, John Vincent Fiel and contributors 3 | # Copyright (c) 2021, Monogramm and Contributors 4 | # For license information, please see license.txt 5 | 6 | from __future__ import unicode_literals 7 | 8 | import io 9 | import os 10 | import re 11 | import time 12 | 13 | 14 | from spellchecker import SpellChecker 15 | 16 | import frappe 17 | from frappe.model.document import Document 18 | 19 | from 
from erpnext_ocr.erpnext_ocr.doctype.ocr_language.ocr_language import lang_available


def get_words_from_text(message):
    """
    Return the list of words found in a text, with punctuation dropped and
    the original letter case kept. Example: Cat in gloves,
    catches: no mice -> [Cat, in, gloves, catches, no, mice]
    """
    # str.split() without arguments never yields empty strings.
    return re.sub(r'\W+', " ", message).split()


def _match_case(original, replacement):
    """Give ``replacement`` the capitalisation style of ``original``."""
    if len(original) > 1 and original.isupper():
        return replacement.upper()
    if original[:1].isupper():
        return replacement[:1].upper() + replacement[1:]
    return replacement


def get_spellchecked_text(message, language):
    """
    Return ``message`` with the misspelled words corrected.
    Example: Cet in glaves cetches no mice -> Cat in gloves catches no mice

    :param message: text to correct
    :param language: name of the OCR Language whose ``lang`` selects the
                     spell checker dictionary
    :return: the corrected text; words without a known correction are kept
    """
    lang = frappe.get_doc("OCR Language", language).lang
    spell_checker = SpellChecker(lang)

    for word in spell_checker.unknown(get_words_from_text(message)):
        corrected_word = spell_checker.correction(word)
        # The checker may have no suggestion at all for a word.
        if not corrected_word or corrected_word == word:
            continue

        # Replace whole words only (a plain str.replace would also rewrite
        # the same letters inside longer words) and ignore case, because the
        # checker reports unknown words lower-cased.
        whole_word = re.compile(r'\b' + re.escape(word) + r'\b',
                                re.IGNORECASE | re.UNICODE)
        message = whole_word.sub(
            lambda match, fix=corrected_word: _match_case(match.group(0), fix),
            message)

    return message


class OCRRead(Document):
    """OCR Read document: a file to read plus the text extracted from it."""

    def __init__(self, *args, **kwargs):
        # Defaults are set first so that values supplied to the constructor
        # are not overwritten afterwards.
        self.read_result = None
        self.read_time = None
        super(OCRRead, self).__init__(*args, **kwargs)

    def read_image(self):
        """Run the OCR synchronously and return the recognised text."""
        return read_ocr(self)

    def read_image_bg(self, is_async=True, now=False):
        """Enqueue the OCR on the "long" queue (25 minutes timeout).

        :param is_async: forwarded to ``frappe.enqueue``
        :param now: forwarded to ``frappe.enqueue``
        :return: whatever ``frappe.enqueue`` returns
        """
        return frappe.enqueue("erpnext_ocr.erpnext_ocr.doctype.ocr_read.ocr_read.read_ocr",
                              queue="long", timeout=1500, is_async=is_async,
                              now=now, **{'obj': self})


@frappe.whitelist()
def read_ocr(obj):
    """Call Tesseract OCR to extract the text from a OCR Read object.

    The recognised text and the elapsed time (seconds, as a string) are
    stored on the document, which is then saved.

    :param obj: OCR Read document providing ``file_to_read``, ``language``
                and ``spell_checker``
    :return: the recognised text
    """
    # NOTE(review): the function is whitelisted but needs a document object;
    # presumably it is only reached through OCRRead -- confirm.
    if obj is None:
        frappe.msgprint(frappe._("OCR read requires OCR Read doctype."),
                        raise_exception=True)

    start_time = time.time()
    text = read_document(
        obj.file_to_read, obj.language or 'eng', obj.spell_checker)
    delta_time = time.time() - start_time

    obj.read_time = str(delta_time)
    obj.read_result = text
    obj.save()

    return text
def _resolve_local_path(path):
    """Map a file reference to a filesystem path, or None for an external link."""
    if path.startswith('/files/'):
        # public file
        return frappe.get_site_path() + '/public' + path
    if path.startswith('/private/files/'):
        # private file
        return frappe.get_site_path() + path
    if path.startswith('/'):
        # '/assets/...' (public folder) or a local file (mostly for tests)
        # SECURITY NOTE(review): any absolute server path is accepted here
        # and read_document is whitelisted -- confirm this is intended.
        return os.path.abspath(path)
    return None


def _publish_progress(event, current, total):
    """Send one progress update to the current user."""
    frappe.publish_realtime(
        event, {"progress": [current, total]}, user=frappe.session.user)


@frappe.whitelist()
def read_document(path, lang='eng', spellcheck=False, event="ocr_progress_bar"):
    """Call Tesseract OCR to extract the text from a document.

    :param path: '/files/...', '/private/files/...', another absolute path,
                 or an external link that is downloaded first
    :param lang: Tesseract language used for the recognition
    :param spellcheck: when truthy, the recognised text is spell checked
    :param event: realtime event receiving ``[current, total]`` progress
    :return: the recognised text, or None when ``path`` is None
    """
    from PIL import Image
    import requests
    import tesserocr

    if path is None:
        return None

    if not lang_available(lang):
        frappe.msgprint(frappe._
                        ("The selected language is not available. Please contact your administrator."),
                        raise_exception=True)

    _publish_progress(event, 0, 100)

    fullpath = _resolve_local_path(path)
    remote_blob = None
    if fullpath is None:
        # external link: fetch the whole file so that both PIL (which needs a
        # seekable file) and Wand can read it from memory
        response = requests.get(path, timeout=60)
        response.raise_for_status()
        remote_blob = response.content

    ocr = frappe.get_doc("OCR Settings")

    text = " "
    with tesserocr.PyTessBaseAPI(lang=lang) as api:

        if path.endswith('.pdf'):
            from wand.image import Image as wi

            if remote_blob is None:
                pdf_source = {"filename": fullpath}
            else:
                pdf_source = {"blob": remote_blob}

            # https://stackoverflow.com/questions/43072050/pyocr-with-tesseract-runs-out-of-memory
            with wi(resolution=ocr.pdf_resolution, **pdf_source) as pdf:
                # the converted copy holds its own resources; close it too
                with pdf.convert('jpeg') as pdf_image:
                    step = 0
                    # three progress updates are sent per page
                    size = len(pdf_image.sequence) * 3

                    for img in pdf_image.sequence:
                        with wi(image=img) as img_page:
                            image_blob = img_page.make_blob('jpeg')
                        _publish_progress(event, step, size)
                        step += 1

                        image = Image.open(io.BytesIO(image_blob))
                        api.SetImage(image)
                        _publish_progress(event, step, size)
                        step += 1

                        text = text + api.GetUTF8Text()
                        _publish_progress(event, step, size)
                        step += 1

        else:
            if remote_blob is None:
                image = Image.open(fullpath)
            else:
                image = Image.open(io.BytesIO(remote_blob))
            api.SetImage(image)
            _publish_progress(event, 33, 100)

            text = api.GetUTF8Text()
            _publish_progress(event, 66, 100)

    if spellcheck:
        text = get_spellchecked_text(text, lang)

    _publish_progress(event, 100, 100)

    return text
import unittest
import os

from erpnext_ocr.erpnext_ocr.doctype.ocr_read.ocr_read import read_ocr

# Frappe default test records creation
# def _make_test_records(verbose):
#     from frappe.test_runner import make_test_objects
#
#     docs = [
#         # [file_to_read, language]
#         [os.path.join(os.path.dirname(__file__),
#                       os.path.pardir, os.path.pardir, os.path.pardir,
#                       "tests", "test_data", "sample1.jpg"), "eng"],
#         [os.path.join(os.path.dirname(__file__),
#                       os.path.pardir, os.path.pardir, os.path.pardir,
#                       "tests", "test_data", "Picture_010.png"), "eng"],
#         [os.path.join(os.path.dirname(__file__),
#                       os.path.pardir, os.path.pardir, os.path.pardir,
#                       "tests", "test_data", "sample2.pdf"), "eng"],
#     ]
#
#     test_objects = make_test_objects("OCR Read", [{
#         "doctype": "OCR Read",
#         "file_to_read": file_to_read,
#         "language": language
#     } for file_to_read, language in docs])
#
#     return test_objects


def create_ocr_reads():
    """Insert one OCR Read per sample file, once per test session.

    The ``test_ocr_reads_created`` flag guards against repeated insertion.
    """
    if frappe.flags.test_ocr_reads_created:
        return

    frappe.set_user("Administrator")

    # Sample files live in <app>/tests/test_data, three levels above this file.
    test_data_dir = os.path.join(os.path.dirname(__file__),
                                 os.path.pardir, os.path.pardir, os.path.pardir,
                                 "tests", "test_data")

    for sample in ("sample1.jpg", "Picture_010.png", "sample2.pdf"):
        ocr_read = frappe.get_doc({
            "doctype": "OCR Read",
            "file_to_read": os.path.join(test_data_dir, sample),
            "language": "eng"
        })
        ocr_read.insert()

    frappe.flags.test_ocr_reads_created = True
def delete_ocr_reads(): 74 | if frappe.flags.test_ocr_reads_created: 75 | frappe.set_user("Administrator") 76 | 77 | for d in frappe.get_all("OCR Read"): 78 | doc = frappe.get_doc("OCR Read", d.name) 79 | doc.delete() 80 | 81 | # Delete directly in DB to avoid validation errors 82 | # frappe.db.sql("""delete from `tabOCR Read`""") 83 | 84 | frappe.flags.test_ocr_reads_created = False 85 | 86 | 87 | class TestOCRRead(unittest.TestCase): 88 | def setUp(self): 89 | create_ocr_reads() 90 | 91 | def tearDown(self): 92 | delete_ocr_reads() 93 | 94 | 95 | def test_ocr_read_image_bg(self): 96 | frappe.set_user("Administrator") 97 | doc = frappe.get_doc({ 98 | "doctype": "OCR Read", 99 | "file_to_read": os.path.join(os.path.dirname(__file__), 100 | os.path.pardir, os.path.pardir, os.path.pardir, 101 | "tests", "test_data", "sample1.jpg"), 102 | "language": "eng" 103 | }) 104 | 105 | self.assertEqual(None, doc.read_result) 106 | 107 | doc.read_image_bg(is_async=False, now=True) 108 | 109 | # Wait worker completion before moving on in the tests 110 | 111 | # Check worker completion and get "new" document after update by bg job 112 | new_doc = frappe.get_doc("OCR Read", 113 | {"file_to_read": os.path.join(os.path.dirname(__file__), 114 | os.path.pardir, os.path.pardir, os.path.pardir, 115 | "tests", "test_data", "sample1.jpg"), 116 | "language": "eng"}) 117 | 118 | self.assertEqual(new_doc.read_result, doc.read_result) 119 | self.assertIn("The quick brown fox", new_doc.read_result) 120 | self.assertIn("jumped over the 5", new_doc.read_result) 121 | self.assertIn("lazy dogs!", new_doc.read_result) 122 | self.assertNotIn("And an elephant!", new_doc.read_result) 123 | 124 | def test_ocr_read_image_bg_pdf(self): 125 | frappe.set_user("Administrator") 126 | doc = frappe.get_doc({ 127 | "doctype": "OCR Read", 128 | "file_to_read": os.path.join(os.path.dirname(__file__), 129 | os.path.pardir, os.path.pardir, os.path.pardir, 130 | "tests", "test_data", "sample2.pdf"), 131 | 
"language": "eng" 132 | }) 133 | 134 | self.assertEqual(None, doc.read_result) 135 | 136 | doc.read_image_bg(is_async=False, now=True) 137 | 138 | new_doc = frappe.get_doc("OCR Read", { 139 | "file_to_read": os.path.join(os.path.dirname(__file__), 140 | os.path.pardir, os.path.pardir, os.path.pardir, 141 | "tests", "test_data", "sample2.pdf"), 142 | "language": "eng"}) 143 | 144 | self.assertEqual(new_doc.read_result, doc.read_result) 145 | self.assertIn("Python Basics", new_doc.read_result) 146 | self.assertNotIn("Java", new_doc.read_result) 147 | 148 | 149 | def test_ocr_read_image(self): 150 | frappe.set_user("Administrator") 151 | doc = frappe.get_doc({ 152 | "doctype": "OCR Read", 153 | "file_to_read": os.path.join(os.path.dirname(__file__), 154 | os.path.pardir, os.path.pardir, os.path.pardir, 155 | "tests", "test_data", "sample1.jpg"), 156 | "language": "eng" 157 | }) 158 | 159 | recognized_text = doc.read_image() 160 | self.assertEqual(recognized_text, doc.read_result) 161 | 162 | self.assertIn("The quick brown fox", recognized_text) 163 | self.assertIn("jumped over the 5", recognized_text) 164 | self.assertIn("lazy dogs!", recognized_text) 165 | self.assertNotIn("And an elephant!", recognized_text) 166 | 167 | def test_ocr_read_pdf(self): 168 | frappe.set_user("Administrator") 169 | doc = frappe.get_doc({ 170 | "doctype": "OCR Read", 171 | "file_to_read": os.path.join(os.path.dirname(__file__), 172 | os.path.pardir, os.path.pardir, os.path.pardir, 173 | "tests", "test_data", "sample2.pdf"), 174 | "language": "eng" 175 | }) 176 | 177 | recognized_text = doc.read_image() 178 | 179 | self.maxDiff = None 180 | self.assertEqual(recognized_text, doc.read_result) 181 | 182 | self.assertIn("Python Basics", recognized_text) 183 | self.assertNotIn("Java", recognized_text) 184 | 185 | def test_ocr_read_list(self): 186 | # frappe.set_user("test1@example.com") 187 | frappe.set_user("Administrator") 188 | res = frappe.get_list("OCR Read", filters=[ 189 | ["OCR Read", 
"file_to_read", "like", "%sample%"]], fields=["name", "file_to_read"]) 190 | self.assertEqual(len(res), 2) 191 | files_to_read = [r.file_to_read for r in res] 192 | self.assertTrue(os.path.join(os.path.dirname(__file__), 193 | os.path.pardir, os.path.pardir, os.path.pardir, 194 | "tests", "test_data", "sample1.jpg") in files_to_read) 195 | self.assertTrue(os.path.join(os.path.dirname(__file__), 196 | os.path.pardir, os.path.pardir, os.path.pardir, 197 | "tests", "test_data", "sample2.pdf") in files_to_read) 198 | 199 | def test_read_ocr(self): 200 | self.assertRaises(frappe.ValidationError, read_ocr, obj=None) 201 | -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/ocr_settings/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/ocr_settings/ocr_settings.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, Monogramm and contributors 2 | // For license information, please see license.txt 3 | 4 | frappe.ui.form.on('OCR Settings', { 5 | refresh: function(frm) { 6 | 7 | } 8 | }); 9 | -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/ocr_settings/ocr_settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "allow_copy": 0, 3 | "allow_events_in_timeline": 0, 4 | "allow_guest_to_view": 0, 5 | "allow_import": 0, 6 | "allow_rename": 0, 7 | "beta": 0, 8 | "creation": "2019-11-27 03:06:27.072918", 9 | "custom": 0, 10 | "docstatus": 0, 11 | "doctype": "DocType", 12 | "document_type": "Setup", 13 | "editable_grid": 1, 14 | "engine": "InnoDB", 15 | "fields": [ 16 | { 17 | "allow_bulk_edit": 0, 18 | 
"allow_in_quick_entry": 0, 19 | "allow_on_submit": 0, 20 | "bold": 0, 21 | "collapsible": 0, 22 | "columns": 0, 23 | "default": "200", 24 | "fetch_if_empty": 0, 25 | "fieldname": "pdf_resolution", 26 | "fieldtype": "Int", 27 | "hidden": 0, 28 | "ignore_user_permissions": 0, 29 | "ignore_xss_filter": 0, 30 | "in_filter": 0, 31 | "in_global_search": 0, 32 | "in_list_view": 1, 33 | "in_standard_filter": 0, 34 | "label": "PDF Resolution", 35 | "length": 0, 36 | "no_copy": 0, 37 | "permlevel": 0, 38 | "precision": "", 39 | "print_hide": 0, 40 | "print_hide_if_no_value": 0, 41 | "read_only": 0, 42 | "remember_last_selected_value": 0, 43 | "report_hide": 0, 44 | "reqd": 1, 45 | "search_index": 0, 46 | "set_only_once": 0, 47 | "translatable": 0, 48 | "unique": 0 49 | } 50 | ], 51 | "has_web_view": 0, 52 | "hide_heading": 0, 53 | "hide_toolbar": 0, 54 | "idx": 0, 55 | "image_view": 0, 56 | "in_create": 1, 57 | "is_submittable": 0, 58 | "issingle": 1, 59 | "istable": 0, 60 | "max_attachments": 0, 61 | "modified": "2019-11-27 03:35:39.949086", 62 | "modified_by": "Administrator", 63 | "module": "ERPNext OCR", 64 | "name": "OCR Settings", 65 | "name_case": "", 66 | "owner": "Administrator", 67 | "permissions": [ 68 | { 69 | "amend": 0, 70 | "cancel": 0, 71 | "create": 1, 72 | "delete": 1, 73 | "email": 1, 74 | "export": 0, 75 | "if_owner": 0, 76 | "import": 0, 77 | "permlevel": 0, 78 | "print": 1, 79 | "read": 1, 80 | "report": 0, 81 | "role": "System Manager", 82 | "set_user_permissions": 0, 83 | "share": 1, 84 | "submit": 0, 85 | "write": 1 86 | } 87 | ], 88 | "quick_entry": 1, 89 | "read_only": 1, 90 | "read_only_onload": 0, 91 | "show_name_in_global_search": 0, 92 | "sort_field": "modified", 93 | "sort_order": "DESC", 94 | "track_changes": 1, 95 | "track_seen": 0, 96 | "track_views": 0 97 | } -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/ocr_settings/ocr_settings.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2021, Monogramm and contributors 3 | # For license information, please see license.txt 4 | 5 | import frappe 6 | from frappe import _ 7 | from frappe.model.document import Document 8 | 9 | 10 | class OCRSettings(Document): 11 | def validate(self): 12 | if not self.pdf_resolution > 0: 13 | frappe.throw( 14 | _("PDF Resolution must be a positive integer eg 300 (high) or 200 (normal).")) 15 | -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/ocr_settings/test_ocr_settings.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable */ 2 | // rename this file from _test_[name] to test_[name] to activate 3 | // and remove above this line 4 | 5 | QUnit.test("test: OCR Settings", function (assert) { 6 | let done = assert.async(); 7 | 8 | // number of asserts 9 | assert.expect(1); 10 | 11 | frappe.run_serially([ 12 | // insert a new OCR Settings 13 | () => frappe.tests.make('OCR Settings', [ 14 | // values to be set 15 | {key: 'value'} 16 | ]), 17 | () => { 18 | assert.equal(cur_frm.doc.key, 'value'); 19 | }, 20 | () => done() 21 | ]); 22 | 23 | }); 24 | -------------------------------------------------------------------------------- /erpnext_ocr/erpnext_ocr/doctype/ocr_settings/test_ocr_settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2021, Monogramm and Contributors 3 | # For license information, please see license.txt 4 | 5 | import frappe 6 | import unittest 7 | 8 | 9 | class TestOCRSettings(unittest.TestCase): 10 | def test_validate(self): 11 | ocr = frappe.get_doc("OCR Settings") 12 | ocr.pdf_resolution = 300 13 | ocr.validate() 14 | self.assertEqual(300, ocr.pdf_resolution) 15 | 16 | def test_validate_invalid_pdf_resolution(self): 17 | ocr 
= frappe.get_doc("OCR Settings") 18 | ocr.pdf_resolution = -1 19 | self.assertRaises(frappe.ValidationError, ocr.validate) 20 | 21 | ocr.pdf_resolution = 0 22 | self.assertRaises(frappe.ValidationError, ocr.validate) 23 | -------------------------------------------------------------------------------- /erpnext_ocr/fixtures/ocr_language.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "code": "eng", 4 | "docstatus": 0, 5 | "doctype": "OCR Language", 6 | "lang": "en", 7 | "modified": "2019-08-17 03:14:15.926535", 8 | "name": "eng", 9 | "parent": null, 10 | "parentfield": null, 11 | "parenttype": null 12 | }, 13 | { 14 | "code": "fra", 15 | "docstatus": 0, 16 | "doctype": "OCR Language", 17 | "lang": "fr", 18 | "modified": "2019-08-17 03:14:15.926535", 19 | "name": "fra", 20 | "parent": null, 21 | "parentfield": null, 22 | "parenttype": null 23 | }, 24 | { 25 | "code": "deu", 26 | "docstatus": 0, 27 | "doctype": "OCR Language", 28 | "lang": "de", 29 | "modified": "2019-08-17 03:14:15.926535", 30 | "name": "deu", 31 | "parent": null, 32 | "parentfield": null, 33 | "parenttype": null 34 | }, 35 | { 36 | "code": "ita", 37 | "docstatus": 0, 38 | "doctype": "OCR Language", 39 | "lang": "it", 40 | "modified": "2019-08-17 03:14:15.926535", 41 | "name": "ita", 42 | "parent": null, 43 | "parentfield": null, 44 | "parenttype": null 45 | }, 46 | { 47 | "code": "spa", 48 | "docstatus": 0, 49 | "doctype": "OCR Language", 50 | "lang": "es", 51 | "modified": "2019-08-17 03:14:15.926535", 52 | "name": "spa", 53 | "parent": null, 54 | "parentfield": null, 55 | "parenttype": null 56 | }, 57 | { 58 | "code": "por", 59 | "docstatus": 0, 60 | "doctype": "OCR Language", 61 | "lang": "pt", 62 | "modified": "2019-08-17 03:14:15.926535", 63 | "name": "por", 64 | "parent": null, 65 | "parentfield": null, 66 | "parenttype": null 67 | }, 68 | { 69 | "code": "tur", 70 | "docstatus": 0, 71 | "doctype": "OCR Language", 72 | "lang": "tr", 73 | 
"modified": "2019-08-17 03:14:15.926535", 74 | "name": "tur", 75 | "parent": null, 76 | "parentfield": null, 77 | "parenttype": null 78 | }, 79 | { 80 | "code": "nld", 81 | "docstatus": 0, 82 | "doctype": "OCR Language", 83 | "lang": "nl", 84 | "modified": "2019-08-17 03:14:15.926535", 85 | "name": "nld", 86 | "parent": null, 87 | "parentfield": null, 88 | "parenttype": null 89 | }, 90 | { 91 | "code": "rus", 92 | "docstatus": 0, 93 | "doctype": "OCR Language", 94 | "lang": "ru", 95 | "modified": "2019-08-17 03:14:15.926535", 96 | "name": "rus", 97 | "parent": null, 98 | "parentfield": null, 99 | "parenttype": null 100 | }, 101 | { 102 | "code": "vie", 103 | "docstatus": 0, 104 | "doctype": "OCR Language", 105 | "lang": "vi", 106 | "modified": "2019-08-17 03:14:15.926535", 107 | "name": "vie", 108 | "parent": null, 109 | "parentfield": null, 110 | "parenttype": null 111 | }, 112 | { 113 | "code": "kor", 114 | "docstatus": 0, 115 | "doctype": "OCR Language", 116 | "lang": "ko", 117 | "modified": "2019-08-17 04:19:02.882550", 118 | "name": "kor", 119 | "parent": null, 120 | "parentfield": null, 121 | "parenttype": null 122 | }, 123 | { 124 | "code": "jpn", 125 | "docstatus": 0, 126 | "doctype": "OCR Language", 127 | "lang": "ja", 128 | "modified": "2019-08-17 03:14:15.926535", 129 | "name": "jpn", 130 | "parent": null, 131 | "parentfield": null, 132 | "parenttype": null 133 | } 134 | ] -------------------------------------------------------------------------------- /erpnext_ocr/hooks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2021, Monogramm and Contributors 3 | # See license.txt 4 | 5 | from __future__ import unicode_literals 6 | from . import __version__ as app_version 7 | 8 | app_name = "erpnext_ocr" 9 | app_title = "ERPNext OCR" 10 | app_publisher = "Monogramm" 11 | app_description = "Optical Character Recognition using tesseract within Frappe." 
12 | app_icon = "octicon octicon-eye" 13 | app_color = "#00bcd4" 14 | app_email = "opensource@monogramm.io" 15 | app_license = "MIT" 16 | 17 | # Includes in 18 | # ------------------ 19 | 20 | # include js, css files in header of desk.html 21 | # app_include_css = "/assets/erpnext_ocr/css/Aimara.css" 22 | # app_include_js = "/assets/erpnext_ocr/js/Aimara.js" 23 | #app_include_css = "/assets/erpnext_ocr/css/treeview.min.css" 24 | #app_include_js = "/assets/erpnext_ocr/js/treeview.min.js" 25 | 26 | # include js, css files in header of web template 27 | # web_include_css = "/assets/erpnext_ocr/css/treeview.min.css" 28 | # web_include_js = "/assets/erpnext_ocr/js/treeview.min.js" 29 | 30 | # include js in page 31 | # page_js = {"page" : "public/js/file.js"} 32 | 33 | # include js in doctype views 34 | #doctype_js = {"Sales Invoice" : "public/js/Aimara.js"} 35 | # doctype_list_js = {"doctype" : "public/js/doctype_list.js"} 36 | # doctype_tree_js = {"doctype" : "public/js/doctype_tree.js"} 37 | # doctype_calendar_js = {"doctype" : "public/js/doctype_calendar.js"} 38 | 39 | # Home Pages 40 | # ---------- 41 | 42 | # application home page (will override Website Settings) 43 | # home_page = "login" 44 | 45 | # website user home page (by Role) 46 | # role_home_page = { 47 | # "Role": "home_page" 48 | # } 49 | 50 | # Website user home page (by function) 51 | # get_website_user_home_page = "erpnext_ocr.utils.get_home_page" 52 | 53 | # Generators 54 | # ---------- 55 | 56 | # automatically create page for each record of this doctype 57 | # website_generators = ["Web Page"] 58 | 59 | # Installation 60 | # ------------ 61 | 62 | # before_install = "erpnext_ocr.install.before_install" 63 | # after_install = "erpnext_ocr.install.after_install" 64 | 65 | # Desk Notifications 66 | # ------------------ 67 | # See frappe.core.notifications.get_notification_config 68 | 69 | # notification_config = "erpnext_ocr.notifications.get_notification_config" 70 | 71 | # Permissions 72 | # 
----------- 73 | # Permissions evaluated in scripted ways 74 | 75 | # permission_query_conditions = { 76 | # "Event": "frappe.desk.doctype.event.event.get_permission_query_conditions", 77 | # } 78 | # 79 | # has_permission = { 80 | # "Event": "frappe.desk.doctype.event.event.has_permission", 81 | # } 82 | 83 | # Document Events 84 | # --------------- 85 | # Hook on document methods and events 86 | 87 | #doc_events = { 88 | # "Sales Invoice": { 89 | # # "validate": "chanjeapp.hooks_datadog.SI.validate", 90 | # # "on_submit": "erpnext_ocr.zap_hooks.SI.submit", 91 | # # "on_cancel": "chanjeapp.hooks_datadog.SI.amend", 92 | # # "on_trash": "chanjeapp.hooks_datadog.SI.trash" 93 | # }, 94 | #} 95 | 96 | # Scheduled Tasks 97 | # --------------- 98 | 99 | # scheduler_events = { 100 | # "all": [ 101 | # "erpnext_ocr.tasks.all" 102 | # ], 103 | # "daily": [ 104 | # "erpnext_ocr.tasks.daily" 105 | # ], 106 | # "hourly": [ 107 | # "erpnext_ocr.tasks.hourly" 108 | # ], 109 | # "weekly": [ 110 | # "erpnext_ocr.tasks.weekly" 111 | # ] 112 | # "monthly": [ 113 | # "erpnext_ocr.tasks.monthly" 114 | # ] 115 | # } 116 | 117 | # Testing 118 | # ------- 119 | 120 | before_tests = "erpnext_ocr.install.before_tests" 121 | 122 | # Overriding Whitelisted Methods 123 | # ------------------------------ 124 | # 125 | # override_whitelisted_methods = { 126 | # "frappe.desk.doctype.event.event.get_events": "erpnext_ocr.event.get_events" 127 | # } 128 | 129 | # fixtures = ['Custom Script','Custom Field','Property Setter'] 130 | fixtures = [{'doctype': 'OCR Language'}] 131 | -------------------------------------------------------------------------------- /erpnext_ocr/install.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2021, Monogramm and Contributors 3 | # For license information, please see license.txt 4 | 5 | from __future__ import unicode_literals 6 | 7 | import frappe 8 | 9 | 10 | def before_tests(): 11 | 
"""Frappe trigger before application tests.""" 12 | settings = frappe.get_doc("System Settings") 13 | settings.time_zone = "Etc/GMT+3" 14 | settings.language = "en" 15 | settings.save() 16 | selling_settings = frappe.get_doc("Selling Settings") 17 | selling_settings.allow_multiple_items = 1 18 | selling_settings.save() 19 | -------------------------------------------------------------------------------- /erpnext_ocr/modules.txt: -------------------------------------------------------------------------------- 1 | ERPNext OCR -------------------------------------------------------------------------------- /erpnext_ocr/patches.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Monogramm/erpnext_ocr/c23ebc5a4124d9d2e98740e0edf7386e4621148c/erpnext_ocr/patches.txt -------------------------------------------------------------------------------- /erpnext_ocr/public/css/treeview.min.css.map: -------------------------------------------------------------------------------- 1 | {"version":3,"sources":["src/treeview.css"],"names":[],"mappings":"AAAA,WAAa,SAAU,SACvB,8BAAgC,QAAS,MAAO,YAAa,KAC7D,mBAAqB,QAAS,KAET,WAAY,OADjC,yBAA2B,WAAY,KAAM,cAAe,IAAK,OAAQ,QAAS,MAAO,KAAM,OAAQ,KAAM,YAAa,KAAM,SAAU,SAAU,WAAY,OAAQ,IAAK,IAAK,MAAO,KAEzL,+BAAiC,WAAY,KAC7C,2BAA6B,OAAQ,QAAS,MAAO,KAAM,YAAa,IACxE,iCAAmC,MAAO,KACJ,oCAAtC,qCAA4E,QAAS,IAAK,QAAS,MACnG,oCAAsC,MAAO"} -------------------------------------------------------------------------------- /erpnext_ocr/public/js/treeview.min.js.map: -------------------------------------------------------------------------------- 1 | 
{"version":3,"sources":["src/treeview.js"],"names":["define","root","factory","amd","exports","module","TreeView","window","isDOMElement","obj","HTMLElement","e","nodeType","style","ownerDocument","forEach","arr","callback","scope","i","len","length","call","emit","instance","name","args","slice","arguments","events","indexOf","Error","handlers","Array","handle","setTimeout","apply","context","render","self","click","container","node","document","getElementById","leaves","renderLeaf","item","leaf","createElement","content","text","expando","setAttribute","JSON","stringify","textContent","expanded","appendChild","children","child","childLeaf","classList","add","data","push","innerHTML","map","outerHTML","join","parent","target","currentTarget","parentNode","parse","getAttribute","querySelector","contains","expand","collapse","querySelectorAll","onclick","this","prototype","skipEmit","remove","expandAll","hasAttribute","collapseAll","on","off","index","found","splice"],"mappings":"CAAC,SAAUA,GACT,cAEC,SAAUC,EAAMC,GACO,kBAAXF,IAAyBA,EAAOG,IACzCH,EAAOE,GACqB,gBAAZE,SAChBC,OAAOD,QAAUF,IAEjBD,EAAKK,SAAWJ,KAElBK,OAAQ,WACR,MAAQ,YAgBT,QAASC,GAAaC,GACjB,IACE,MAAOA,aAAeC,aACtB,MAAOC,GAGL,MAAsB,gBAARF,IAAqC,IAAjBA,EAAIG,UAAuC,gBAAdH,GAAII,OAAmD,gBAAtBJ,GAAIK,eAU1G,QAASC,GAAQC,EAAKC,EAAUC,GAC9B,GAAIC,GAAGC,EAAMJ,EAAIK,MACjB,KAAKF,EAAI,EAAGA,EAAIC,EAAKD,GAAK,EACxBF,EAASK,KAAKJ,EAAOF,EAAIG,GAAIA,GAQjC,QAASI,GAAKC,EAAUC,GACtB,GAAIC,MAAUC,MAAML,KAAKM,UAAW,EACpC,MAAIC,EAAOC,QAAQL,IAAS,GAS1B,KAAM,IAAIM,OAAMN,EAAO,sCARnBD,GAASQ,SAASP,IAASD,EAASQ,SAASP,YAAiBQ,QAChElB,EAAQS,EAASQ,SAASP,GAAO,SAAUS,GACzC3B,OAAO4B,WAAW,WAChBD,EAAOjB,SAASmB,MAAMF,EAAOG,QAASX,IACrC,KAWX,QAASY,GAAOC,GACd,GACiBC,GADbC,EAAYjC,EAAa+B,EAAKG,MAAQH,EAAKG,KAAOC,SAASC,eAAeL,EAAKG,MAC/EG,KACAC,EAAa,SAAUC,GACzB,GAAIC,GAAOL,SAASM,cAAc,OAC9BC,EAAUP,SAASM,cAAc,OACjCE,EAAOR,SAASM,cAAc,OAC9BG,EAAUT,SAASM,cAAc,MAYrC,IAVAD,EAAKK,aAAa,QAAS,aAC3BH,EAAQG,aAAa,QAAS,qBAC9BH,EAAQG,aAAa,YAAaC,KAAKC,UAAUR,IACjDI,EAAKE,aAAa,QAAS,kBAC3B
F,EAAKK,YAAcT,EAAKtB,KACxB2B,EAAQC,aAAa,QAAS,iBAAmBN,EAAKU,SAAW,WAAa,KAC9EL,EAAQI,YAAcT,EAAKU,SAAW,IAAM,IAC5CP,EAAQQ,YAAYN,GACpBF,EAAQQ,YAAYP,GACpBH,EAAKU,YAAYR,GACbH,EAAKY,UAAYZ,EAAKY,SAAStC,OAAS,EAAG,CAC7C,GAAIsC,GAAWhB,SAASM,cAAc,MACtCU,GAASN,aAAa,QAAS,qBAC/BtC,EAAQgC,EAAKY,SAAU,SAAUC,GAC/B,GAAIC,GAAYf,EAAWc,EAC3BD,GAASD,YAAYG,KAElBd,EAAKU,UACRE,EAASG,UAAUC,IAAI,UAEzBf,EAAKU,YAAYC,OAEjBP,GAAQU,UAAUC,IAAI,SAExB,OAAOf,GAGTjC,GAAQwB,EAAKyB,KAAM,SAAUjB,GAC3BF,EAAOoB,KAAKnB,EAAWxB,KAAKiB,EAAMQ,MAEpCN,EAAUyB,UAAYrB,EAAOsB,IAAI,SAAUnB,GACzC,MAAOA,GAAKoB,YACXC,KAAK,IAER7B,EAAQ,SAAU7B,GAChB,GAAI2D,IAAU3D,EAAE4D,QAAU5D,EAAE6D,eAAeC,WACvCT,EAAOV,KAAKoB,MAAMJ,EAAOK,aAAa,cACtC9B,EAASyB,EAAOG,WAAWG,cAAc,qBACzC/B,GACEA,EAAOiB,UAAUe,SAAS,UAC5BtC,EAAKuC,OAAOR,EAAQzB,GAEpBN,EAAKwC,SAAST,EAAQzB,GAGxBtB,EAAKgB,EAAM,UACTgC,OAAQ5D,EACRqD,KAAMA,KAKZjD,EAAQ0B,EAAUuC,iBAAiB,mBAAoB,SAAUtC,GAC/DA,EAAKuC,QAAUzC,IAEjBzB,EAAQ0B,EAAUuC,iBAAiB,iBAAkB,SAAUtC,GAC7DA,EAAKuC,QAAUzC,IAUnB,QAASlC,GAAS0D,EAAMtB,GACtBwC,KAAKlD,YACLkD,KAAKxC,KAAOA,EACZwC,KAAKlB,KAAOA,EACZ1B,EAAO4C,MAzIT,GAAIrD,IACF,SACA,YACA,WACA,cACA,SA2OF,OA/FAvB,GAAS6E,UAAUL,OAAS,SAAUpC,EAAMG,EAAQuC,GACpC1C,EAAKkC,cAAc,iBACzBpB,YAAc,IACtBX,EAAOiB,UAAUuB,OAAO,UACpBD,GACJ7D,EAAK2D,KAAM,UACTX,OAAQ7B,EACRG,OAAQA,KAIZvC,EAAS6E,UAAUG,UAAY,WAC7B,GAAI/C,GAAO2C,IAEXnE,GADY4B,SAASC,eAAeL,EAAKG,MAAMsC,iBAAiB,iBACjD,SAAUtC,GACvB,GAAI4B,GAAS5B,EAAK+B,WACd5B,EAASyB,EAAOG,WAAWG,cAAc,qBACzCN,IAAUzB,GAAUyB,EAAOiB,aAAa,cAC1ChD,EAAKuC,OAAOR,EAAQzB,GAAQ,KAGhCtB,EAAK2D,KAAM,iBAQb5E,EAAS6E,UAAUJ,SAAW,SAAUrC,EAAMG,EAAQuC,GACtC1C,EAAKkC,cAAc,iBACzBpB,YAAc,IACtBX,EAAOiB,UAAUC,IAAI,UACjBqB,GACJ7D,EAAK2D,KAAM,YACTX,OAAQ7B,EACRG,OAAQA,KAMZvC,EAAS6E,UAAUK,YAAc,WAC/B,GAAIjD,GAAO2C,IAEXnE,GADY4B,SAASC,eAAeL,EAAKG,MAAMsC,iBAAiB,iBACjD,SAAUtC,GACvB,GAAI4B,GAAS5B,EAAK+B,WACd5B,EAASyB,EAAOG,WAAWG,cAAc,qBACzCN,IAAUzB,GAAUyB,EAAOiB,aAAa,cAC1ChD,EAAKwC,SAAST,EAAQzB,GAAQ,KAGlCtB,EAAK2D,KAAM,mBASb5E,EAAS6E,UAAUM,GAAK,SAAUhE,EAAMR,EAAUC,GAChD,KAAIW,EAAOC,QAAQL,IAAS,GAS1B,KA
AM,IAAIM,OAAMN,EAAO,iCARlByD,MAAKlD,SAASP,KACjByD,KAAKlD,SAASP,OAEhByD,KAAKlD,SAASP,GAAMwC,MAClBhD,SAAUA,EACVoB,QAASnB,KAYfZ,EAAS6E,UAAUO,IAAM,SAAUjE,EAAMR,GACvC,GAAI0E,GAAOC,GAAQ,CACfV,MAAKlD,SAASP,YAAiBQ,SACjCiD,KAAKlD,SAASP,GAAMV,QAAQ,SAAUmB,EAAQf,GAC5CwE,EAAQxE,EACJe,EAAOjB,WAAaA,GAAa2E,IACnCA,GAAQ,KAGRA,GACFV,KAAKlD,SAASP,GAAMoE,OAAOF,EAAO,KAKjCrF,QAGXC,OAAOP","file":"dist/treeview.min.js"} -------------------------------------------------------------------------------- /erpnext_ocr/templates/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Monogramm/erpnext_ocr/c23ebc5a4124d9d2e98740e0edf7386e4621148c/erpnext_ocr/templates/__init__.py -------------------------------------------------------------------------------- /erpnext_ocr/templates/pages/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Monogramm/erpnext_ocr/c23ebc5a4124d9d2e98740e0edf7386e4621148c/erpnext_ocr/templates/pages/__init__.py -------------------------------------------------------------------------------- /erpnext_ocr/templates/pages/__pycache__/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Monogramm/erpnext_ocr/c23ebc5a4124d9d2e98740e0edf7386e4621148c/erpnext_ocr/templates/pages/__pycache__/__init__.py -------------------------------------------------------------------------------- /erpnext_ocr/tests/README.md: -------------------------------------------------------------------------------- 1 | # tesseract-python 2 | 3 | Examples to implement OCR(Optical Character Recognition) using tesseract using Python 4 | 5 | ## Installation 6 | 7 | - Install tesserct-ocr using this command: 8 | 9 | ```sh 10 | sudo apt-get install tesseract-ocr 11 | ``` 12 | 13 | - Install python binding for tesseract, tesserocr, using this pip command: 14 | 15 | ```sh 16 | pip install 
tesserocr 17 | ``` 18 | 19 | - Install image processing library in python, pillow using this pip command: 20 | 21 | ```sh 22 | pip install pillow 23 | ``` 24 | 25 | **For working with pdf files:** 26 | 27 | - Install imagemagick using this command: 28 | 29 | ```sh 30 | sudo apt-get install imagemagick 31 | ``` 32 | 33 | - Install python binding for imagemagick, wand, using this pip command: 34 | 35 | ```sh 36 | pip install wand 37 | ``` 38 | -------------------------------------------------------------------------------- /erpnext_ocr/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2021, Monogramm and Contributors 3 | # For license information, please see license.txt 4 | -------------------------------------------------------------------------------- /erpnext_ocr/tests/test_config_desktop.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2021, Monogramm and Contributors 3 | # For license information, please see license.txt 4 | 5 | import unittest 6 | 7 | from erpnext_ocr.config.desktop import get_data 8 | 9 | 10 | class TestDesktop(unittest.TestCase): 11 | def test_get_data(self): 12 | data = get_data() 13 | 14 | self.assertIsNotNone(data) 15 | -------------------------------------------------------------------------------- /erpnext_ocr/tests/test_config_docs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2021, Monogramm and Contributors 3 | # For license information, please see license.txt 4 | 5 | import unittest 6 | 7 | from erpnext_ocr.config.docs import get_context 8 | 9 | 10 | class TestDocs(unittest.TestCase): 11 | def test_get_context(self): 12 | context = type('obj', (object,), {'brand_html' : None, 13 | 'source_link' : None, 14 | 'docs_base_url' : None, 15 | 'headline' : None, 16 | 
'sub_heading' : None}) 17 | get_context(context) 18 | 19 | self.assertIsNotNone(context) 20 | self.assertIsNotNone(context.brand_html) 21 | self.assertIsNotNone(context.source_link) 22 | self.assertIsNotNone(context.docs_base_url) 23 | self.assertIsNotNone(context.headline) 24 | self.assertIsNotNone(context.sub_heading) 25 | -------------------------------------------------------------------------------- /erpnext_ocr/tests/test_data/Picture_010.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Monogramm/erpnext_ocr/c23ebc5a4124d9d2e98740e0edf7386e4621148c/erpnext_ocr/tests/test_data/Picture_010.png -------------------------------------------------------------------------------- /erpnext_ocr/tests/test_data/Picture_010.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Monogramm/erpnext_ocr/c23ebc5a4124d9d2e98740e0edf7386e4621148c/erpnext_ocr/tests/test_data/Picture_010.tif -------------------------------------------------------------------------------- /erpnext_ocr/tests/test_data/Picture_010_output.txt: -------------------------------------------------------------------------------- 1 |  D. Brawn Manufacture 2 | 3 | 4 | 5 | 6 | 7 | 8 | Invoice no. 
DVT-AX-345678 9 | Payment date: 03/12/2006 10 | 11 | 12 | 13 | Reference Designation Qty Unit price Total CHF Sales 14 | Work 15 | SERVICE D COMPLETE OVERHAUL 1 5500.00 5500.00 220 16 | SERVICE D REFRESHING COMPLETE CASE 1 380.00 380.00 220 17 | AND RHODIUM BATH 18 | Exterior parts: 19 | JO.297.065.FP FLAT GASKET 1 3.00 3.00 220 20 | JO.197.075.FP FLAT GASKET 1 4.00 4.00 220 21 | JO.199.059.OS FLAT ROUND GASKET 1 6.00 6.00 220 22 | VI.261.036.BC W.G.FIXATION SCREWS 10 4.00 40.00 220 23 | AI.465.055.BC WHITE GOLD "FOIL" 1 70.00 70.00 220 24 | PAIR OF HAND 25 | LENGTH: 10/13.50MM 26 | CALIBRE 2868 27 | SPECIAL DISCOUNT -3003.00 -3003.00 28 | Discount -900.00 -900.00 29 | Total CHF 2100.00 30 | 31 | RETURN AFTER REPAIR 32 | NO COMMERCIAL VALUE 33 | 34 | 35 | 36 | Payment: 37 | Mr. John Doe 38 | Green Street 15, Office 4 39 | 1234 Vermut 40 | New Caledonia 41 | 42 | Credit Card: Visa 43 | Card No: 112345678 44 | -------------------------------------------------------------------------------- /erpnext_ocr/tests/test_data/Picture_010_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Monogramm/erpnext_ocr/c23ebc5a4124d9d2e98740e0edf7386e4621148c/erpnext_ocr/tests/test_data/Picture_010_screenshot.png -------------------------------------------------------------------------------- /erpnext_ocr/tests/test_data/item.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Monogramm/erpnext_ocr/c23ebc5a4124d9d2e98740e0edf7386e4621148c/erpnext_ocr/tests/test_data/item.pdf -------------------------------------------------------------------------------- /erpnext_ocr/tests/test_data/sample1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Monogramm/erpnext_ocr/c23ebc5a4124d9d2e98740e0edf7386e4621148c/erpnext_ocr/tests/test_data/sample1.jpg 
-------------------------------------------------------------------------------- /erpnext_ocr/tests/test_data/sample1_output.txt: -------------------------------------------------------------------------------- 1 | The quick brown fox 2 | jumped over the 5 3 | lazy dogs! -------------------------------------------------------------------------------- /erpnext_ocr/tests/test_data/sample2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Monogramm/erpnext_ocr/c23ebc5a4124d9d2e98740e0edf7386e4621148c/erpnext_ocr/tests/test_data/sample2.pdf -------------------------------------------------------------------------------- /erpnext_ocr/tests/test_spell_checker.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2021, Monogramm and Contributors 3 | # For license information, please see license.txt 4 | 5 | import unittest 6 | 7 | from erpnext_ocr.erpnext_ocr.doctype.ocr_read.ocr_read import get_spellchecked_text, get_words_from_text 8 | 9 | 10 | class TestSpellChecker(unittest.TestCase): 11 | def test_spell_checker(self): 12 | text = get_spellchecked_text("An exampel. I beleive this text is not corect.", "eng") 13 | self.assertEqual("An example. I believe this text is not correct.", text) 14 | 15 | def test_get_words_from_text(self): 16 | words = get_words_from_text("Cat in gloves. 
Catches: no mice.") 17 | self.assertEqual(["Cat", "in", "gloves", "Catches", "no", "mice"], words) 18 | -------------------------------------------------------------------------------- /erpnext_ocr/tests/test_tesseract.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2021, Monogramm and Contributors 3 | # For license information, please see license.txt 4 | 5 | import locale 6 | import unittest 7 | import os 8 | 9 | import frappe 10 | 11 | from erpnext_ocr.erpnext_ocr.doctype.ocr_read.ocr_read import read_document 12 | 13 | 14 | class TestTesseract(unittest.TestCase): 15 | def test_read_document_path_none(self): 16 | locale.setlocale(locale.LC_ALL, 'C') 17 | recognized_text = read_document(None) 18 | 19 | self.assertIsNone(recognized_text) 20 | 21 | def test_read_document_lang_not_supported(self): 22 | locale.setlocale(locale.LC_ALL, 'C') 23 | self.assertRaises(frappe.ValidationError, read_document, 24 | os.path.join(os.path.dirname(__file__), "test_data", "sample1.jpg"), 25 | "xxx") 26 | 27 | def test_read_document_image_http(self): 28 | locale.setlocale(locale.LC_ALL, 'C') 29 | recognized_text = read_document("https://github.com/Monogramm/erpnext_ocr/raw/develop/erpnext_ocr/tests/test_data/sample1.jpg", 30 | "eng") 31 | 32 | # print("recognized_text=" + recognized_text) 33 | 34 | self.assertIn("The quick brown fox", recognized_text) 35 | self.assertIn("jumped over the 5", recognized_text) 36 | self.assertIn("lazy dogs!", recognized_text) 37 | self.assertNotIn("And an elephant!", recognized_text) 38 | 39 | def test_read_document_image_jpg(self): 40 | locale.setlocale(locale.LC_ALL, 'C') 41 | recognized_text = read_document(os.path.join(os.path.dirname(__file__), 42 | "test_data", "sample1.jpg"), 43 | "eng") 44 | 45 | # print("recognized_text=" + recognized_text) 46 | 47 | self.assertIn("The quick brown fox", recognized_text) 48 | self.assertIn("jumped over the 5", recognized_text) 
49 | self.assertIn("lazy dogs!", recognized_text) 50 | self.assertNotIn("And an elephant!", recognized_text) 51 | 52 | file = open(os.path.join(os.path.dirname(__file__), 53 | "test_data", "sample1_output.txt"), "r") 54 | expected_text = file.read() 55 | 56 | # Trailing spaces or EOL are acceptable 57 | self.assertTrue(expected_text in recognized_text) 58 | 59 | def test_read_document_image_png(self): 60 | locale.setlocale(locale.LC_ALL, 'C') 61 | recognized_text = read_document(os.path.join(os.path.dirname(__file__), 62 | "test_data", "Picture_010.png"), 63 | "eng") 64 | 65 | # print("recognized_text=" + recognized_text) 66 | 67 | self.assertIn("Brawn Manufacture", recognized_text) 68 | self.assertNotIn("And an elephant!", recognized_text) 69 | 70 | def test_read_document_pdf(self): 71 | locale.setlocale(locale.LC_ALL, 'C') 72 | recognized_text = read_document(os.path.join(os.path.dirname(__file__), 73 | "test_data", "sample2.pdf"), 74 | "eng") 75 | 76 | # print("recognized_text=" + recognized_text) 77 | 78 | self.assertIn("Python Basics", recognized_text) 79 | self.assertNotIn("Java", recognized_text) 80 | -------------------------------------------------------------------------------- /erpnext_ocr/translations/en.csv: -------------------------------------------------------------------------------- 1 | apps/erpnext_ocr/config/desktop.py,ERPNext OCR,Optical Character Recognition -------------------------------------------------------------------------------- /erpnext_ocr/translations/fr.csv: -------------------------------------------------------------------------------- 1 | apps/erpnext_ocr/config/desktop.py,ERPNext OCR,ERPNext ROC 2 | Doctype: OCR Language,OCR Language,Langue ROC 3 | Doctype: OCR Language,Code,Code 4 | Doctype: OCR Language,Language,Langue 5 | Doctype: OCR Language,Is supported,Est supporté 6 | Doctype: OCR Language,Type of OCR,Type de ROC 7 | Doctype: OCR Language,Download OCR,Télécharger ROC 8 | 
apps/erpnext_ocr/erpnext_ocr/doctype/ocr_language/ocr_language.py,Yes,Oui 9 | apps/erpnext_ocr/erpnext_ocr/doctype/ocr_language/ocr_language.py,No,Non 10 | apps/erpnext_ocr/erpnext_ocr/doctype/ocr_language/ocr_language.py,Download is not available for custom OCR data.,Le téléchargement n'est pas disponible pour les données ROC personnalisées. 11 | apps/erpnext_ocr/erpnext_ocr/doctype/ocr_language/ocr_language.py,File could not be downloaded.,Le fichier n'a pas pu être téléchargé. 12 | Doctype: OCR Read,OCR Read,Lecture ROC 13 | Doctype: OCR Read,Image or PDF to Read,Image ou PDF à lire 14 | Doctype: OCR Read,Language,Langue 15 | Doctype: OCR Read,Read file,Lire le fichier 16 | Doctype: OCR Read,Enable Spell Checker,Activer la correction orthographique 17 | Doctype: OCR Read,Read Result,Résultat de la lecture 18 | Doctype: OCR Read,Read Time,Temps de lecture 19 | Doctype: OCR Read,Template,Modèle 20 | Doctype: OCR Read,Generate,Générer 21 | apps/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.js,Reading the file,Lecture du fichier 22 | apps/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.js,Doctype {0} generated,Document {0} généré 23 | apps/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py,The selected language is not available. Please contact your administrator.,La langue sélectionnée n'est pas disponible. Veuillez contacter votre administrateur. 24 | apps/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py,OCR read requires OCR Read doctype.,Une lecture ROC nécessite un document ROC. 25 | Doctype: OCR Settings,OCR Settings,Paramètres ROC 26 | Doctype: OCR Settings,PDF Resolution,Résolution PDF 27 | apps/erpnext_ocr/erpnext_ocr/doctype/ocr_settings/ocr_settings.py,PDF Resolution must be a positive integer eg 300 (high) or 200 (normal).,La résolution PDF doit être un entier positif comme 300 (haute) ou 200 (normale). 
28 | Doctype: OCR Import,OCR Import,Import ROC 29 | Doctype: OCR Import,Doctype,Document 30 | Doctype: OCR Import,Mappings,Associations 31 | apps/erpnext_ocr/erpnext_ocr/doctype/ocr_import/ocr_import.py,Cannot find field '{0}' in text,Impossible de trouver le champ '{0}' dans le texte 32 | Doctype: OCR Import Mapping,OCR Import Mapping,Association d'import ROC 33 | Doctype: OCR Import Mapping,Field,Champ 34 | Doctype: OCR Import Mapping,Regexp,Expression régulière 35 | Doctype: OCR Import Mapping,Value,Valeur 36 | Doctype: OCR Import Mapping,Value type,Type de valeur 37 | Doctype: OCR Import Mapping,Link to Import Mapping,Lien vers association d'import ROC 38 | -------------------------------------------------------------------------------- /erpnext_ocr/translations/ru.csv: -------------------------------------------------------------------------------- 1 | apps/erpnext_ocr/config/desktop.py,ERPNext OCR,ERPNext OCR 2 | Doctype: OCR Language,OCR Language,Язык OCR 3 | Doctype: OCR Language,Code,Код языка 4 | Doctype: OCR Language,Language,Язык 5 | Doctype: OCR Language,Is supported,Поддерживается 6 | Doctype: OCR Language,Type of OCR,Тип OCR 7 | Doctype: OCR Language,Download OCR,Загрузить OCR 8 | apps/erpnext_ocr/erpnext_ocr/doctype/ocr_language/ocr_language.py,Yes,Да 9 | apps/erpnext_ocr/erpnext_ocr/doctype/ocr_language/ocr_language.py,No,Нет 10 | apps/erpnext_ocr/erpnext_ocr/doctype/ocr_language/ocr_language.py,Download is not available for custom OCR data.,Скачивание недоступно для пользовательских данных OCR. 11 | apps/erpnext_ocr/erpnext_ocr/doctype/ocr_language/ocr_language.py,File could not be downloaded.,Файл не может быть загружен. 
12 | Doctype: OCR Read,OCR Read,Чтение OCR 13 | Doctype: OCR Read,Image or PDF to Read,Изображение или pdf-файл 14 | Doctype: OCR Read,Read file,Сканировать файл 15 | Doctype: OCR Read,Enable Spell Checker,Включить проверку орфографии 16 | Doctype: OCR Read,Read Result,Текст из файла 17 | Doctype: OCR Read,Read Time,Время считывания 18 | Doctype: OCR Read,Template,Шаблон 19 | Doctype: OCR Read,Generate,Сгенерировать 20 | Doctype: OCR Read,Language,Язык 21 | apps/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.js,Reading the file,Чтение файла 22 | apps/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.js,Doctype {0} generated, Doctype {0} создана 23 | apps/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py,The selected language is not available. Please contact your administrator.,Выбранный язык не поддерживается системой. Обратитесь к администратору 24 | apps/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py,OCR read requires OCR Read doctype.,Выберите Doctype OCR Read 25 | Doctype: OCR Settings,OCR Settings,Параметры OCR 26 | Doctype: OCR Settings,PDF Resolution,Разрешение PDF 27 | apps/erpnext_ocr/erpnext_ocr/doctype/ocr_settings/ocr_settings.py,PDF Resolution must be a positive integer eg 300 (high) or 200 (normal).,Разрешение должно быть числом большим нуля 300(высокое) 200(низкое) 28 | Doctype: OCR Import,OCR Import,Вставка OCR 29 | Doctype: OCR Import,Doctype,Документ 30 | Doctype: OCR Import,Mappings,Поля для чтения 31 | apps/erpnext_ocr/erpnext_ocr/doctype/ocr_import/ocr_import.py,Cannot find field '{0}' in text,Невозможно найти поле '{0}' 32 | Doctype: OCR Import Mapping,OCR Import Mapping,Поля OCR 33 | Doctype: OCR Import Mapping,Field,Поле 34 | Doctype: OCR Import Mapping,Regexp,Регулярное выражение 35 | Doctype: OCR Import Mapping,Value,Значение 36 | Doctype: OCR Import Mapping,Value type,Тип значения 37 | Doctype: OCR Import Mapping,Link to Import Mapping,Ссылка на Doctype Поля OCR 38 | 
#!/bin/bash
# Manage the development environment through docker-compose.
# Run "./manage.sh" without arguments to print the list of commands.
set -e

# Load the environment variables (e.g. ERPNEXT_HOME) from the local .env file.
. .env

###########################################################
# Functions

# Print a message prefixed with the script name and the current date.
log() {
    MESSAGE=$1

    echo "[$0] [$(date +%Y%m%d)] ${MESSAGE}"
}

# Run docker-compose. $1 is the compose file, the remaining arguments are the
# docker-compose sub-command and its options.
dc() {
    docker-compose -f "${@}"
}

# $1: compose file, remaining arguments: passed to "docker-compose build".
build() {
    log 'Building container(s)...'
    dc "${1}" build "${@:2}"
}

# $1: compose file, remaining arguments: passed to "docker-compose up -d".
start() {
    log 'Starting container(s)...'
    dc "${1}" up -d "${@:2}"
}

# $1: compose file, remaining arguments: passed to "docker-compose stop".
stop() {
    log 'Stopping container(s)...'
    dc "${1}" stop "${@:2}"
}

# $1: compose file, remaining arguments: passed to "docker-compose restart".
restart() {
    log 'Restarting container(s)...'
    dc "${1}" restart "${@:2}"
}

# $1: compose file, remaining arguments: passed to "docker-compose logs -f".
logs() {
    log 'Following container(s) logs (Ctrl + C to stop)...'
    dc "${1}" logs -f "${@:2}"
}

# $1: compose file, remaining arguments: passed to "docker-compose down".
# Also deletes the content of the data directory.
down() {
    log 'Stopping and removing container(s) and data...'
    dc "${1}" down "${@:2}"

    # ":-" falls back to the default path when ERPNEXT_HOME is unset or empty
    # (the former "${ERPNEXT_HOME:/...}" was substring syntax, not a default).
    local data_dir="${ERPNEXT_HOME:-/srv/erpnext/frappe}"
    # ":?" aborts instead of ever expanding to "rm -rf /*".
    rm -rf "${data_dir:?}"/*
}

# $1: compose file, remaining arguments: passed to "bench console".
console() {
    # No "-it" here: it used to land right after "-f" and was taken as the
    # compose file name. "docker-compose exec" allocates a TTY by default.
    dc "${1}" exec erpnext_app bench console "${@:2}"
}

usage() {
    echo "usage: ./manage.sh COMMAND [ARGUMENTS]

    Commands:
        build       Build Dev env
        start       Start Dev env
        restart     Restart Dev env
        stop        Stop Dev env
        test        Start the 'sut' service and follow its logs
        logs        Follow logs of Dev env
        down        Stop and remove Dev env
        console     Send command to Dev env bench console
    "
}

###########################################################
# Runtime

case "${1}" in
    # DEV env
    build) build docker-compose.yml "${@:2}";;
    start) start docker-compose.yml "${@:2}";;
    restart) restart docker-compose.yml "${@:2}";;
    stop) stop docker-compose.yml "${@:2}";;
    test) start docker-compose.yml sut
    logs docker-compose.yml sut;;
    logs) logs docker-compose.yml "${@:2}";;
    down) down docker-compose.yml "${@:2}";;
    console) console docker-compose.yml "${@:2}";;
    # PROD env
    #build) TAG=${DOCKER_TAG} \
    #    VCS_REF=`git rev-parse --short HEAD` \
    #    BUILD_DATE=`date -u +"%Y-%m-%dT%H:%M:%SZ"` \
    #    build .travis/docker-compose.yml ${@:2};;
    #start) start .travis/docker-compose.mariadb.yml ${@:2};;
    #restart) restart .travis/docker-compose.mariadb.yml ${@:2};;
    #stop) stop .travis/docker-compose.mariadb.yml ${@:2};;
    #logs) logs .travis/docker-compose.mariadb.yml ${@:2};;
    #down) down .travis/docker-compose.mariadb.yml ${@:2};;
    #console) console .travis/docker-compose.mariadb.yml ${@:2};;
    # Help
    *) usage;;
esac

exit 0
# -*- coding: utf-8 -*-
# Copyright (c) 2021, Monogramm and Contributors
# See license.txt

from setuptools import setup, find_packages
import re
import ast

# Runtime dependencies: one entry per line of requirements.txt.
with open('requirements.txt') as requirements_file:
    requirements_text = requirements_file.read()
install_requires = requirements_text.strip().split('\n')

# Captures the literal assigned to __version__ in erpnext_ocr/__init__.py.
_version_re = re.compile(r'__version__\s+=\s+(.*)')

with open('erpnext_ocr/__init__.py', 'rb') as init_file:
    init_source = init_file.read().decode('utf-8')

# Evaluate the captured literal safely to obtain the version string.
version_literal = _version_re.search(init_source).group(1)
version = str(ast.literal_eval(version_literal))

setup(
    name='erpnext_ocr',
    version=version,
    description='Optical Character Recognition using tesseract within Frappe..',
    author='Monogramm',
    author_email='opensource@monogramm.io',
    packages=find_packages(),
    zip_safe=False,
    include_package_data=True,
    install_requires=install_requires
)