37 |
38 | {% endblock %}
39 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
# Continuous integration: runs the PHP and Node test suites on every push to
# main and on every pull request.
name: CI

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - '**'

jobs:
  build:

    env:
      APP_ENV: test

    strategy:
      matrix:
        php: [ '7.3', '7.4', '8.0', '8.1' ]
        runPhan: [ true ]
        include:
          # PHP 8.2 is tested as an extra combination with the Phan step switched off.
          - php: '8.2'
            runPhan: false

    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # checkout@v1 fetched the complete history; keep doing so.
          fetch-depth: 0

      - name: Set up PHP
        uses: shivammathur/setup-php@v2
        with:
          php-version: ${{matrix.php}}
          coverage: none
          extensions: ast

      # The `::set-output` workflow command is deprecated; step outputs are
      # written to the file named by $GITHUB_OUTPUT instead.
      - name: Read .nvmrc
        run: echo "NODE_VERSION=$(cat .nvmrc)" >> "$GITHUB_OUTPUT"
        id: nvm

      - name: Set up Node ${{ steps.nvm.outputs.NODE_VERSION }}
        uses: actions/setup-node@v4
        with:
          node-version: ${{ steps.nvm.outputs.NODE_VERSION }}

      - name: Install tesseract
        run: |
          sudo apt-get update
          sudo apt-get install -y tesseract-ocr-all

      - name: Install wikimedia-ocr
        run: |
          composer install
          npm ci

      # The final grep fails the job when the build leaves the working tree dirty.
      - name: Test
        run: |
          composer test-common
          npm run test
          npm run build
          git status
          git status | grep "nothing to commit, working tree clean"

      - name: Test (Phan)
        if: ${{ matrix.runPhan }}
        run: composer phan
68 |
--------------------------------------------------------------------------------
/.env:
--------------------------------------------------------------------------------
1 | # In all environments, the following files are loaded if they exist,
2 | # the latter taking precedence over the former:
3 | #
4 | # * .env contains default values for the environment variables needed by the app
5 | # * .env.local uncommitted file with local overrides
6 | # * .env.$APP_ENV committed environment-specific defaults
7 | # * .env.$APP_ENV.local uncommitted environment-specific overrides
8 | #
9 | # Real environment variables win over .env files.
10 | #
11 | # DO NOT DEFINE PRODUCTION SECRETS IN THIS FILE NOR IN ANY OTHER COMMITTED FILES.
12 | #
13 | # Run "composer dump-env prod" to compile .env files for production use (requires symfony/flex >=1.2).
14 | # https://symfony.com/doc/current/best_practices.html#use-environment-variables-for-infrastructure-configuration
15 |
16 | ###> symfony/framework-bundle ###
17 | APP_ENV=dev
18 | APP_SECRET=3f028b359f05763e6cf2049cec74579e
19 | ###< symfony/framework-bundle ###
20 |
21 | ###> symfony/mailer ###
22 | MAILER_DSN=smtp://mail.tools.wmflabs.org:25
23 | ###< symfony/mailer ###
24 |
25 | APP_GOOGLE_KEYFILE=
26 |
27 | APP_MAIL_SENDER=tools.wikimedia-ocr@tools.wmflabs.org
28 | APP_LOG_RECIPIENT_1=admin1@example.org
29 | APP_LOG_RECIPIENT_2=admin2@example.org
30 | APP_LOG_SUBJECT="[Wikimedia OCR]"
31 |
32 | # Comma-separated list of the host names (without protocols) of where images are stored.
33 | APP_IMAGE_HOSTS=upload.wikimedia.org,upload.wikimedia.beta.wmflabs.org
34 |
35 | REDIS_HOST=redis.svc.tools.eqiad1.wikimedia.cloud
36 |
37 | APP_CACHE_TTL=3600 # 1 hour
38 |
39 | APP_TRANSKRIBUS_USERNAME=
40 | APP_TRANSKRIBUS_PASSWORD=
41 |
--------------------------------------------------------------------------------
/src/Kernel.php:
--------------------------------------------------------------------------------
1 | import('../config/{packages}/*.yaml');
18 | $container->import('../config/{packages}/'.$this->environment.'/*.yaml');
19 |
20 | if (is_file(\dirname(__DIR__).'/config/services.yaml')) {
21 | $container->import('../config/services.yaml');
22 | $container->import('../config/{services}_'.$this->environment.'.yaml');
23 | } elseif (is_file($path = \dirname(__DIR__).'/config/services.php')) {
24 | (require $path)($container->withPath($path), $this);
25 | }
26 | }
27 |
/**
 * Import the route definitions: environment-specific YAML files first, then
 * the shared YAML files, then either config/routes.yaml or, when that file
 * is absent, config/routes.php.
 */
protected function configureRoutes(RoutingConfigurator $routes): void
{
    $configDir = \dirname(__DIR__).'/config';

    $routes->import(sprintf('../config/{routes}/%s/*.yaml', $this->environment));
    $routes->import('../config/{routes}/*.yaml');

    if (is_file($configDir.'/routes.yaml')) {
        $routes->import('../config/routes.yaml');

        return;
    }

    // The PHP route file is only consulted when routes.yaml does not exist.
    $phpRoutes = $configDir.'/routes.php';
    if (is_file($phpRoutes)) {
        (require $phpRoutes)($routes->withPath($phpRoutes), $this);
    }
}
39 | }
40 |
--------------------------------------------------------------------------------
/i18n/br.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Fulup",
5 | "Iriep"
6 | ]
7 | },
8 | "title": "WikimediaOCR",
9 | "subtitle": "Treuzskrivañ an destenn adalek skeudennoù",
10 | "form-heading": "Treuzskrivañ ur skeudenn",
11 | "image-url": "URL ar skeudenn",
12 | "image-url-help": "Ensoc'hañ URL ur skeudenn herberc'hiet war ur servijer Wikimedia evel da skouer: $1",
13 | "image-url-error": "URL ar skeudenn a rank kregiñ gant {{PLURAL:$1|an anv domani da-heul|unan eus an anvioù domani da-heul}} hag a rank echuiñ gant un astenn restr reizh: $2",
14 | "image-alt-text": "Ar skeudenn orin",
15 | "language-code": "Yezhoù (diret)",
16 | "engine": "Lusker OCR",
17 | "engine-not-found-warning": "Neket bet kavet al lusker azgoulennet '$1'. Implijout al lusker '$2' e-plas.",
18 | "engine-invalid-langs-warning": "Direizh eo ar yezhoù da heul pe n'int ket skoret gant ar c'heflusker ; lezet int bet a-gostez : $1",
19 | "submit": "Treuzskrivañ ar bajenn a-bezh",
20 | "submit-crop": "Treuzskrivañ an takad",
21 | "drag-help": "Diuzañ ar benveg didroc'hañ ha tresañ un hirgarrezenn war ar skeudenn dindan evit treuzskrivañ ul lodenn eus ar bajenn hepken.",
22 | "drag-mode-move": "Ruzañ a lakay ar skeudenn da fiñval",
23 | "drag-mode-move-alt": "Arlun a arouez an oberiadenn 'fiñval'",
24 | "drag-mode-crop": "Ruzañ a grouo un takad didroc'hañ nevez.",
25 | "copy-to-clipboard": "Eilañ er golver",
26 | "copied-to-clipboard": "Eilet !",
27 | "documentation": "Teuliadur",
28 | "version": "Stumm $1",
29 | "report-issue": "Titourañ un draen",
30 | "langs-placeholder": "Lezel goullo evit mat vo detektet ar yezh ent emgefre."
31 | }
32 |
--------------------------------------------------------------------------------
/i18n/el.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Jimkats",
5 | "Norhorn"
6 | ]
7 | },
8 | "title": "WikimediaOCR",
9 | "form-heading": "Μεταγραφή εικόνας",
10 | "image-url": "URL εικόνας",
11 | "image-alt-text": "Η αρχική εικόνα",
12 | "language-code": "Γλώσσες (προαιρετικά)",
13 | "engine": "Μηχανή OCR",
14 | "engine-name-transkribus": "Transkribus OCR",
15 | "submit": "Μεταγραφή ολόκληρης της σελίδας",
16 | "submit-crop": "Περιοχή μεταγραφής",
17 | "copy-to-clipboard": "Αντιγραφή στο πρόχειρο",
18 | "copied-to-clipboard": "Αντιγράφηκε!",
19 | "google-error": "Η υπηρεσία Google επέστρεψε ένα σφάλμα: $1",
20 | "image-retrieval-failed": "Η ανάκτηση εικόνας απέτυχε: $1",
21 | "documentation": "Τεκμηρίωση",
22 | "api-tooltip": "Δείτε την τεκμηρίωση του API",
23 | "version": "Έκδοση $1",
24 | "report-issue": "Αναφέρετε ένα πρόβλημα",
25 | "loading-message": "Εκτέλεση μεταγραφής...",
26 | "tesseract-options": "Επιλογές Tesseract",
27 | "transkribus-language-code": "Γλωσσικό Μοντέλο",
28 | "transkribus-default-error": "Κωδικός σφάλματος '$1' :: Δεν είναι δυνατή η ολοκλήρωση του αιτήματος, προσπαθήστε ξανά!",
29 | "transkribus-no-lang-error": "Δεν επιλέχθηκε γλώσσα",
30 | "transkribus-options": "Επιλογές Transkribus",
31 | "transkribus-line-label": "Μοντέλο Ανίχνευσης Γραμμής",
32 | "transkribus-line-id-none-option": "Κανένα",
33 | "transkribus-jobs": "Εργασίες Transkribus",
34 | "transkribus-job-id": "ID εργασίας",
35 | "transkribus-job-state": "Κατάσταση",
36 | "transkribus-job-description": "Περιγραφή",
37 | "transkribus-job-start": "Ξεκίνησε",
38 | "transkribus-job-end": "Τελείωσε",
39 | "transkribus-job-waited": "Καθυστέρηση έναρξης (λεπτά)"
40 | }
41 |
--------------------------------------------------------------------------------
/i18n/roa-tara.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Joetaras"
5 | ]
6 | },
7 | "title": "UicchimedieOCR",
8 | "subtitle": "Trascrive 'u teste da l'immaggine",
9 | "form-heading": "Trascrive cumme immaggine",
10 | "image-url": "URL de l'immaggine",
11 | "image-url-help": "Mitte 'na URL de l'immaggine ospitate sus a 'u server de Uicchimedie cumme: $1",
12 | "image-url-error": "L'URL de l'immaggine adda accumenzà cu {{PLURAL:$1|stu|une de ste}} nome de dominie e addà spiccià cu 'n'estenzione de file valide: $2",
13 | "image-alt-text": "L'immaggine origgenale",
14 | "language-code": "Lènghe (facoltative)",
15 | "engine": "Motore OCR",
16 | "engine-not-found-warning": "'U motore cercate '$1' non g'ha state acchiate. Ause 'u motore predefinite '$2'.",
17 | "engine-invalid-langs-warning": "Ste lènghe non ge sò valide o non ge sò supportate da 'u motore e avènene cacate: $1",
18 | "submit": "Trascrive totte 'a pàgene",
19 | "submit-crop": "Trascrive l'arèe",
20 | "copy-to-clipboard": "Copie jndr'à l'appunde",
21 | "copied-to-clipboard": "Copiate!",
22 | "image-retrieval-failed": "Recupere de l'immaggine fallite: $1",
23 | "documentation": "Documendazione",
24 | "version": "Versione $1",
25 | "report-issue": "Segnale 'nu probbleme",
26 | "tesseract-options": "Opziune de tesseract",
27 | "tesseract-psm-label": "Metode d'a segmentazione d'a pàgene",
28 | "tesseract-psm-7": "Tratte l'immaggine cumme 'na linèe de test singole.",
29 | "tesseract-psm-8": "Tratte l'immaggine cumme 'na parole singole.",
30 | "tesseract-psm-9": "Tratte l'immaggine cumme 'na parole singole jndr'à 'nu cerchie.",
31 | "tesseract-psm-10": "Tratte l'immaggine cumme 'nu carattere singole.",
32 | "tesseract-psm-12": "Teste sparse cu OSD."
33 | }
34 |
--------------------------------------------------------------------------------
/phpunit.xml.dist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 | tests
28 |
29 |
30 |
31 |
32 |
33 | src
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
47 |
48 |
--------------------------------------------------------------------------------
/i18n/sat.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "ᱤᱧ ᱢᱟᱛᱟᱞ"
5 | ]
6 | },
7 | "title": "ᱣᱤᱠᱤᱢᱤᱰᱤᱭᱟ ᱳᱥᱤᱟᱨ",
8 | "subtitle": "ᱪᱤᱛᱟᱹᱨ ᱠᱷᱚᱱ ᱚᱞ ᱨᱮᱭᱟᱜ ᱚᱞ ᱚᱞ ᱢᱮ ᱾",
9 | "form-heading": "ᱢᱤᱫᱴᱟᱝ ᱪᱤᱛᱟᱹᱨ ᱴᱨᱟᱱᱤᱥᱠᱨᱤᱯ ᱢᱮ ᱾",
10 | "image-url": "ᱪᱤᱛᱟᱹᱨ ᱨᱮᱭᱟᱜ ᱤᱭᱩ ᱟᱨ ᱮᱞ ᱾",
11 | "image-url-help": "ᱩᱭᱠᱤᱢᱤᱰᱤᱭᱟ ᱥᱟᱨᱵᱷᱟᱨ ᱨᱮ ᱦᱳᱥᱴ ᱟᱠᱟᱱ ᱢᱤᱫᱴᱟᱝ ᱪᱤᱛᱟᱹᱨ ᱨᱮᱭᱟᱜ ᱤᱭᱩ ᱟᱨ ᱮᱞ ᱥᱮᱞᱮᱫ ᱢᱮ ᱡᱮᱞᱮᱠᱟ '1: $1'",
12 | "image-alt-text": "ᱢᱩᱲᱩᱫ ᱪᱤᱛᱟᱹᱨ ᱫᱚ ᱾",
13 | "language-code": "ᱯᱟᱹᱨᱥᱤ (ᱚᱯᱥᱟᱱᱟᱞ)",
14 | "engine": "ᱳᱥᱤᱟᱨ ᱤᱱᱡᱤᱱ",
15 | "engine-name-transkribus": "ᱴᱨᱮᱱᱥᱠᱨᱤᱵᱟᱥ ᱳᱥᱤᱟᱨ",
16 | "engine-not-found-warning": "ᱱᱮᱦᱚᱨ ᱞᱮᱱ ᱤᱱᱡᱤᱱ '$1' ᱫᱚ ᱵᱟᱝ ᱧᱟᱢ ᱟᱠᱟᱱᱟ ᱾ ᱚᱱᱟ ᱵᱚᱫᱚᱞ ᱛᱮ ᱰᱯᱷᱚᱞᱴ ᱤᱱᱡᱤᱱ '$2' ᱵᱮᱣᱦᱟᱨ ᱢᱮ ᱾",
17 | "submit": "ᱜᱚᱴᱟ ᱥᱟᱠᱟᱢ ᱫᱚ ᱚᱞ ᱢᱮ",
18 | "submit-crop": "ᱴᱚᱴᱷᱟ ᱫᱚ ᱚᱞ ᱢᱮ",
19 | "drag-help": "ᱠᱨᱚᱯ ᱴᱩᱞ ᱵᱟᱪᱷᱱᱟᱣ ᱢᱮ ᱟᱨ ᱥᱟᱠᱟᱢ ᱨᱮᱭᱟᱜ ᱢᱤᱫᱴᱟᱝ ᱴᱚᱴᱷᱟ ᱜᱮ ᱚᱞ ᱞᱟᱹᱜᱤᱫ ᱞᱟᱛᱟᱨ ᱨᱮ ᱪᱤᱛᱟᱹᱨ ᱨᱮ ᱢᱤᱫᱴᱟᱝ ᱟᱭᱢᱟᱜᱟᱱ ᱰᱨᱟᱜ ᱢᱮ ᱾",
20 | "drag-mode-move": "ᱰᱨᱟᱜᱽ ᱠᱚᱨᱟᱣ ᱪᱤᱛᱟᱹᱨ ᱫᱚ ᱪᱟᱞᱟᱣ ᱟᱭ ᱾",
21 | "drag-mode-move-alt": "'ᱢᱩᱵᱷ' ᱮᱠᱥᱚᱱ ᱨᱮᱭᱟᱜ ᱩᱫᱩᱜ ᱥᱚᱫᱚᱨ ᱟᱭᱠᱚᱱ᱾",
22 | "drag-mode-crop": "ᱰᱨᱟᱜᱽ ᱢᱤᱫᱴᱟᱝ ᱱᱟᱶᱟ ᱪᱟᱥ ᱴᱚᱴᱷᱟ ᱵᱮᱱᱟᱣᱼᱟ",
23 | "drag-mode-crop-alt": "'ᱪᱟᱥ' ᱠᱟᱹᱢᱤ ᱨᱮᱭᱟᱜ ᱩᱫᱩᱜ ᱥᱚᱫᱚᱨ ᱟᱭᱠᱚᱱ ᱾",
24 | "copy-to-clipboard": "ᱠᱞᱤᱯᱵᱳᱨᱰ ᱨᱮ ᱠᱚᱯᱤ ᱢᱮ",
25 | "copied-to-clipboard": "ᱠᱚᱯᱤ ᱠᱚᱨᱟᱣ ᱦᱩᱭ ᱟᱠᱟᱱᱟ",
26 | "google-error": "ᱜᱩᱜᱚᱞ ᱯᱚᱨᱤᱥᱮᱵᱟ ᱢᱤᱫᱴᱟᱝ ᱞᱨᱳᱴ ᱨᱩᱣᱟᱹᱲ ᱟᱠᱟᱫᱟ ᱾ $1",
27 | "image-retrieval-failed": "ᱪᱤᱛᱟᱹᱨ ᱧᱟᱢ ᱵᱟᱝ ᱦᱩᱭ ᱟᱠᱟᱱᱟ ᱾ $1",
28 | "version": "ᱵᱚᱨᱱᱚᱱ $1",
29 | "report-issue": "ᱟᱱᱟᱴ ᱨᱮ ᱨᱮᱯᱳᱨᱴ ᱢᱮ",
30 | "langs-placeholder": "ᱟᱡᱛᱮ ᱯᱟᱹᱨᱥᱤ ᱵᱟᱰᱟᱭ ᱞᱟᱹᱜᱤᱫ ᱵᱮᱠᱚᱞᱚᱠ ᱫᱚᱦᱚ ᱢᱮ ᱾",
31 | "loading-message": "ᱴᱨᱟᱱᱥᱤᱠᱯᱥᱚᱱ ᱠᱚᱨᱟᱣ...",
32 | "tesseract-psm-label": "ᱥᱟᱠᱟᱢ ᱦᱟᱹᱴᱤᱧ ᱦᱚᱨᱟ",
33 | "tesseract-psm-help": "ᱟᱨᱦᱚᱸ ᱱᱟᱯᱟᱭ ᱟᱭᱢᱟᱼᱠᱞᱩᱢ ᱨᱮᱭᱟᱜ ᱜᱚᱲᱚ ᱞᱟᱹᱜᱤᱫ \"ᱥᱯᱟᱨᱥ ᱴᱮᱥᱴ\" ᱵᱮᱣᱦᱟᱨ ᱢᱮ ᱾",
34 | "tesseract-psm-0": "ᱳᱭᱮᱨᱮᱱᱴᱮᱥᱚᱱ ᱟᱨ ᱥᱠᱨᱤᱯᱴ ᱰᱤᱴᱮᱠᱥᱚᱱ (ᱳᱮᱥᱰᱤ) ᱥᱩᱢᱩᱝ ᱾",
35 | "tesseract-psm-1": "ᱳᱹ ᱮᱥᱹᱰᱤ ᱥᱟᱶ ᱟᱡᱛᱮ ᱥᱟᱠᱟᱢ ᱦᱟᱹᱴᱤᱧ ᱾"
36 | }
37 |
--------------------------------------------------------------------------------
/i18n/hu.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Dj",
5 | "Hanna Tardos",
6 | "Tacsipacsi"
7 | ]
8 | },
9 | "title": "WikimédiaOCR",
10 | "subtitle": "Szöveg átírása képekből",
11 | "form-heading": "Kép átírása",
12 | "image-url": "Kép URL-címe",
13 | "image-url-help": "Egy tényleges képfájl teljes URL-je, a következő domainek egyikével: $1",
14 | "image-url-error": "A fájlnévnek egy érvényes kiterjesztéssel kell végződnie, és a következő {{PLURAL:$1|domainnévvel|domainnevek egyikével}} kell kezdődnie: $2",
15 | "image-alt-text": "Az eredeti kép",
16 | "language-code": "Nyelvek (nem kötelező)",
17 | "engine": "OCR-motor",
18 | "engine-not-found-warning": "A kért „$1” motor nem található. Ehelyett az alapértelmezett „$2” motort használja.",
19 | "engine-invalid-langs-warning": "A következő nyelvek érvénytelenek vagy a motor által nem támogatottak, ezért figyelmen kívül hagyták: $1",
20 | "submit": "Az egész oldal átírása",
21 | "submit-crop": "Átírási terület",
22 | "copy-to-clipboard": "Másolás a vágólapra",
23 | "copied-to-clipboard": "Kimásolva!",
24 | "google-error": "A Google-szolgáltatás hibát adott vissza: $1",
25 | "image-retrieval-failed": "A kép lekérése sikertelen: $1",
26 | "documentation": "Dokumentáció",
27 | "version": "$1 verzió",
28 | "report-issue": "Hibabejelentés",
29 | "langs-param-error": "Az OCR-motor a következő {{PLURAL:$1|nyelvet|nyelveket}} nem támogatja: $2",
30 | "tesseract-options": "Tesseract-beállítások",
31 | "transkribus-line-id-none-option": "Nincs",
32 | "transkribus-mixed-line-option": "Vegyes vonal tájolás",
33 | "transkribus-line-help": "Hagyd üresen, ha nem vagy biztos abban, hogy melyik vonalérzékelési modellt használja",
34 | "transkribus-job-id": "Feladatazonosító",
35 | "transkribus-job-state": "Állapot",
36 | "transkribus-job-description": "Leírás",
37 | "transkribus-job-start": "Elindult",
38 | "transkribus-job-end": "Kész",
39 | "transkribus-job-waited": "Indítási késleltetés (perc)"
40 | }
41 |
--------------------------------------------------------------------------------
/tests/Twig/AppExtensionTest.php:
--------------------------------------------------------------------------------
1 | projectDir,
26 | new TesseractOCR()
27 | );
28 | $transkribusEngine = new TranskribusEngine(
29 | new TranskribusClient(
30 | getenv( 'APP_TRANSKRIBUS_USERNAME' ),
31 | getenv( 'APP_TRANSKRIBUS_PASSWORD' ),
32 | new MockHttpClient(),
33 | new NullAdapter(),
34 | new NullAdapter()
35 | ),
36 | new Intuition(),
37 | $this->projectDir,
38 | new MockHttpClient()
39 | );
40 | $this->ext = new AppExtension( $tesseractEngine, $transkribusEngine );
41 | }
42 |
/**
 * Language names are resolved for both custom and standard codes.
 * @covers AppExtension::getOcrLangName
 */
public function testOcrLangName(): void {
	$expectedNames = [
		// Non-standard language code with name defined in models.json
		'aze_cyrl' => 'Azərbaycan (qədim yazı)',
		// Standard language code (name provided by Intuition)
		'en' => 'English',
	];

	foreach ( $expectedNames as $langCode => $langName ) {
		static::assertSame( $langName, $this->ext->getOcrLangName( $langCode ) );
	}
}
53 |
/**
 * A line detection model ID is mapped to its display name.
 * @covers AppExtension::getLineIdName
 */
public function testLineIdName(): void {
	$modelName = $this->ext->getLineIdName( 'bali' );

	static::assertSame( 'Balinese Line Detection Model', $modelName );
}
60 | }
61 |
--------------------------------------------------------------------------------
/i18n/ps.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "شاه زمان پټان"
5 | ]
6 | },
7 | "title": "ويکيرسنۍ ليدنيزهلوښېپېژندنه",
8 | "subtitle": "په ويکيسرچينې يا کوم بل ځای کې د ويکيرسنۍ خونديځ کې د سکين شوي انځورونو د ليک لمېسلو او کارولو لپاره يو توکی.",
9 | "form-heading": "انځور پورهلیکل",
10 | "image-url": "انځور وېبتړ",
11 | "image-url-help": "په ويکيرسنۍ پالنگر ې کوربهشوی انځور وېبتړ دننه کړئ لکه: $1",
12 | "image-alt-text": "ار انځور",
13 | "language-code": "ژبې (اختياري)",
14 | "engine": "ليدنيزهلوښېپېژندنه اينجن",
15 | "engine-name-transkribus": "پورهليکلو ليدنيزهلوښېپېژندنه",
16 | "submit": "ټول مخ پورهليکل",
17 | "submit-crop": "پورهليکلو سيمه",
18 | "drag-mode-move": "کشول به انځور وخوځوي",
19 | "copy-to-clipboard": "ټينگدړې ته لمېسل",
20 | "copied-to-clipboard": "ولمېسلشو!",
21 | "google-error": "گوگل پالنگر يوه تېروتنه راوگرځوله: $1",
22 | "documentation": "لاسوند",
23 | "api-tooltip": "ایپيآی لاسوند کتل",
24 | "version": "$1 بلبڼه",
25 | "report-issue": "د يوې ستونزې خبر ورکول",
26 | "langs-placeholder": "د ژبې خپلکاره موندلو لپاره تش پرېښودل.",
27 | "langs-param-error": "لاندې {{PLURAL:$1|ژبه|ژبې}} د ليدنيزهلوښېپېژندنې اېنجن نه ملاتړ کوي: $2",
28 | "loading-message": "د پورهليکلو ترسرهکول...",
29 | "tesseract-psm-label": "مخ وېشلو چلند",
30 | "transkribus-language-code": "ژبې چلنوال",
31 | "transkribus-no-lang-error": "هېڅ ژبه غورهشوې نه ده",
32 | "transkribus-options": "پورهلیکلو خوښنې",
33 | "transkribus-line-label": "د کرښې موندل بېلگه",
34 | "transkribus-line-id-none-option": "هېڅ",
35 | "transkribus-mixed-line-option": "د يوځای شوې کرښې لوری",
36 | "transkribus-line-help": "که تاسو ډاډه نه ياست چې کومه د کرښې موندلو لوري بېلگه وکاروئ؛ نو تش يې پرېږدئ",
37 | "transkribus-jobs": "پورهلیکلو دندې",
38 | "transkribus-job-id": "دندې پېژند",
39 | "transkribus-job-state": "دريځ",
40 | "transkribus-job-description": "څرگنداوی",
41 | "transkribus-job-start": "پیل شو",
42 | "transkribus-job-end": "پای ته ورسېد",
43 | "transkribus-job-waited": "پيل ځنډ (دقيقې)"
44 | }
45 |
--------------------------------------------------------------------------------
/assets/styles/app.css:
--------------------------------------------------------------------------------
@import '~bootstrap';
@import '~select2';

/* Encore can't find '~select2-bootstrap-theme' with @import */
@import '../../node_modules/select2-bootstrap-theme/dist/select2-bootstrap.min.css';

.page-header {
	background-color: #f5f5f5;
	margin: 0 0 25px;
	padding: 24px 0 18px;
}

.container {
	max-width: 1170px;
}

.page-header .container {
	align-items: center;
	display: flex;
	/* Width of container + logo size and its padding */
	max-width: calc( 1170px + ( (50px + 20px) * 2) );
	width: auto;
}

.logo {
	margin: 10px 20px 25px 0;
}

body.rtl .logo {
	float: right;
	margin: 10px 0 25px 20px;
}

.page-title {
	font-weight: bold;
	margin-bottom: 0;
}

.page-subtitle {
	font-size: 1em;
}

.form-heading {
	border-bottom: 1px solid #e5e5e5;
	font-size: 1.5em;
	margin: 25px 0;
}

fieldset,
.alert {
	max-width: 541px;
}

/* Prevent the select2 input from exceeding the viewport on smaller screens */
.select2-container {
	/* Scoped to the next line only, so the rule stays active for the rest of the file. */
	/* stylelint-disable-next-line declaration-no-important */
	width: 100% !important;
}

.radio:first-of-type {
	margin-top: 0;
}

.engine-options {
	margin-top: 30px;
}

.engine-help {
	margin-top: 10px;
}

.submit-btn {
	margin-top: 40px;
}

.output-buttons {
	text-align: right;
	margin-bottom: 10px;
}

.nojs .nojs-hide {
	display: none;
}

.loader {
	background-color: #f5f5f5;
	padding: 12px;
}

.loader p {
	margin: 0;
	font-weight: bold;
}

/* One full clockwise turn; used by the refresh icon below. */
@keyframes loader {
	to {
		transform: rotate( 360deg );
	}
}

.glyphicon.glyphicon-refresh {
	margin-right: 5px;
	animation: loader 1500ms linear infinite;
}
105 |
--------------------------------------------------------------------------------
/i18n/io.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Joao Xavier"
5 | ]
6 | },
7 | "title": "WikimedioOCR",
8 | "subtitle": "Transskribar texti de imaji",
9 | "form-heading": "Transskribar ul imajo",
10 | "image-url": "URL dil imajo",
11 | "image-url-help": "Adjuntez URL por imajo qua esas che altra servero Wikimedia, exemple: $1",
12 | "image-url-error": "La URL di ula imajo mustas komencar kun {{PLURAL:$1|la sequanta domeno-nomo|un ek la sequanta domeno-nomi}}, e finar kun ula valida sufixo: $2",
13 | "image-alt-text": "L'originala imajo",
14 | "language-code": "Idiomi (fakultativa)",
15 | "engine": "Mashino OCR",
16 | "engine-not-found-warning": "La demandita utensilo '$1' ne uzesis. Vice ol, uzez l'utensilo ''default'', $2.",
17 | "engine-invalid-langs-warning": "La sequanta idiomi esas nevalida, o ne suportata dal utensilo. Pro to, li ignoresis: $1",
18 | "submit": "Transskriptez tota pagino",
19 | "submit-crop": "Transskribo-areo",
20 | "drag-mode-move": "Trananta, l'imajo movesos",
21 | "copy-to-clipboard": "Kopiez a ''clipboard''",
22 | "copied-to-clipboard": "Kopiita!",
23 | "google-error": "La servado Google montris eroro: $1",
24 | "image-retrieval-failed": "Faliis la rekupero dil imajo: $1",
25 | "documentation": "Dokumentigo",
26 | "api": "API",
27 | "api-tooltip": "Videz la dokumentigo dil API",
28 | "version": "Versiono $1",
29 | "report-issue": "Informez problemo",
30 | "langs-param-error": "La sequanta {{PLURAL:$1|linguo|lingui}} ne agnoskesas dal OCR-softwaro*: $2",
31 | "tesseract-options": "Tesseract-opcioni",
32 | "tesseract-psm-label": "Metodo por segmentigo di pagini",
33 | "tesseract-psm-0": "Orientation and script detection (OSD) only.",
34 | "tesseract-psm-1": "Automatic page segmentation with OSD.",
35 | "tesseract-psm-2": "Automatic page segmentation, but no OSD, or OCR. (not implemented)",
36 | "tesseract-psm-3": "Fully automatic page segmentation, but no OSD. (Default)",
37 | "tesseract-psm-4": "Supozar singla kolumno di texto kun varianta grandesi.",
38 | "tesseract-psm-7": "Traktez l'imajo kom singla lineo di texto.",
39 | "tesseract-psm-8": "Traktez l'imajo kom singla vorto.",
40 | "tesseract-psm-9": "Traktez l'imajo kom singla vorto en cirklo.",
41 | "tesseract-psm-10": "Traktez l'imajo kom singla karaktero."
42 | }
43 |
--------------------------------------------------------------------------------
/src/Engine/Image.php:
--------------------------------------------------------------------------------
1 | imageUrl = $imageUrl;
31 | $this->crop = $crop;
32 | }
33 |
/**
 * Get the URL stored for this image.
 * @return string
 */
public function getUrl(): string {
	$url = $this->imageUrl;

	return $url;
}
40 |
/**
 * Whether the stored crop has both a width and a height greater than zero.
 * @return bool
 */
public function needsCropping(): bool {
	foreach ( [ 'width', 'height' ] as $dimension ) {
		// A missing (or null) dimension, or one that is not positive, means no crop.
		if ( !isset( $this->crop[$dimension] ) || !( $this->crop[$dimension] > 0 ) ) {
			return false;
		}
	}

	return true;
}
45 |
/**
 * Build a Crop from the stored 'x', 'y', 'width' and 'height' crop values.
 * @return Crop
 */
public function getCrop(): Crop {
	$origin = new Point( $this->crop['x'], $this->crop['y'] );
	$dimensions = new Box( $this->crop['width'], $this->crop['height'] );

	return new Crop( $origin, $dimensions );
}
55 |
/**
 * Whether image data has been set.
 * @return bool
 */
public function hasData(): bool {
	return !is_null( $this->data );
}
59 |
/**
 * Get the image data.
 * @return string
 * @throws LogicException If no data has been set yet.
 */
public function getData(): string {
	if ( $this->data !== null ) {
		return $this->data;
	}

	throw new LogicException( 'Image::setData() must be called before getData()' );
}
69 |
/**
 * Store the image data.
 * @param string $data The data to keep on this object.
 */
public function setData( string $data ): void {
	$this->data = $data;
}
76 |
/**
 * Get the image data size in bytes: the explicitly set size when there is
 * one, otherwise the length of the stored data.
 * @return int
 * @throws LogicException If no data has been set yet.
 */
public function getSize(): int {
	if ( $this->data === null ) {
		throw new LogicException( 'Image::setData() must be called before getSize()' );
	}

	if ( isset( $this->size ) ) {
		return $this->size;
	}

	return strlen( $this->data );
}
87 |
/**
 * Store an explicit size; getSize() returns it in preference to the data length.
 * @param int $size
 */
public function setSize( int $size ): void {
	$this->size = $size;
}
94 | }
95 |
--------------------------------------------------------------------------------
/src/Twig/AppExtension.php:
--------------------------------------------------------------------------------
1 | tesseractEngine = $tesseractEngine;
26 | $this->transkribusEngine = $transkribusEngine;
27 | }
28 |
/**
 * The custom Twig functions provided by this extension.
 * @return TwigFunction[]
 */
public function getFunctions(): array {
	// Twig function name => method on this class that implements it.
	$callbacks = [
		'ocr_lang_name' => 'getOcrLangName',
		'line_id_name' => 'getLineIdName',
	];

	$functions = [];
	foreach ( $callbacks as $twigName => $method ) {
		$functions[] = new TwigFunction( $twigName, [ $this, $method ] );
	}

	return $functions;
}
39 |
/**
 * The custom Twig filters provided by this extension.
 * @return TwigFilter[]
 */
public function getFilters(): array {
	$textareaRows = new TwigFilter( 'textarea_rows', [ $this, 'getTextareaRows' ] );

	return [ $textareaRows ];
}
49 |
/**
 * Get the number of rows a textarea should have to show the given text,
 * with a minimum of 10 rows.
 * @param string $text
 * @return int
 */
public function getTextareaRows( string $text ): int {
	// Newlines separate lines, so text containing N newlines spans N + 1 lines.
	$lineCount = substr_count( $text, "\n" ) + 1;

	return max( 10, $lineCount );
}
58 |
/**
 * Get the name of the given language, as reported by the Tesseract engine's
 * model titles. This adds a few translations that don't exist in Intuition.
 * @param string|null $lang
 * @return string
 */
public function getOcrLangName( ?string $lang = null ): string {
	$engine = $this->tesseractEngine;

	return $engine->getModelTitle( $lang );
}
67 |
/**
 * Get the name of the given line detection model ID, as reported by the
 * Transkribus engine.
 * @param string|null $lineIdLang
 * @return string
 */
public function getLineIdName( ?string $lineIdLang = null ): string {
	$engine = $this->transkribusEngine;

	return $engine->getLineIdModelName( $lineIdLang );
}
76 | }
77 |
--------------------------------------------------------------------------------
/check_tesseract.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Sanity-check the local tesseract installation: the executable must exist,
# its version must be at least MIN_TESSERACT_VERSION and, when jq is
# available, every language listed under "tesseract" in public/models.json
# must be installed. Exits 1 on the first failed check.

set -euo pipefail

# Note, this assumes that the output of `tesseract --version` will remain consistent.
MIN_TESSERACT_VERSION="tesseract 4"

# The `+placeholder` expansion is non-empty whenever the variable is set,
# even to an empty value, so merely defining it disables the check.
if [ -n "${DISABLE_TESSERACT_CHECK+placeholder}" ]; then
	echo "DISABLE_TESSERACT_CHECK is set, skipping tesseract check."
	exit 0
fi

echo "Checking tesseract installation"

if ! type tesseract &> /dev/null; then
	echo "Tesseract not found!"
	exit 1
else
	echo "Tesseract executable OK"
fi

# Similar to what tesseract-ocr-for-php does
CUR_TESSERACT_VERSION=$(tesseract --version | head -n1 | sed "s/tesseract v/tesseract /")
# Version-sort the two strings; the minimum requirement must come out first.
CUR_MIN_VERSION=$( printf '%s\n' "$MIN_TESSERACT_VERSION" "$CUR_TESSERACT_VERSION" | sort -V | head -n1 )
if [ "$CUR_MIN_VERSION" != "$MIN_TESSERACT_VERSION" ]; then
	echo "Tesseract version mismatch: current is ${CUR_TESSERACT_VERSION}, minimum required is ${MIN_TESSERACT_VERSION}"
	exit 1
else
	echo "Tesseract version OK"
fi

# For the future, we might make languages optional; we'd probably have to cache the result of `tesseract --list-langs`.

if type jq &> /dev/null; then
	# Sort both just in case, and remove duplicates from the expected list to account for google having more variants that
	# map to the same code in tesseract (e.g. zh and zh-hans)
	# Skip deu_latf as it's not installed by default yet (but will be in the future).
	AVAILABLE_LANGS=$(tesseract --list-langs | tail -n +2 | sort)
	EXPECTED_LANGS=$(jq -r '.tesseract | keys[]' public/models.json | sort -u | sed "/^deu_latf$/d" )

	# comm -23: lines only in the first input; comm -13: lines only in the second.
	EXTRA_LOCAL_LANGS=$( comm -23 <( echo "$AVAILABLE_LANGS" ) <( echo "$EXPECTED_LANGS" ) )
	MISSING_LOCAL_LANGS=$( comm -13 <( echo "$AVAILABLE_LANGS" ) <( echo "$EXPECTED_LANGS" ) )

	if [ -z "$MISSING_LOCAL_LANGS" ]; then
		echo "All expected languages are installed"
	else
		printf 'The following required languages are not installed:\n%s\n' "$MISSING_LOCAL_LANGS"
		exit 1
	fi
	# Extra languages are reported but do not fail the check.
	if [ -n "$EXTRA_LOCAL_LANGS" ]; then
		printf 'The following languages are installed but not supported:\n%s\n' "$EXTRA_LOCAL_LANGS"
	fi
else
	echo "jq is not installed, skipping validation of available languages"
fi

echo "All checks passed!"
58 |
--------------------------------------------------------------------------------
/i18n/te.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Chaduvari"
5 | ]
6 | },
7 | "title": "WikimediaOCR",
8 | "subtitle": "బొమ్మల నుండి పాఠ్యాన్ని ఎత్తిరాయండి",
9 | "form-heading": "ఓ బొమ్మను ఎత్తిరాయండి",
10 | "image-url": "బొమ్మ URL",
11 | "image-url-help": "$1 లాంటి ఏదైనా వికీమీడియా సర్వరులో ఉన్న బొమ్మ URL ఇవ్వండి:",
12 | "image-url-error": "బొమ్మ URL తప్పనిసరిగా {{PLURAL:$1|కింది డొమెయిను పేరుతో|కింది డొమెయిను పేర్లలో ఏదో ఒకదానితో}} మొదలై, ఒక సరైన ఫైలు ఎక్స్టెన్షనుతో ముగియాలి: $2",
13 | "image-alt-text": "ఒరిజినలు బొమ్మ",
14 | "language-code": "భాషలు (ఐచ్ఛికం)",
15 | "engine": "OCR ఇంజను",
16 | "engine-name-google": "Google Cloud Vision OCR",
17 | "engine-name-tesseract": "Tesseract OCR",
18 | "engine-not-found-warning": "అడిగిన ఇంజను '$1' కనబడలేదు. దాని బదులు డిఫాల్టు ఇంజను '$2' ను వాడుతున్నాం.",
19 | "engine-invalid-langs-warning": "కింది భాషలు చెల్లనివి, లేదా ఇంజను వాటికి మద్దతు ఇవ్వడం లేదు. అంచేత వాటిని పక్కన పెట్టాం: $1",
20 | "submit": "మొత్తం పేజీ నంతటినీ ఎత్తిరాయి",
21 | "submit-crop": "ఈ ప్రాంతాన్ని ఎత్తిరాయి",
22 | "drag-help": "క్రాప్ పరికరాన్ని ఎంచుకుని, కొంత ప్రాంతాన్ని మాత్రమే ఎత్తిరాయాలంటే, కింద ఉన్న బొమ్మపై ఒక దీర్ఘ చతురస్ర రూపాన్ని లాగండి.",
23 | "drag-mode-move": "లాగితే బొమ్మ కదులుతుంది",
24 | "drag-mode-move-alt": "'కదిలించు' చర్యను సూచించే ఐకను",
25 | "drag-mode-crop": "లాగితే కొత్త క్రాప్ ప్రాంతాన్ని సృష్టిస్తుంది",
26 | "drag-mode-crop-alt": "'క్రాప్' చర్యను సూచించే ఐకను",
27 | "copy-to-clipboard": "క్లిప్బోర్డుకు కాపీ చెయ్యి",
28 | "copied-to-clipboard": "కాపీ చేసాం!",
29 | "google-error": "గూగుల్ సేవ ఓ లోపాన్ని చూపించింది: $1",
30 | "image-retrieval-failed": "బొమ్మను తేవడం విఫలమైంది: $1",
31 | "documentation": "డాక్యుమెంటేషను",
32 | "api": "API",
33 | "api-tooltip": "API డాక్యుమెంటేషన్ను చూపించు",
34 | "version": "వెర్షను $1",
35 | "report-issue": "సమస్యను నివేదించండి",
36 | "langs-placeholder": "భాషను ఆటోమాటిగ్గా ఎంచుకునేందుకు ఖాళీగా వదిలెయ్యండి.",
37 | "langs-param-error": "కింది {{PLURAL:$1|భాషకు|భాషలకు}} OCR ఇంజను మద్దతు ఇవ్వదు: $2",
38 | "tesseract-options": "Tesseract వికల్పాలు",
39 | "tesseract-psm-7": "బొమ్మను ఒకే పాఠ్యపు పంక్తిగా భావించు.",
40 | "tesseract-psm-8": "బొమ్మను ఒకే పదంగా భావించు.",
41 | "tesseract-psm-9": "బొమ్మను ఒక వృత్తంలో ఉన్న ఒకే పదంగా భావించు.",
42 | "tesseract-psm-10": "బొమ్మను ఒకే కారెక్టరుగా భావించు.",
43 | "tesseract-internal-error": "టెస్సరాక్ట్ ఇంజను ఏదో అంతర్గత లోపాన్ని చూపించింది.",
44 | "transkribus-no-lang-error": "భాష దేన్నీ ఎంచుకోలేదు",
45 | "transkribus-multiple-lang-error": "బహుళ భాషలకు అనుమతి లేదు, ఒకటే భాషను ఇవ్వండి"
46 | }
47 |
--------------------------------------------------------------------------------
/i18n/az.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Nemoralis",
5 | "Toghrul Rahimli",
6 | "Şeyx Şamil",
7 | "Əkrəm",
8 | "Əkrəm Cəfər"
9 | ]
10 | },
11 | "title": "WikimediaOCR",
12 | "subtitle": "Şəkillərdən mətni transkripsiya edin",
13 | "form-heading": "Şəkli transkripsiya edin",
14 | "image-url": "Şəkil URL",
15 | "image-url-help": "Vikimedia serverində yerləşdirilən şəkil URL-ni daxil edin, məsələn: $1",
16 | "image-url-error": "Şəkil URL-i {{PLURAL:$1|aşağıdakı domen adı|aşağıdakı domen adlarından biri}} ilə başlamalıdır və etibarlı fayl uzantısı ilə bitməlidir: $2",
17 | "image-alt-text": "Orijinal şəkil",
18 | "language-code": "Dillər (qeyri-məcburi)",
19 | "engine": "OCR motoru",
20 | "engine-name-transkribus": "Transkribus OCR",
21 | "engine-not-found-warning": "Tələb olunan \"$1\" mühərriki tapılmadı. Əvəzində defolt mühərrik olan \"$2\" istifadə edin.",
22 | "engine-invalid-langs-warning": "Aşağıdakı dillər etibarsızdır və ya mühərrik tərəfindən dəstəklənmir və nəzərə alınmayıb: $1",
23 | "submit": "Bütün səhifəni transkripsiya et",
24 | "submit-crop": "Transkripsiya sahəsi",
25 | "drag-help": "Kəsmə alətini seçin və səhifənin yalnız bir sahəsini transkripsiya etmək üçün aşağıdakı şəkildə düzbucaqlı çəkin.",
26 | "drag-mode-move": "Sürüşdürdükdə şəkil hərəkət edəcək",
27 | "drag-mode-move-alt": "\"Yerini dəyiş\" hərəkətini təmsil edən ikona.",
28 | "drag-mode-crop": "Sürüşdürmək yeni kəsim sahəsi yaradacaq",
29 | "drag-mode-crop-alt": "\"Kəsmə\" hərəkətini təmsil edən ikona.",
30 | "copy-to-clipboard": "Mübadilə buferinə kopiyala",
31 | "copied-to-clipboard": "Kopyalandı!",
32 | "google-error": "Google xidməti xəta verdi: $1",
33 | "image-retrieval-failed": "Şəklin alınması uğursuz oldu: $1",
34 | "documentation": "Sənədləşdirmə",
35 | "api-tooltip": "API sənədləşdirməsinə bax",
36 | "version": "$1 versiyası",
37 | "report-issue": "Bir problem bildir",
38 | "langs-placeholder": "Avtomatik dil aşkarlanması üçün boş buraxın.",
39 | "langs-param-error": "Aşağıdakı {{PLURAL:$1|dil|dillər}} OCR mühərriki tərəfindən dəstəklənmir: $2",
40 | "loading-message": "Transkripsiya həyata keçirilir...",
41 | "tesseract-options": "Tesseract seçimləri",
42 | "tesseract-psm-label": "Səhifə bölmə metodları",
43 | "tesseract-psm-help": "Daha yaxşı çox sütunlu dəstək üçün \"Seyrək mətn\"i sınayın.",
44 | "tesseract-psm-0": "Yalnız oriyentasiya və skript aşkarlanması (OSD).",
45 | "tesseract-psm-1": "OSD ilə avtomatik səhifə bölünməsi.",
46 | "tesseract-psm-2": "Avtomatik səhifə bölünməsi, lakin OSD və ya OCR yoxdur. (həyata keçirilmir)"
47 | }
48 |
--------------------------------------------------------------------------------
/i18n/lt.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Nokeoo"
5 | ]
6 | },
7 | "subtitle": "Transkribuoti tekstą iš paveikslėlių",
8 | "form-heading": "Transkribuoti paveikslėlį",
9 | "image-url": "Paveikslėlio URL",
10 | "image-url-help": "Įterpkite paveikslėlio URL, kuris talpinamas Vikimedija serveryje, pvz.: $1",
11 | "image-url-error": "Paveikslėlio URL turi prasidėti su {{PLURAL:$1|šiuo domeno pavadinimu|su vienu iš šių domeno pavadinimų}} ir baigtis su galimu failo plėtiniu: $2",
12 | "image-alt-text": "Originalus paveikslėlis",
13 | "language-code": "Kalbos (nebūtina)",
14 | "engine": "OCR variklis",
15 | "engine-not-found-warning": "Prašomas variklis '$1' nerastas. Vietoj to, naudojamas numatytasis variklis '$2'.",
16 | "engine-invalid-langs-warning": "Šios kalbos negalimos arba nepalaikomos variklio ir todėl buvo ignoruojamos: $1",
17 | "submit": "Transkribuoti visą puslapį",
18 | "submit-crop": "Transkribuoti plotą",
19 | "drag-help": "Pasirinkite apkarpymo įrankį ir nupieškite stačiakampį paveikslėlyje žemiau, kad transkribuotumėte tik puslapio plotą.",
20 | "drag-mode-move": "Tempimas perkels paveikslėlį",
21 | "drag-mode-move-alt": "Ikona, nurodanti perkėlimo veiksmą.",
22 | "drag-mode-crop": "Tempimas sukurs naują iškarpos plotą",
23 | "drag-mode-crop-alt": "Ikona, nurodanti kirpimo veiksmą.",
24 | "copy-to-clipboard": "Kopijuoti į iškarpinę",
25 | "copied-to-clipboard": "Nukopijuota!",
26 | "google-error": "Google paslauga grąžino klaidą: $1",
27 | "image-retrieval-failed": "Paveikslėlio gavimas nepavyko: $1",
28 | "documentation": "Dokumentacija",
29 | "api-tooltip": "Žiūrėti API dokumentaciją",
30 | "version": "Versija $1",
31 | "report-issue": "Pranešti apie problemą",
32 | "langs-placeholder": "Palikite tuščia, kad kalba būtų nustatyta automatiškai.",
33 | "langs-param-error": "{{PLURAL:$1|Ši kalba nepalaikoma|Šios kalbos nepalaikomos}} OCR variklio: $2",
34 | "tesseract-psm-7": "Laikyti paveikslėlį vienos eilutės tekstu.",
35 | "tesseract-psm-8": "Laikyti paveikslėlį vienu žodžiu.",
36 | "tesseract-psm-9": "Laikyti paveikslėlį vienu žodžiu apskritime.",
37 | "tesseract-psm-10": "Laikyti paveikslėlį vienu simboliu.",
38 | "transkribus-language-code": "Kalbos modelis",
39 | "transkribus-unauthorized-error": "Klaidos kodas '$1' :: prašymas neleistinas",
40 | "transkribus-default-error": "Klaidos kodas '$1' :: nepavyko užbaigti prašymo, bandykite dar kartą!",
41 | "transkribus-no-lang-error": "Nepasirinkta jokia kalba",
42 | "transkribus-multiple-lang-error": "Kelios kalbos neleidžiamos, nurodykite vieną kalbą",
43 | "transkribus-line-id-none-option": "Nėra"
44 | }
45 |
--------------------------------------------------------------------------------
/config/services.yaml:
--------------------------------------------------------------------------------
1 | # This file is the entry point to configure your own services.
2 | # Files in the packages/ subdirectory configure your dependencies.
3 |
4 | # Put parameters here that don't need to change on each machine where the app is deployed
5 | # https://symfony.com/doc/current/best_practices/configuration.html#application-related-configuration
6 | parameters:
7 | cache_ttl: '%env(APP_CACHE_TTL)%' # value comes from the APP_CACHE_TTL environment variable
8 |
9 | services:
10 | # default configuration for services in *this* file
11 | _defaults:
12 | autowire: true # Automatically injects dependencies in your services.
13 | autoconfigure: true # Automatically registers your services as commands, event subscribers, etc.
14 |
15 | # makes classes in src/ available to be used as services
16 | # this creates a service per class whose id is the fully-qualified class name
17 | App\:
18 | resource: '../src/'
19 | exclude:
20 | - '../src/DependencyInjection/'
21 | - '../src/Kernel.php'
22 | - '../src/Tests/'
23 |
24 | # controllers are imported separately to make sure services can be injected
25 | # as action arguments even if you don't extend any base controller class
26 | App\Controller\:
27 | resource: '../src/Controller/'
28 | tags: ['controller.service_arguments']
29 |
30 | # https://symfony.com/doc/current/service_container/parent_services.html
31 | App\Engine\EngineBase: # shared definition referenced as "parent" by the three engine services below
32 | arguments:
33 | $projectDir: '%kernel.project_dir%'
34 | calls:
35 | - setImageHosts: [ '%env(APP_IMAGE_HOSTS)%' ] # setter call fed from the APP_IMAGE_HOSTS environment variable
36 |
37 | App\Engine\TesseractEngine:
38 | parent: App\Engine\EngineBase
39 |
40 | App\Engine\GoogleCloudVisionEngine:
41 | parent: App\Engine\EngineBase
42 | arguments:
43 | $keyFile: '%env(APP_GOOGLE_KEYFILE)%' # extra named argument on top of the parent definition
44 |
45 | App\Engine\TranskribusEngine:
46 | parent: App\Engine\EngineBase
47 |
48 | App\Engine\TranskribusClient:
49 | arguments:
50 | $username: '%env(APP_TRANSKRIBUS_USERNAME)%' # credentials are read from the environment
51 | $password: '%env(APP_TRANSKRIBUS_PASSWORD)%'
52 |
53 | App\EventListener\ExceptionListener:
54 | arguments: # positional service references, passed in this order
55 | - '@request_stack'
56 | - '@twig'
57 | - '@Krinkle\Intuition\Intuition'
58 | - '@monolog.logger.tesseract'
59 | tags:
60 | - { name: kernel.event_listener, event: kernel.exception } # subscribes the listener to the kernel.exception event
61 |
62 | # Vendor services for autowiring
63 | thiagoalessio\TesseractOCR\TesseractOCR: # declared with no explicit options of its own
64 |
65 | # please note that last definitions always *replace* previous ones
66 | # add more service definitions when explicit configuration is needed
67 |
--------------------------------------------------------------------------------
/webpack.config.js:
--------------------------------------------------------------------------------
1 | const Encore = require('@symfony/webpack-encore');
2 |
3 | // Manually configure the runtime environment if not already configured yet by the "encore" command.
4 | // It's useful when you use tools that rely on webpack.config.js file.
5 | if (!Encore.isRuntimeEnvironmentConfigured()) {
6 | Encore.configureRuntimeEnvironment(process.env.NODE_ENV || 'dev'); // falls back to 'dev' when NODE_ENV is unset or empty
7 | }
8 |
9 | Encore
10 | // directory where compiled assets will be stored
11 | .setOutputPath('public/build/')
12 | // public path used by the web server to access the output path
13 | .setPublicPath('/build')
14 | // only needed for CDNs or sub-directory deploys
15 | //.setManifestKeyPrefix('build/')
16 | // copy the files under ./assets/images to images/, keeping each file's sub-path, name and extension
17 | .copyFiles({
18 | from: './assets/images',
19 | to: 'images/[path][name].[ext]'
20 | })
21 |
22 | /*
23 | * ENTRY CONFIG
24 | *
25 | * Each entry will result in one JavaScript file (e.g. app.js)
26 | * and one CSS file (e.g. app.css) if your JavaScript imports CSS.
27 | */
28 | .addEntry('app', './assets/app.js')
29 |
30 | // When enabled, Webpack "splits" your files into smaller pieces for greater optimization.
31 | .splitEntryChunks()
32 |
33 | // will require an extra script tag for runtime.js
34 | // but, you probably want this, unless you're building a single-page app
35 | .enableSingleRuntimeChunk()
36 |
37 | /*
38 | * FEATURE CONFIG
39 | *
40 | * Enable & configure other features below. For a full
41 | * list of features, see:
42 | * https://symfony.com/doc/current/frontend.html#adding-more-features
43 | */
44 | .cleanupOutputBeforeBuild()
45 | .enableBuildNotifications()
46 | .enableSourceMaps(!Encore.isProduction()) // source maps are turned on for non-production builds only
47 | // enables hashed filenames (e.g. app.abc123.css)
48 | .enableVersioning(Encore.isProduction())
49 | // append the class-properties proposal plugin to the Babel plugin list
50 | .configureBabel((config) => {
51 | config.plugins.push('@babel/plugin-proposal-class-properties');
52 | })
53 |
54 | // enables @babel/preset-env polyfills
55 | .configureBabelPresetEnv((config) => {
56 | config.useBuiltIns = 'usage';
57 | config.corejs = 3;
58 | })
59 |
60 | // enables Sass/SCSS support
61 | //.enableSassLoader()
62 |
63 | // uncomment if you use TypeScript
64 | //.enableTypeScriptLoader()
65 |
66 | // uncomment if you use React
67 | //.enableReactPreset()
68 |
69 | // uncomment to get integrity="..." attributes on your script & link tags
70 | // requires WebpackEncoreBundle 1.4 or higher
71 | //.enableIntegrityHashes(Encore.isProduction())
72 |
73 | // uncomment if you're having problems with a jQuery plugin
74 | //.autoProvidejQuery()
75 | ;
76 | // export the webpack configuration object generated from the Encore calls above
77 | module.exports = Encore.getWebpackConfig();
78 |
--------------------------------------------------------------------------------
/tests/Controller/OcrControllerTest.php:
--------------------------------------------------------------------------------
1 | push( $request );
35 | $request->setSession( new Session( new MockArraySessionStorage() ) );
36 | $intuition = new Intuition( [] );
37 | $gcv = new GoogleCloudVisionEngine(
38 | dirname( __DIR__ ) . '/fixtures/google-account-keyfile.json',
39 | $intuition,
40 | $this->projectDir,
41 | new MockHttpClient()
42 | );
43 | $controller = new OcrController(
44 | $requestStack,
45 | $intuition,
46 | new EngineFactory(
47 | $gcv,
48 | new TesseractEngine( new MockHttpClient(), $intuition, $this->projectDir, new TesseractOCR() ),
49 | new TranskribusEngine(
50 | new TranskribusClient(
51 | getenv( 'APP_TRANSKRIBUS_USERNAME' ),
52 | getenv( 'APP_TRANSKRIBUS_PASSWORD' ),
53 | new MockHttpClient(),
54 | new NullAdapter(),
55 | new NullAdapter()
56 | ),
57 | $intuition,
58 | $this->projectDir,
59 | new MockHttpClient()
60 | ),
61 | ),
62 | new FilesystemAdapter()
63 | );
64 | $this->assertSame( $expectedLangs, $controller->getLangs( $request ) );
65 | }
66 |
67 | /**
68 | * @return mixed[] Test cases, each a pair of [ request parameters, expected language list ]; presumably the data provider for the getLangs() test above (its annotation is not visible here, confirm).
69 | */
70 | public function provideGetLang(): array {
71 | return [
72 | [
73 | [ 'lang' => 'ar' ], // single value under the 'lang' key
74 | [ 'ar' ],
75 | ],
76 | [
77 | [ 'langs' => [ 'a|b', 'c!', 'ab' ] ], // values containing '|' and '!', plus a plain 'ab'
78 | [ 'ab', 'c' ], // expected: punctuation gone and 'ab' listed only once
79 | ],
80 | 'special characters' => [
81 | [ 'langs' => [ 'sr-Latn', 'Canadian_Aboriginal' ] ], // hyphen and underscore are expected to be kept
82 | [ 'sr-Latn', 'Canadian_Aboriginal' ],
83 | ],
84 | 'numbers' => [
85 | [ 'langs' => [ 'ru-petr1708' ] ], // digits are expected to be kept
86 | [ 'ru-petr1708' ],
87 | ],
88 | ];
89 | }
90 | }
91 |
--------------------------------------------------------------------------------
/i18n/cs.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Georg101"
5 | ]
6 | },
7 | "title": "Wikimédia OCR",
8 | "subtitle": "Přepis textu z obrázků",
9 | "form-heading": "Přepsat obrázek",
10 | "image-url": "Adresa URL obrázku",
11 | "image-url-help": "Vložte adresu URL obrázku na server Wikimedia, například: $1",
12 | "image-url-error": "URL obrázku musí začínat na {{PLURAL:$1|následující název domény|jeden z následujících názvů domén}} a končit platnou příponou souboru: $2",
13 | "image-alt-text": "Původní obrázek",
14 | "language-code": "Jazyky (volitelné)",
15 | "engine": "engine OCR",
16 | "engine-not-found-warning": "Požadovaný modul „$1“ nebyl nalezen. Místo toho se používá výchozí modul „$2“.",
17 | "engine-invalid-langs-warning": "Následující jazyky jsou neplatné nebo nepodporované a byly ignorované: $1",
18 | "submit": "Přepsat",
19 | "copy-to-clipboard": "Kopírovat do schránky",
20 | "copied-to-clipboard": "Zkopírováno!",
21 | "google-error": "Služba Google vrátila chybu: $1",
22 | "image-retrieval-failed": "Načítání obrázku selhalo: $1",
23 | "documentation": "Dokumentace",
24 | "version": "Verze $1",
25 | "report-issue": "Nahlásit problém",
26 | "langs-placeholder": "Pro automatickou detekci jazyka nechejte pole prázdné.",
27 | "langs-param-error": "{{PLURAL:$1|Následující jazyk není podporovaný|Následující jazyky nejsou podporované}} modulem OCR: $2",
28 | "tesseract-options": "Možnosti Tesseractu",
29 | "tesseract-psm-label": "Metoda segmentace stránek",
30 | "tesseract-psm-help": "Vyzkoušejte „Řídký text“ pro lepší podporu vícero sloupců.",
31 | "tesseract-psm-0": "Jenom orientace a detekce skriptu (OSD).",
32 | "tesseract-psm-1": "Automatická segmentace stránek pomocí OSD.",
33 | "tesseract-psm-2": "Automatická segmentace stránek, ale bez OSD nebo OCR. (není implementováno)",
34 | "tesseract-psm-3": "Plně automatická segmentace stránek, ale bez OSD. (Výchozí)",
35 | "tesseract-psm-4": "Předpokládaný jeden sloupec textu s proměnlivou velikostí.",
36 | "tesseract-psm-5": "Předpokládaný jeden jednotný blok vertikálně zarovnaného textu.",
37 | "tesseract-psm-6": "Předpokládaný jeden jednotný blok textu.",
38 | "tesseract-psm-7": "Zacházet s obrázkem jako s jedním řádkem textu.",
39 | "tesseract-psm-8": "Zacházet s obrázkem jako s jedním slovem.",
40 | "tesseract-psm-9": "Zacházet s obrázkem jako s jedním slovem v kruhu.",
41 | "tesseract-psm-10": "Zacházet s obrázkem jako s jedním znakem.",
42 | "tesseract-psm-11": "Řídký text. Najděte co nejvíce textu v rozházeném pořadí.",
43 | "tesseract-psm-12": "Řídký text s OSD.",
44 | "tesseract-psm-13": "Syrový řádek. Zacházet s obrázkem jako s jedním řádkem textu a obcházet hacky, které jsou specifické pro Tesseract.",
45 | "tesseract-param-error": "Tesseract nepodporuje možnost '$1' s hodnotou $2. Maximální hodnota: $3",
46 | "tesseract-internal-error": "Engine tesseractu vrátil interní chybu."
47 | }
48 |
--------------------------------------------------------------------------------
/i18n/pl.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Ankry",
5 | "Chrumps",
6 | "Darellur",
7 | "Frozengeist",
8 | "Strebski",
9 | "WaldiSt",
10 | "Woytecr"
11 | ]
12 | },
13 | "title": "WikimediaOCR",
14 | "subtitle": "Narzędzie do transkrypcji tekstu ze skanowanych obrazów na Wikimedia Commons, do użytku na Wikisource i w innych miejscach.",
15 | "form-heading": "Wyodrębnij tekst",
16 | "image-url": "URL obrazu",
17 | "image-url-help": "Podaj adres URL obrazka na serwerze Wikimedia, takim jak $1",
18 | "image-alt-text": "Obrazek źródłowy",
19 | "language-code": "Języki (opcjonalnie)",
20 | "engine": "Silnik OCR",
21 | "engine-name-transkribus": "Transkribus OCR",
22 | "engine-not-found-warning": "Żądany silnik '$1' jest niedostępny. Będzie użyty silnik '$2'.",
23 | "engine-invalid-langs-warning": "Następujące języki są nieprawidłowe lub niewspierane i zostaną zignorowane: $1",
24 | "submit": "Wyodrębnij tekst z całej strony",
25 | "submit-crop": "Wyodrębnij z obszaru",
26 | "drag-help": "Aktywuj narzędzie przycinania i zaznacz na poniższym obrazku obszar, z którego będzie wyodrębniony tekst.",
27 | "drag-mode-move": "Przeciąganie przesunie obraz",
28 | "drag-mode-move-alt": "Ikona reprezentująca działanie „przesuń”.",
29 | "drag-mode-crop": "Przeciąganie utworzy nowy obszar przycinania",
30 | "drag-mode-crop-alt": "Ikona reprezentująca działanie „przytnij”.",
31 | "copy-to-clipboard": "Kopiuj do schowka",
32 | "copied-to-clipboard": "Skopiowano!",
33 | "google-error": "Usługa Google zgłosiła błąd: $1",
34 | "image-retrieval-failed": "Nie udało się pobrać obrazka: $1",
35 | "documentation": "Dokumentacja",
36 | "api-tooltip": "Dokumentacja API",
37 | "version": "Wersja $1",
38 | "report-issue": "Zgłoś problem",
39 | "langs-placeholder": "Pozostaw puste aby automatycznie rozpoznać język.",
40 | "loading-message": "Wykonywanie transkrypcji...",
41 | "tesseract-options": "Opcje tesseract",
42 | "tesseract-psm-label": "Metoda segmentacji stron",
43 | "tesseract-psm-1": "Automatyczna segmentacja stron z OSD.",
44 | "tesseract-psm-6": "Załóż jeden jednolity blok tekstu.",
45 | "tesseract-psm-7": "Traktuj obraz jako jedną linię tekstu.",
46 | "tesseract-psm-8": "Traktuj obraz jako jedno słowo.",
47 | "tesseract-psm-9": "Traktuj obraz jako jedno słowo w okręgu.",
48 | "tesseract-psm-10": "Traktuj obraz jako jeden znak.",
49 | "tesseract-internal-error": "Silnik tesseract zgłosił błąd wewnętrzny.",
50 | "transkribus-language-code": "Model Języka",
51 | "transkribus-unauthorized-error": "Kod błędu '$1' :: Żądanie nie jest autoryzowane",
52 | "transkribus-default-error": "Kod błędu '$1' :: Nie można zrealizować żądania, spróbuj ponownie!",
53 | "transkribus-browse-public-models": "Przeglądaj wszystkie modele języka publicznego dla Transkribus",
54 | "transkribus-request-for-model": "Złóż wniosek o dodanie modelu z Transkribus do narzędzia OCR",
55 | "transkribus-line-id-none-option": "Żaden",
56 | "transkribus-job-state": "Stan",
57 | "transkribus-job-description": "Opis",
58 | "transkribus-job-start": "Rozpoczęto",
59 | "transkribus-job-end": "Zakończono",
60 | "transkribus-job-waited": "Opóźnienie startu (minuty)"
61 | }
62 |
--------------------------------------------------------------------------------
/i18n/ru.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "DDPAT",
5 | "Ice bulldog",
6 | "Kareyac",
7 | "Okras",
8 | "Pplex.vhs",
9 | "Smigles",
10 | "Thothsum"
11 | ]
12 | },
13 | "title": "ВикимедиаOCR",
14 | "subtitle": "Инструмент для распознавания текста из отсканированных изображений с Викисклада для использования в Викитеке и других местах.",
15 | "form-heading": "Расшифровать изображение",
16 | "image-url": "URL изображения",
17 | "image-url-help": "Вставьте URL-адрес изображения, размещенного на сервере Викимедиа, например: $1",
18 | "image-url-error": "URL изображения должно начинаться со {{PLURAL:$1|следующих доменных имён}} и оканчиваться допустимым расширением файла: $2",
19 | "image-alt-text": "Исходное изображение",
20 | "language-code": "Языки (необязательно)",
21 | "engine": "Движок OCR",
22 | "engine-name-transkribus": "Transkribus OCR",
23 | "engine-not-found-warning": "Запрошенный движок '$1' не найден. Вместо этого используется движок по умолчанию — '$2'.",
24 | "engine-invalid-langs-warning": "Следующие языки недействительны или не поддерживаются движком и были проигнорированы: $1",
25 | "submit": "Транскрибировать всю страницу",
26 | "submit-crop": "Распознать область",
27 | "drag-help": "Выберите инструмент обрезки и перетащите прямоугольник на изображение ниже, чтобы распознать только одну область страницы.",
28 | "drag-mode-move": "Перетаскивание переместит изображение.",
29 | "copy-to-clipboard": "Скопировать в буфер обмена",
30 | "copied-to-clipboard": "Скопировано!",
31 | "google-error": "Служба Google вернула ошибку: $1",
32 | "image-retrieval-failed": "Не удалось получить изображение: $1",
33 | "documentation": "Документация",
34 | "api-tooltip": "Просмотреть документацию по API",
35 | "version": "Версия $1",
36 | "report-issue": "Сообщить об ошибке",
37 | "langs-placeholder": "Оставьте поле пустым для автоматического определения языка.",
38 | "langs-param-error": "Следующие {{PLURAL:$1|языки}} не поддерживаются движком ОРТ: $2",
39 | "loading-message": "Выполнение распознавания…",
40 | "tesseract-options": "Настройки Tesseract",
41 | "tesseract-psm-label": "Метод сегментации страницы",
42 | "tesseract-psm-1": "Автоматическая сегментация страниц с OSD.",
43 | "tesseract-psm-7": "Рассматривать изображение как одну текстовую строку.",
44 | "tesseract-psm-8": "Рассматривать изображение как одно слово.",
45 | "tesseract-psm-10": "Рассматривать изображение как один символ.",
46 | "tesseract-psm-12": "Разреженный текст с OSD.",
47 | "tesseract-internal-error": "Механизм тессеракта возвратил внутреннюю ошибку.",
48 | "transkribus-language-code": "Языковая модель",
49 | "transkribus-empty-response-error": "Не удалось проанализировать результат из API Transkribus",
50 | "transkribus-no-lang-error": "Язык не выбран",
51 | "transkribus-multiple-lang-error": "Нельзя использовать несколько языков, выберите один язык",
52 | "transkribus-options": "Настройки Transkribus",
53 | "transkribus-job-id": "Идентификатор задачи",
54 | "transkribus-job-state": "Состояние",
55 | "transkribus-job-description": "Описание",
56 | "transkribus-job-start": "Начато",
57 | "transkribus-job-end": "Завершено",
58 | "transkribus-job-waited": "Задержка запуска (в минутах)"
59 | }
60 |
--------------------------------------------------------------------------------
/i18n/vi.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Bapham123",
5 | "Minh Nguyen",
6 | "Nguyễn Mạnh An",
7 | "Vinhtantran"
8 | ]
9 | },
10 | "title": "WikimediaOCR",
11 | "subtitle": "Tách văn bản từ hình",
12 | "form-heading": "Tách văn bản từ hình",
13 | "image-url": "URL hình",
14 | "image-url-help": "Nhập địa chỉ URL của tập tin hình ảnh được lưu trữ trên máy chủ Wikimedia chẳng hạn như: $1",
15 | "image-url-error": "Địa chỉ URL của hình phải bắt đầu bằng {{PLURAL:$1|tên miền sau|một trong các tên miền sau}} và kết thúc bằng một phần mở rộng tập tin hợp lệ: $2",
16 | "image-alt-text": "Hình ảnh gốc",
17 | "language-code": "Các ngôn ngữ (tùy chọn):",
18 | "engine": "Bộ máy OCR",
19 | "engine-not-found-warning": "Không tìm thấy bộ máy ‘$1’ được yêu cầu. Hãy dùng bộ máy mặc định ‘$2’.",
20 | "engine-invalid-langs-warning": "Các ngôn ngữ sau không hợp lệ hoặc bị bỏ qua vì bộ máy không hỗ trợ: $1",
21 | "submit": "Tách văn bản",
22 | "drag-mode-move-alt": "Biểu tượng biểu thị cho hành động 'di chuyển'.",
23 | "drag-mode-crop-alt": "Biểu tượng biểu thị cho hành động 'cắt xén'.",
24 | "copy-to-clipboard": "Chép vào bảng tạm",
25 | "copied-to-clipboard": "Đã sao chép!",
26 | "google-error": "Dịch vụ Google trả về lỗi: $1",
27 | "image-retrieval-failed": "Truy xuất ảnh thất bại: $1",
28 | "documentation": "Tài liệu hướng dẫn",
29 | "api": "API",
30 | "api-tooltip": "Xem tài liệu API",
31 | "version": "Phiên bản $1",
32 | "report-issue": "Báo cáo lỗi",
33 | "langs-placeholder": "Để trống để tự động phát hiện ngôn ngữ.",
34 | "langs-param-error": "{{PLURAL:$1|Ngôn ngữ|Các ngôn ngữ}} sau không được bộ máy OCR hỗ trợ: $2",
35 | "tesseract-options": "Tùy chọn cho Tesseract",
36 | "tesseract-psm-label": "Phương pháp phân đoạn trang",
37 | "tesseract-psm-help": "Hãy thử “Văn bản lẻ tẻ” để được hỗ trợ bố trí nhiều cột chính xác hơn.",
38 | "tesseract-psm-0": "Chỉ Phát hiện hướng viết và kiểu chữ (Orientation and script detection - OSD).",
39 | "tesseract-psm-1": "Phân đoạn trang tự động bằng OSD.",
40 | "tesseract-psm-2": "Phân đoạn trang tự động, nhưng không dùng OSD lẫn OCR. (chưa hiện thực)",
41 | "tesseract-psm-3": "Phân đoạn trang tự động hoàn toàn, nhưng không dùng OSD. (Mặc định)",
42 | "tesseract-psm-4": "Giả định là một cột văn bản với kích thước thay đổi.",
43 | "tesseract-psm-5": "Giả định là một khối đồng nhất với văn bản canh dọc.",
44 | "tesseract-psm-6": "Giả định là một khối văn bản đồng nhất.",
45 | "tesseract-psm-7": "Xem ảnh như một dòng văn bản duy nhất.",
46 | "tesseract-psm-8": "Xem ảnh như một từ duy nhất.",
47 | "tesseract-psm-9": "Xem ảnh như một từ duy nhất trong vòng tròn.",
48 | "tesseract-psm-10": "Xem ảnh như một ký tự duy nhất.",
49 | "tesseract-psm-11": "Văn bản lẻ tẻ. Tìm càng nhiều văn bản càng tốt không cần theo thứ tự cụ thể.",
50 | "tesseract-psm-12": "Dò văn bản bằng OSD.",
51 | "tesseract-psm-13": "Dòng thô. Xem ảnh như một dòng văn bản duy nhất, bỏ qua những mẹo đặc thù của Tesseract.",
52 | "tesseract-param-error": "Tùy chọn ‘$1’ với giá trị ‘$2’ không được Tesseract hỗ trợ. Giá trị tối đa: $3",
53 | "tesseract-internal-error": "Bộ máy tesseract trả về lỗi nội bộ.",
54 | "transkribus-no-lang-error": "Không có ngôn ngữ nào được chọn",
55 | "transkribus-line-id-none-option": "Không có"
56 | }
57 |
--------------------------------------------------------------------------------
/i18n/sv.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "DraconicDark",
5 | "Sabelöga",
6 | "WikiPhoenix"
7 | ]
8 | },
9 | "title": "Wikimedia OCR",
10 | "subtitle": "Transkribera text från bilder",
11 | "form-heading": "Transkribera en bild",
12 | "image-url": "Bild-URL",
13 | "image-url-help": "Ange en bild-URL som finns på en Wikimedia som exempelvis: $1",
14 | "image-url-error": "Bild-URL måste börja med {{PLURAL:$1|följande domännamn|ett av följande domännamn}} och avslutas med ett giltigt filtillägg: $2",
15 | "image-alt-text": "Originalbilden",
16 | "language-code": "Språk (valfri)",
17 | "engine": "OCR-motor",
18 | "engine-not-found-warning": "Den begärda motorn '$1' hittades inte. Använd standardmotorn '$2' i stället.",
19 | "engine-invalid-langs-warning": "Följande språk är ogiltiga eller så stöds de inte av motorn och ignorerades: $1",
20 | "submit": "Transkribera hela sidan",
21 | "submit-crop": "Transkribera område",
22 | "drag-help": "Välj klippningsverktyget och rita en rektangel i bilden nedan för att bara transkribera en bit av sidan.",
23 | "drag-mode-move": "Drar du på bilden flyttas den",
24 | "drag-mode-move-alt": "Ikonen för åtgärden \"flytta\".",
25 | "drag-mode-crop": "Genom att dra kommer ett nytt klippningsområde skapas",
26 | "drag-mode-crop-alt": "Ikonen för åtgärden \"klipp\".",
27 | "copy-to-clipboard": "Kopiera till urklipp",
28 | "copied-to-clipboard": "Kopierades!",
29 | "google-error": "Google-tjänsten returnerade ett fel: $1",
30 | "image-retrieval-failed": "Bilden hittades inte: $1",
31 | "documentation": "Dokumentation",
32 | "api-tooltip": "Visa API-dokumentationen",
33 | "version": "Version $1",
34 | "report-issue": "Rapportera ett problem",
35 | "langs-placeholder": "Lämna som tom för automatisk språkidentifiering.",
36 | "langs-param-error": "Följande {{PLURAL:$1|språk}} stöds inte av OCR-motorn: $2",
37 | "tesseract-options": "Tesseract-alternativ",
38 | "tesseract-psm-label": "Metod för att segmentera sidan",
39 | "tesseract-psm-help": "Testa \"Gles text\" för bättre stöd för fler kolumner.",
40 | "tesseract-psm-0": "Bara orientering och skriptdetektering (OSD).",
41 | "tesseract-psm-1": "Automatisk sidsegmentering med OSD.",
42 | "tesseract-psm-2": "Automatisk segmentering av sida, men ingen OSD eller OCR. (inte implementerad)",
43 | "tesseract-psm-3": "Fullständig segmentering av sida, men ingen OSD. (Standard)",
44 | "tesseract-psm-4": "Anta en textkolumn i olika storlekar.",
45 | "tesseract-psm-5": "Anta ett enhetligt block med lodrät fixerad text.",
46 | "tesseract-psm-6": "Anta ett enhetligt textblock.",
47 | "tesseract-psm-7": "Behandla bilden som en textrad.",
48 | "tesseract-psm-8": "Behandla bilden som ett ord.",
49 | "tesseract-psm-9": "Behandla bilden som ett ord i en cirkel.",
50 | "tesseract-psm-10": "Behandla bilden som ett tecken.",
51 | "tesseract-psm-11": "Gles text. Hitta så mycket text som möjligt utan en bestämd ordning.",
52 | "tesseract-psm-12": "Gles text med OSD.",
53 | "tesseract-psm-13": "Rå rad. Behandla bilden som en textrad, förbigå hackningar som är Tesseract-specifika.",
54 | "tesseract-param-error": "Alternativet \"$1\" med ett värde av $2 stöds inte av Tesseract. Högsta värdet: $3",
55 | "tesseract-internal-error": "Tesseract-motorn returnerade ett internt fel."
56 | }
57 |
--------------------------------------------------------------------------------
/i18n/zh-hant.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Cookai1205",
5 | "Kly",
6 | "Winston Sung",
7 | "捍粵者"
8 | ]
9 | },
10 | "title": "維基媒體 OCR",
11 | "subtitle": "從圖片轉譯成文字",
12 | "form-heading": "轉譯一張圖片",
13 | "image-url": "圖像 URL",
14 | "image-url-help": "請插入一個托管在維基媒體伺服器的圖片 URL,例如像是:$1",
15 | "image-url-error": "圖片 URL必須以{{PLURAL:$1|以下網域名稱|以下網域名稱之一}}為開頭,並且要以有效的副檔名作為結尾:$2",
16 | "image-alt-text": "原始圖片",
17 | "language-code": "語言(非必填)",
18 | "engine": "OCR 引擎",
19 | "engine-name-transkribus": "Transkribus OCR",
20 | "engine-not-found-warning": "未找到請求的引擎「$1」。改使用預設引擎「$2」。",
21 | "engine-invalid-langs-warning": "以下語言無效或是不被引擎支援而被忽略:$1",
22 | "submit": "轉譯整個頁面",
23 | "submit-crop": "轉譯區域",
24 | "drag-help": "選擇裁剪工具,並在圖片下方拖曳一個矩形,來僅轉譯頁面上的單一區域。",
25 | "drag-mode-move": "拖曳將會移動圖片",
26 | "drag-mode-move-alt": "代表「移動」操作的圖示。",
27 | "drag-mode-crop": "拖曳將會產生新的裁剪區域",
28 | "drag-mode-crop-alt": "代表「裁剪」操作的圖示。",
29 | "copy-to-clipboard": "複製到剪貼簿",
30 | "copied-to-clipboard": "已複製!",
31 | "google-error": "Google 服務回傳一個錯誤:$1",
32 | "image-retrieval-failed": "圖片取回失敗:$1",
33 | "documentation": "文件",
34 | "api-tooltip": "檢視 API 文件",
35 | "version": "版本 $1",
36 | "report-issue": "問題回報",
37 | "langs-placeholder": "留空以自動偵測語言",
38 | "langs-param-error": "以下{{PLURAL:$1|語言}}不被 OCR 引擎支援:$2",
39 | "loading-message": "正在執行轉譯…",
40 | "tesseract-options": "Tesseract 選項",
41 | "tesseract-psm-label": "頁面拆分方式",
42 | "tesseract-psm-help": "嘗試「稀疏文字」來獲得較好的多行支援。",
43 | "tesseract-psm-0": "僅方向與文字檢測(OSD)。",
44 | "tesseract-psm-1": "以 OSD 來自動拆分頁面。",
45 | "tesseract-psm-2": "自動拆分頁面,但不使用 OSD 或是 OCR。(尚未實現)",
46 | "tesseract-psm-3": "全自動頁面拆分,但不使用 OSD。(預設)",
47 | "tesseract-psm-4": "視為文字內容長度可變的單一行。",
48 | "tesseract-psm-5": "視為有一個垂直文字對齊的均勻文字區塊。",
49 | "tesseract-psm-6": "視為有一個均勻文字區塊。",
50 | "tesseract-psm-7": "將圖片視為單行文字。",
51 | "tesseract-psm-8": "將圖片視為單一字詞。",
52 | "tesseract-psm-9": "將圖片視為環繞狀的單一字詞。",
53 | "tesseract-psm-10": "將圖片視為單一字元。",
54 | "tesseract-psm-11": "稀疏文字。以沒有特定的順序來盡可能找出文字。",
55 | "tesseract-psm-12": "以 OSD 來稀疏文字。",
56 | "tesseract-psm-13": "原始行。將圖片視為一行文字,繞過 Tesseract 特有的特殊處理。",
57 | "tesseract-param-error": "帶有值$2的選項「$1」不被 Tesseract 支援。最大值為:$3",
58 | "tesseract-no-text-error": "Tesseract 引擎沒有回傳此圖片的任何文字。",
59 | "tesseract-internal-error": "Tesseract 引擎返回一個內部錯誤。",
60 | "transkribus-language-code": "語言模型",
61 | "transkribus-unauthorized-error": "錯誤代碼 '$1' :: 未經授權的請求",
62 | "transkribus-default-error": "錯誤代碼 '$1' :: 無法完成請求,請重試!",
63 | "transkribus-empty-response-error": "無法解析來自 Transkribus API 的結果",
64 | "transkribus-init-process-error": "無法初始化 Transkribus 程序",
65 | "transkribus-failed-process-error": "Transkribus 程序失敗",
66 | "transkribus-no-lang-error": "未選擇語言",
67 | "transkribus-multiple-lang-error": "不允許多個語言,請指定一種語言",
68 | "transkribus-browse-public-models": "瀏覽 Transkribus 的所有公開語言模型",
69 | "transkribus-request-for-model": "請求從 Transkribus 添加一個模型到 OCR 工具",
70 | "transkribus-options": "Transkribus 選項",
71 | "transkribus-line-label": "文字行檢測模型",
72 | "transkribus-line-id-none-option": "無",
73 | "transkribus-mixed-line-option": "混合直線方向",
74 | "transkribus-line-help": "如果您不確定要使用哪種直線檢測模型,請留空",
75 | "transkribus-jobs": "Transkribus 任務",
76 | "transkribus-job-id": "任務 ID",
77 | "transkribus-job-state": "狀態",
78 | "transkribus-job-description": "描述",
79 | "transkribus-job-start": "已啟動",
80 | "transkribus-job-end": "已完成",
81 | "transkribus-job-waited": "啟動延遲(分鐘)"
82 | }
83 |
--------------------------------------------------------------------------------
/i18n/sk.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Yardom78"
5 | ]
6 | },
7 | "title": "Wikimédia OCR",
8 | "subtitle": "Prepis textu z obrázkov",
9 | "form-heading": "Urobiť prepis z obrázka",
10 | "image-url": "URL obrázka",
11 | "image-url-help": "Vložte adresu URL obrázka na serveri Wikimedia, napríklad: $1",
12 | "image-url-error": "URL obrázka musí začínať na {{PLURAL:$1|nasledujúci názov domény|jeden z nasledujúcich názvov domén}} a končiť platnou príponou súboru: $2",
13 | "image-alt-text": "Pôvodný obrázok",
14 | "language-code": "Jazyky (voliteľné)",
15 | "engine": "OCR motor",
16 | "engine-not-found-warning": "Požadovaný prostriedok „ $1 “ sa nenašiel. Namiesto toho sa používa predvolený modul „ $2 “.",
17 | "engine-invalid-langs-warning": "Nasledujúce jazyky sú neplatné alebo nepodporované a boli ignorované: $1",
18 | "submit": "Prepísať celú stránku",
19 | "submit-crop": "Prepísať oblasť",
20 | "drag-help": "Vyberte nástroj orezávania a označte na obrázku nižšie, ktorú oblasť chcete prepísať.",
21 | "drag-mode-move": "Potiahnutie posunie obrázok",
22 | "drag-mode-move-alt": "Ikona reprezentuje akciu presunu.",
23 | "drag-mode-crop": "Potiahnutie vytvorí novú oblasť pre orezanie",
24 | "drag-mode-crop-alt": "Ikona reprezentujúca akciu orezania.",
25 | "copy-to-clipboard": "Skopírovať do schránky",
26 | "copied-to-clipboard": "Skopírované!",
27 | "google-error": "Služba Google vrátila chybu: $1",
28 | "image-retrieval-failed": "Načítanie obrázka zlyhalo: $1",
29 | "documentation": "Dokumentácia",
30 | "api-tooltip": "Pozrieť API dokumentáciu",
31 | "version": "Verzia $1",
32 | "report-issue": "Nahlásiť problém",
33 | "langs-placeholder": "Pre automatickú detekciu jazyka nechajte pole prázdne.",
34 | "langs-param-error": "{{PLURAL:$1|Nasledovný jazyk nie je podporovaný|Nasledovné jazyky nie sú podporované}} modulom OCR: $2",
35 | "tesseract-options": "Možnosti Tesseractu",
36 | "tesseract-psm-label": "Metóda segmentácie stránok",
37 | "tesseract-psm-help": "Vyskúšajte „Riedky text“ pre lepšiu podporu viacerých stĺpcov.",
38 | "tesseract-psm-0": "Iba orientácia a detekcia skriptu (OSD).",
39 | "tesseract-psm-1": "Automatická segmentácia stránok pomocou OSD.",
40 | "tesseract-psm-2": "Automatická segmentácia stránok, ale bez OSD alebo OCR. (nie je implementovaný)",
41 | "tesseract-psm-3": "Plne automatická segmentácia stránok, ale bez OSD. (Predvolené)",
42 | "tesseract-psm-4": "Predpokladaný jeden stĺpec textu s premenlivou veľkosťou.",
43 | "tesseract-psm-5": "Predpokladaný jeden jednotný blok vertikálne zarovnaného textu.",
44 | "tesseract-psm-6": "Predpokladaný jeden jednotný blok textu.",
45 | "tesseract-psm-7": "S obrázkom zaobchádzajte ako s jedným textovým riadkom.",
46 | "tesseract-psm-8": "S obrázkom zaobchádzajte ako s jedným slovom.",
47 | "tesseract-psm-9": "S obrázkom zaobchádzajte ako s jedným slovom v kruhu.",
48 | "tesseract-psm-10": "S obrázkom zaobchádzajte ako s jedným znakom.",
49 | "tesseract-psm-11": "Riedky text. Nájdite čo najviac textu v hocijakom poradí.",
50 | "tesseract-psm-12": "Riedky text s OSD.",
51 | "tesseract-psm-13": "Surová linka. S obrázkom zaobchádzajte ako s jedným textovým riadkom, pričom obídete hacky, ktoré sú špecifické pre Tesseract.",
52 | "tesseract-param-error": "Tesseract nepodporuje možnosť ' $1 ' s hodnotou $2. Maximálna hodnota: $3",
53 | "tesseract-internal-error": "Prostriedok tesseractu vrátil internú chybu."
54 | }
55 |
--------------------------------------------------------------------------------
/src/Engine/GoogleCloudVisionEngine.php:
--------------------------------------------------------------------------------
1 | imageAnnotator = new ImageAnnotatorClient( [ 'credentials' => $keyFile ] );
36 | }
37 | }
38 |
39 | /**
40 | * @inheritDoc Returns the fixed identifier string 'google' for this engine.
41 | */
42 | public static function getId(): string {
43 | return 'google';
44 | }
45 |
46 | /**
47 | * @inheritDoc
48 | * @throws OcrException If the Google client was not initialised or the Vision API response carries an error.
49 | */
50 | public function getResult(
51 | string $imageUrl,
52 | string $invalidLangsMode,
53 | array $crop,
54 | ?array $langs = null
55 | ): EngineResult {
56 | $this->checkImageUrl( $imageUrl );
57 |
58 | [ $validLangs, $invalidLangs ] = $this->filterValidLangs( $langs, $invalidLangsMode );
59 |
60 | $imageContext = new ImageContext();
61 | if ( $validLangs ) {
62 | $imageContext->setLanguageHints( $validLangs );
63 | }
64 |
65 | if ( !$this->imageAnnotator ) {
66 | throw new OcrException( 'google-error', [ 'Key for Google OCR engine is missing' ] );
67 | }
68 |
69 | $image = $this->getImage( $imageUrl, $crop );
70 | $imageUrlOrData = $image->hasData() ? $image->getData() : $image->getUrl();
71 | $response = $this->imageAnnotator->textDetection( $imageUrlOrData, [ 'imageContext' => $imageContext ] );
72 |
73 | // Retry once with a direct upload of the image data if the error returned is something similar to
74 | // "The URL does not appear to be accessible by us. Please double check or download the content and pass it in."
75 | // There doesn't seem to be a specific error code for this (it is usually 3, but that's also used for other
76 | // things), so we match on the message text (case-insensitively) instead.
77 | if ( $response->getError()
78 | && stripos( $response->getError()->getMessage(), 'download the content and pass it in' ) !== false
79 | ) {
80 | $image = $this->getImage( $imageUrl, $crop, self::DO_DOWNLOAD_IMAGE );
81 | $response = $this->imageAnnotator->textDetection( $image->getData(), [ 'imageContext' => $imageContext ] );
82 | }
83 |
84 | // Any remaining error (including one returned by the retry above) is reported to the user.
85 | if ( $response->getError() ) {
86 | throw new OcrException( 'google-error', [ $response->getError()->getMessage() ] );
87 | }
88 |
89 | $annotation = $response->getFullTextAnnotation();
90 | $resText = $annotation instanceof TextAnnotation ? $annotation->getText() : '';
91 | $warnings = $invalidLangs ? [ $this->getInvalidLangsWarning( $invalidLangs ) ] : [];
92 | return new EngineResult( $resText, $warnings );
93 | }
94 | }
95 |
--------------------------------------------------------------------------------
/i18n/zh-hans.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | ")8",
5 | "Anterdc99",
6 | "Crowley666",
7 | "GuoPC",
8 | "LittlePaw365",
9 | "Midleading",
10 | "Shizhao",
11 | "Zhang8569",
12 | "列维劳德"
13 | ]
14 | },
15 | "title": "维基媒体OCR",
16 | "subtitle": "从维基共享资源上的扫描图像转录文本,以供维基文库和其他项目使用的工具。",
17 | "form-heading": "转录图像",
18 | "image-url": "图像 URL",
19 | "image-url-help": "插入在维基媒体服务器上托管的图像 URL,例如: $1",
20 | "image-url-error": "图片URL必须以{{PLURAL:$1|以下域名|以下域名之一}}开头并以有效的文件扩展名结尾:$2",
21 | "image-alt-text": "原图",
22 | "language-code": "语言(可选)",
23 | "engine": "OCR引擎",
24 | "engine-name-transkribus": "Transkribus OCR",
25 | "engine-not-found-warning": "请求的引擎“$1”未找到。改用默认引擎“$2”。",
26 | "engine-invalid-langs-warning": "以下语言无效或不受引擎支持而被忽略:$1",
27 | "submit": "转录整页",
28 | "submit-crop": "转录范围",
29 | "drag-help": "选择裁剪工具,然后在下面的图像上拖拽出一个矩形,来仅转录页面上的某一区域。",
30 | "drag-mode-move": "拖曳操作会移动图像",
31 | "drag-mode-move-alt": "代表“移动”操作的图标。",
32 | "drag-mode-crop": "拖拽会创建新裁剪区域",
33 | "drag-mode-crop-alt": "代表“裁剪”操作的图标。",
34 | "copy-to-clipboard": "复制到剪贴板",
35 | "copied-to-clipboard": "已复制!",
36 | "google-error": "Google服务返回错误:$1",
37 | "image-retrieval-failed": "图像检索失败:$1",
38 | "documentation": "文档",
39 | "api-tooltip": "查看 API 文档",
40 | "version": "版本$1",
41 | "report-issue": "报告问题",
42 | "langs-placeholder": "留空以进行自动语言检测。",
43 | "langs-param-error": "以下{{PLURAL:$1|语言}}不受OCR引擎支持:$2",
44 | "loading-message": "正在执行转录...",
45 | "tesseract-options": "Tesseract选项",
46 | "tesseract-psm-label": "页面拆分方式",
47 | "tesseract-psm-help": "尝试“稀疏文本”来获得更好的多列支持。",
48 | "tesseract-psm-0": "仅方向和文字检测(OSD)。",
49 | "tesseract-psm-1": "使用OSD自动拆分页面。",
50 | "tesseract-psm-2": "自动拆分页面,但不使用OSD或OCR。(未实现)",
51 | "tesseract-psm-3": "全自动拆分页面,但不使用OSD。(默认)",
52 | "tesseract-psm-4": "假设有一列可变大小的文本。",
53 | "tesseract-psm-5": "假设有一个统一的垂直对齐的文本块。",
54 | "tesseract-psm-6": "假设有一个统一的文本块。",
55 | "tesseract-psm-7": "将图像视为一行文本。",
56 | "tesseract-psm-8": "将图像视为一个词。",
57 | "tesseract-psm-9": "将图像视为环状的一个词。",
58 | "tesseract-psm-10": "将图像视为一个字符。",
59 | "tesseract-psm-11": "稀疏文本。查找尽可能多的文本,没有特定的顺序。",
60 | "tesseract-psm-12": "使用OSD稀疏文本。",
61 | "tesseract-psm-13": "原始行。将图像视为一行文本,绕过特定于Tesseract的黑客攻击。",
62 | "tesseract-param-error": "带有值$2的“$1”选项不受Tesseract支持。最大值:$3",
63 | "tesseract-no-text-error": "Tesseract 引擎没有返回此图片的任何文本。",
64 | "tesseract-internal-error": "tesseract 引擎返回了一个内部错误。",
65 | "transkribus-language-code": "语言模型",
66 | "transkribus-unauthorized-error": "错误代码“ $1 ”:: 请求未经授权",
67 | "transkribus-default-error": "错误代码“ $1 ”:: 无法完成请求,请重试!",
68 | "transkribus-empty-response-error": "无法解析来自 Transkribus API 的结果",
69 | "transkribus-init-process-error": "无法初始化 Transkribus 进程",
70 | "transkribus-failed-process-error": "Transkribus 进程失败",
71 | "transkribus-no-lang-error": "未选择语言",
72 | "transkribus-multiple-lang-error": "不允许使用多种语言,请指定一种语言",
73 | "transkribus-browse-public-models": "浏览 Transkribus 的所有公开语言模型",
74 | "transkribus-request-for-model": "请求将 Transkribus 中的模型添加到 OCR 工具",
75 | "transkribus-options": "Transkribus 选项",
76 | "transkribus-line-label": "直线检测模型",
77 | "transkribus-line-id-none-option": "无",
78 | "transkribus-mixed-line-option": "混合直线方向",
79 | "transkribus-line-help": "如果您不确定要使用哪种直线检测模型,请留空",
80 | "transkribus-jobs": "Transkribus 任务",
81 | "transkribus-job-id": "任务 ID",
82 | "transkribus-job-state": "状态",
83 | "transkribus-job-description": "描述",
84 | "transkribus-job-start": "已开始",
85 | "transkribus-job-end": "已完成",
86 | "transkribus-job-waited": "开始延迟(分钟)"
87 | }
88 |
--------------------------------------------------------------------------------
/templates/base.html.twig:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {% block title %}{{ msg('title') }}{% endblock %}
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | {% block stylesheets %}
17 | {{ encore_entry_link_tags('app') }}
18 | {% if is_rtl() %}
19 |
20 | {% endif %}
21 | {% endblock %}
22 | {% block javascripts %}
23 | {{ encore_entry_script_tags('app') }}
24 | {% endblock %}
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
{{ msg('title') }}
34 |
{{ msg('subtitle') }}
35 |
36 |
37 |
38 |
39 | {% for label, messages in app.flashes(['error']) %}
40 | {% for message in messages %}
41 |
42 | {{ message }}
43 |
44 | {% endfor %}
45 | {% endfor %}
46 | {% for label, messages in app.flashes(['warning']) %}
47 | {% for message in messages %}
48 |
49 | {{ message }}
50 |
51 | {% endfor %}
52 | {% endfor %}
53 |
54 | {% block body %}{% endblock %}
55 |
56 |
57 |
74 |
75 |
76 |
77 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "wikimedia/wikimedia-ocr",
3 | "description": "A simple wrapper around multiple OCR engines, enabling Wikisources to submit images for OCR and retrieve the resultant text.",
4 | "type": "project",
5 | "license": "GPL-3.0-or-later",
6 | "require": {
7 | "php": ">=7.3",
8 | "ext-bcmath": "*",
9 | "ext-ctype": "*",
10 | "ext-gd": "*",
11 | "ext-iconv": "*",
12 | "ext-json": "*",
13 | "google/cloud-vision": "^1.3",
14 | "imagine/imagine": "^1.2",
15 | "nelmio/api-doc-bundle": "^4.4",
16 | "predis/predis": "^2.2",
17 | "symfony/cache": "5.2.*",
18 | "symfony/console": "5.2.*",
19 | "symfony/dotenv": "5.2.*",
20 | "symfony/framework-bundle": "^5.4",
21 | "symfony/mailer": "^5.2",
22 | "symfony/monolog-bundle": "^3.7",
23 | "symfony/property-info": "5.2.*",
24 | "symfony/twig-bundle": "5.2.*",
25 | "symfony/webpack-encore-bundle": "^1.11",
26 | "symfony/yaml": "5.2.*",
27 | "thiagoalessio/tesseract_ocr": "^2.11",
28 | "twig/extra-bundle": "^2.12|^3.0",
29 | "twig/intl-extra": "^3.7",
30 | "twig/twig": "^2.12|^3.0",
31 | "wikimedia/toolforge-bundle": "^1.3"
32 | },
33 | "require-dev": {
34 | "drenso/phan-extensions": "^3.3",
35 | "mediawiki/mediawiki-codesniffer": "^39.0",
36 | "mediawiki/minus-x": "^1.1",
37 | "mediawiki/phan-taint-check-plugin": "^4.0",
38 | "symfony/phpunit-bridge": "^5.2",
39 | "symfony/stopwatch": "^5.2",
40 | "symfony/web-profiler-bundle": "^5.2"
41 | },
42 | "config": {
43 | "platform": {
44 | "php": "7.3.31"
45 | },
46 | "optimize-autoloader": true,
47 | "preferred-install": {
48 | "*": "dist"
49 | },
50 | "sort-packages": true
51 | },
52 | "autoload": {
53 | "psr-4": {
54 | "App\\": "src/"
55 | }
56 | },
57 | "autoload-dev": {
58 | "psr-4": {
59 | "App\\Tests\\": "tests/"
60 | }
61 | },
62 | "replace": {
63 | "symfony/polyfill-ctype": "*",
64 | "symfony/polyfill-iconv": "*",
65 | "symfony/polyfill-php72": "*"
66 | },
67 | "scripts": {
68 | "auto-scripts": [
69 | "./bin/console cache:clear",
70 | "./bin/console assets:install"
71 | ],
72 | "check-tesseract": "./check_tesseract.sh",
73 | "post-install-cmd": [
74 | "@auto-scripts",
75 | "@check-tesseract"
76 | ],
77 | "post-update-cmd": [
78 | "@auto-scripts",
79 | "@check-tesseract"
80 | ],
81 | "test": [
82 | "@test-common",
83 | "@phan"
84 | ],
85 | "test-common": [
86 | "composer validate",
87 | "phpcs -s -p .",
88 | "./bin/console lint:twig ./templates",
89 | "./bin/console lint:yaml ./config",
90 | "minus-x check .",
91 | "@check-tesseract",
92 | "./bin/phpunit"
93 | ],
94 | "phan": [
95 | "phan --allow-polyfill-parser --long-progress-bar"
96 | ],
97 | "fix": "phpcbf"
98 | },
99 | "conflict": {
100 | "symfony/symfony": "*"
101 | },
102 | "extra": {
103 | "symfony": {
104 | "allow-contrib": false,
105 | "require": "5.2.*"
106 | }
107 | }
108 | }
109 |
--------------------------------------------------------------------------------
/i18n/tl.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "GinawaSaHapon"
5 | ]
6 | },
7 | "title": "WikimediaOCR",
8 | "subtitle": "Mag-transcribe ang teksto mula sa mga larawan",
9 | "form-heading": "Mag-transcribe ng larawan",
10 | "image-url": "URL ng larawan",
11 | "image-url-help": "Ilagay ang URL ng larawan na hino-host sa isang server ng Wikimedia tulad ng: $1",
12 | "image-url-error": "Dapat nagsisimula ang URL ng larawan sa {{PLURAL:$1|sumusunod na pangalan ng domain|mga sumusunod na pangalan ng domain}} at magtapos sa isang valid na file extension: $2",
13 | "image-alt-text": "Ang orihinal na larawan",
14 | "language-code": "Mga wika (di-kailangan)",
15 | "engine": "OCR engine",
16 | "engine-not-found-warning": "Hindi nakita ang hiniling na engine na '$1'. Gagamitin na lang ang default na engine na '$2'.",
17 | "engine-invalid-langs-warning": "Invalid o di-suportado ng engine ang mga sumusunod na wika kaya binalewala sila: $1",
18 | "submit": "I-transcribe ang buong pahina",
19 | "submit-crop": "Lugar na ita-transcribe",
20 | "drag-help": "Piliin ang crop tool at mag-drag ng isang parihaba sa larawan sa baba para i-transcribe lang ang isang partikular na lugar ng pahina.",
21 | "drag-mode-move": "Gagalaw ang larawan kung ida-drag",
22 | "drag-mode-move-alt": "Ang icon na kumakatawan sa kilos na 'galawin'.",
23 | "drag-mode-crop": "Gagawa ng bagong lugar na ika-crop kung ida-drag",
24 | "drag-mode-crop-alt": "Ang icon na kumakatawan sa kilos na 'i-crop'.",
25 | "copy-to-clipboard": "Kopyahin sa clipboard",
26 | "copied-to-clipboard": "Nakopya na!",
27 | "google-error": "Nagbalik ng error ang serbisyo ng Google: $1",
28 | "image-retrieval-failed": "Nabigo sa pagkuha sa larawan: $1",
29 | "documentation": "Dokumentasyon",
30 | "api-tooltip": "Tingnan ang dokumentasyon sa API",
31 | "version": "Bersyon $1",
32 | "report-issue": "Mag-ulat ng isyu",
33 | "langs-placeholder": "Bakantehin para sa kusang pag-detect sa wika.",
34 | "langs-param-error": "Hindi suportado ng OCR engine ang sumusunod na {{PLURAL:$1|wika|mga wika}}: $2",
35 | "tesseract-options": "Pagsasaayos sa Tesseract",
36 | "tesseract-psm-label": "Paraan ng segmentation sa pahina",
37 | "tesseract-psm-help": "Subukan ang \"Kalat-kalat na teksto\" para sa mas maayos na suporta sa mga maramihang hanay.",
38 | "tesseract-psm-0": "Orientation at script detection (OSD) lang.",
39 | "tesseract-psm-1": "Kusang segmentation sa pahina gamit OSD.",
40 | "tesseract-psm-2": "Kusang segmentation sa pahina, pero walang OSD, o OCR. (di na-implement)",
41 | "tesseract-psm-3": "Kusang segmentation sa pahina, pero walang OSD. (default)",
42 | "tesseract-psm-4": "I-assume ang isang hanay ng teksto na iba-iba ang sukat.",
43 | "tesseract-psm-5": "I-assume ang isang pantay na bloke ng naka-align nang patayo na teksto.",
44 | "tesseract-psm-6": "I-assume ang isang pantay na bloke ng teksto.",
45 | "tesseract-psm-7": "Tratuhin ang larawan bilang isang linya ng teksto.",
46 | "tesseract-psm-8": "Tratuhin ang larawan bilang isang salita.",
47 | "tesseract-psm-9": "Tratuhin ang larawan bilang isang salita sa loob ng isang bilog.",
48 | "tesseract-psm-10": "Tratuhin ang larawan bilang isang karakter.",
49 | "tesseract-psm-11": "Kalat-kalat na teksto. Maghanap ng teksto hanggat posible nang walang partikular na plano.",
50 | "tesseract-psm-12": "Kalat-kalat na teksto na may OSD.",
51 | "tesseract-psm-13": "Raw na linya. Tratuhin ang larawan bilang isang linya ng teksto, na nagba-bypass sa mga hack na specific sa Tesseract.",
52 | "tesseract-param-error": "Hindi suportado ang '$1' na may value na $2. Maximum na value: $3",
53 | "tesseract-internal-error": "Nagbalik ng isang internal error ang tesseract engine."
54 | }
55 |
--------------------------------------------------------------------------------
/i18n/id.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Mnam23",
5 | "Penyuwangi",
6 | "Veracious"
7 | ]
8 | },
9 | "title": "WikimediaOCR",
10 | "subtitle": "Mentranskripkan teks dari gambar",
11 | "form-heading": "Transkripsi sebuah gambar",
12 | "image-url": "URL Gambar",
13 | "image-url-help": "Sisipkan URL gambar yang dihosting di peladen Wikimedia seperti: $1",
14 | "image-url-error": "URL gambar harus dimulai dengan {{PLURAL:$1|nama domain berikut|salah satu dari nama domain berikut}} dan diakhiri dengan ekstensi berkas valid: $2",
15 | "image-alt-text": "Gambar asli",
16 | "language-code": "Bahasa (opsional)",
17 | "engine": "Mesin OCR",
18 | "engine-name-transkribus": "Transkribus OCR",
19 | "engine-not-found-warning": "Mesin '$1' yang diminta tidak ditemukan. Menggunakan mesin bawaan '$2' sebagai gantinya.",
20 | "engine-invalid-langs-warning": "Bahasa berikut tidak valid atau tidak didukung mesin dan diabaikan: $1",
21 | "submit": "Transkripsikan seluruh halaman",
22 | "submit-crop": "Transkripsikan area",
23 | "drag-help": "Pilih perkakas pemangkas dan tarik sebuah persegi pada gambar di bawah untuk mentranskripsikan hanya satu area halaman.",
24 | "drag-mode-move": "Menarik akan memindahkan gambar",
25 | "drag-mode-move-alt": "Ikon merepresentasikan tindakan 'memindahkan'.",
26 | "drag-mode-crop": "Menarik akan membuat area pemangkasan baru",
27 | "drag-mode-crop-alt": "Ikon merepresentasikan tindakan 'pemangkasan'.",
28 | "copy-to-clipboard": "Salin ke papan klip",
29 | "copied-to-clipboard": "Tersalin!",
30 | "google-error": "Layanan Google mengembalikan kesalahan: $1",
31 | "image-retrieval-failed": "Pengambilan gambar gagal: $1",
32 | "documentation": "Dokumentasi",
33 | "api-tooltip": "Lihat dokumentasi API",
34 | "version": "Versi $1",
35 | "report-issue": "Laporkan masalah",
36 | "langs-placeholder": "Biarkan kosong untuk deteksi bahasa otomatis.",
37 | "langs-param-error": "{{PLURAL:$1|Bahasa|Bahasa-bahasa}} berikut tidak didukung oleh mesin OCR: $2",
38 | "tesseract-options": "Pilihan Tesseract",
39 | "tesseract-psm-label": "Metode segmentasi halaman",
40 | "tesseract-psm-help": "Coba \"Teks jarang\" untuk dukungan multi-kolom yang lebih baik.",
41 | "tesseract-psm-0": "Orientasi dan deteksi skrip (OSD) saja.",
42 | "tesseract-psm-1": "Segmentasi halaman otomatis dengan OSD.",
43 | "tesseract-psm-2": "Segmentasi halaman otomatis, tapi tanpa OSD, atau OCR. (tidak diterapkan)",
44 | "tesseract-psm-3": "Segmentasi halaman otomatis sepenuhnya, tetapi tanpa OSD. (Bawaan)",
45 | "tesseract-psm-4": "Asumsikan satu kolom teks dengan ukuran variabel.",
46 | "tesseract-psm-5": "Asumsikan satu blok seragam dari teks yang disejajarkan secara vertikal.",
47 | "tesseract-psm-6": "Asumsikan satu blok teks seragam.",
48 | "tesseract-psm-7": "Perlakukan gambar sebagai satu baris teks.",
49 | "tesseract-psm-8": "Perlakukan gambar sebagai kata tunggal.",
50 | "tesseract-psm-9": "Perlakukan gambar sebagai kata tunggal dalam lingkaran.",
51 | "tesseract-psm-10": "Perlakukan gambar sebagai karakter tunggal.",
52 | "tesseract-psm-11": "Teks jarang. Temukan teks sebanyak mungkin tanpa urutan tertentu.",
53 | "tesseract-psm-12": "Teks jarang dengan OSD.",
54 | "tesseract-psm-13": "Garis mentah. Perlakukan gambar sebagai satu baris teks, melewati peretasan yang spesifik Tesseract.",
55 | "tesseract-param-error": "Pilihan '$1' dengan nilai $2 tidak didukung oleh Tesseract. Nilai maksimum: $3",
56 | "tesseract-internal-error": "Mesin tesseract mengembalikan galat internal.",
57 | "transkribus-language-code": "Model bahasa",
58 | "transkribus-unauthorized-error": "Kode Galat '$1' :: Permintaan tidak diotorisasi",
59 | "transkribus-default-error": "Kode Galat '$1' :: Tak dapat menyelesaikan permintaan, coba lagi!",
60 | "transkribus-no-lang-error": "Tak ada bahasa yang dipilih"
61 | }
62 |
--------------------------------------------------------------------------------
/i18n/fi.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "MITO",
5 | "Pyscowicz",
6 | "Veikk0.ma"
7 | ]
8 | },
9 | "title": "WikimediaOCR",
10 | "subtitle": "Muunna kuvamuotoinen teksti raakatekstiksi",
11 | "form-heading": "Muunna kuvassa oleva kirjoitus tekstiksi",
12 | "image-url": "Kuvan verkko-osoite",
13 | "image-url-help": "Lisää Wikimedian palvelimella sijaitsevan kuvan verkko-osoite, esim.: $1",
14 | "image-url-error": "Kuvan verkko-osoitteen tulee alkaa {{PLURAL:$1|seuraavalla verkkotunnuksella|yhdellä seuraavista verkkotunnuksista}} ja loppua hyväksytyllä tiedostopäätteellä: $2",
15 | "image-alt-text": "Alkuperäinen kuva",
16 | "language-code": "Kielet (valinnainen)",
17 | "engine": "Tekstintunnistusmoottori",
18 | "engine-name-transkribus": "Transkribus OCR",
19 | "engine-not-found-warning": "Pyydettyä moottoria '$1' ei löytynyt. Käytetään sen sijasta oletusmoottoria '$2'.",
20 | "engine-invalid-langs-warning": "Seuraavat kielet ovat virheellisiä tai moottori ei tue niitä ja ne ohitettiin: $1",
21 | "submit": "Muunna koko sivu tekstiksi",
22 | "submit-crop": "Muunna alue tekstiksi",
23 | "drag-help": "Valitse rajaustyökalu ja piirrä alla olevaan kuvaan suorakulmio muuntaaksesi vain yhden alueen sivulta tekstiksi.",
24 | "drag-mode-move": "Raahaaminen siirtää kuvaa",
25 | "drag-mode-move-alt": "Siirtämistoimintoa esittävä kuvake.",
26 | "drag-mode-crop": "Raahaaminen luo uuden rajausalueen",
27 | "drag-mode-crop-alt": "Rajaustoimintoa esittävä kuvake.",
28 | "copy-to-clipboard": "Kopioi leikepöydälle",
29 | "copied-to-clipboard": "Kopioitu!",
30 | "google-error": "Googlen palvelu vastasi virheilmoituksella: $1",
31 | "image-retrieval-failed": "Kuvan noutaminen epäonnistui: $1",
32 | "documentation": "Käyttöohjeet",
33 | "api-tooltip": "Katso rajapinnan dokumentaatio",
34 | "version": "Versio $1",
35 | "report-issue": "Tee vikailmoitus",
36 | "langs-placeholder": "Jätä tyhjäksi tunnistaaksesi kielen automaattisesti.",
37 | "langs-param-error": "Tekstintunnistusmoottori ei tue seuraavaa {{PLURAL:$1|kieltä}}: $2",
38 | "tesseract-options": "Tesseractin asetukset",
39 | "tesseract-psm-label": "Sivunjakomenetelmä",
40 | "tesseract-psm-help": "Kokeile asetusta \"Harva teksti\" mikäli haluat paremman tuen useaan palstaan jaetulle tekstille.",
41 | "tesseract-psm-0": "Vain tekstin suunnan ja kirjoitusjärjestelmän tunnistus (OSD).",
42 | "tesseract-psm-1": "Automaattinen sivunjako ja tekstin suunnan ja kirjoitusjärjestelmän tunnistus.",
43 | "tesseract-psm-2": "Automaattinen sivunjako, ei tekstin suunnan ja kirjoitusjärjestelmän tunnistusta eikä tekstintunnistusta. (ei vielä toteutettu)",
44 | "tesseract-psm-3": "Täysin automaattinen sivunjako, ei tekstin suunnan ja kirjoitusjärjestelmän tunnistusta. (Oletus)",
45 | "tesseract-psm-4": "Oleta yksi tekstipalsta ja vaihteleva kirjasinkoko.",
46 | "tesseract-psm-5": "Oleta yksi yhtenäinen, pystysuoraan tasattu tekstilohko.",
47 | "tesseract-psm-6": "Oleta yksi yhtenäinen tekstilohko.",
48 | "tesseract-psm-7": "Käsittele kuvaa yhtenä tekstirivinä.",
49 | "tesseract-psm-8": "Käsittele kuvaa yhtenä sanana.",
50 | "tesseract-psm-9": "Käsittele kuvaa yhtenä sanana, joka on ympyröity.",
51 | "tesseract-psm-10": "Käsittele kuvaa yhtenä kirjoitusmerkkinä.",
52 | "tesseract-psm-11": "Harva teksti. Etsi mahdollisimman paljon tekstiä järjestyksestä välittämättä.",
53 | "tesseract-psm-12": "Harva teksti ja tekstin suunnan ja kirjoitusjärjestelmän tunnistus.",
54 | "tesseract-psm-13": "Tekstirivi. Käsittele kuvaa yhtenä tekstirivinä (kiertää Tesseractia varten tehdyt niksit).",
55 | "tesseract-param-error": "\"$1\"-asetus ei tue arvoa $2. Enimmäisarvo on $3",
56 | "tesseract-internal-error": "Tesseract-moottori kohtasi sisäisen virheen.",
57 | "transkribus-empty-response-error": "Tulosta ei voitu jäsentää Transkribus API:sta",
58 | "transkribus-no-lang-error": "Kieltä ei valittu",
59 | "transkribus-multiple-lang-error": "Useita kieliä ei sallita, määritä yksi kieli"
60 | }
61 |
--------------------------------------------------------------------------------
/i18n/ko.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Apzp79",
5 | "Namoroka",
6 | "Suleiman the Magnificent Television",
7 | "Ykhwong",
8 | "그냥기여자"
9 | ]
10 | },
11 | "title": "위키미디어OCR",
12 | "subtitle": "위키미디어 공용의 스캔된 이미지에서 텍스트를 변환하여 위키문헌과 다른 곳에서 사용할 수 있는 도구입니다.",
13 | "form-heading": "그림을 변환하기",
14 | "image-url": "그림 URL",
15 | "image-url-help": "위키미디어 서버에 호스팅된 그림 URL을 삽입하여 다음을 입력합니다: $1",
16 | "image-url-error": "그림 URL은 {{PLURAL:$1|다음 도메인 이름으로|다음 도메인 이름 중 하나로}} 시작하여 유효한 파일 확장자로 끝나야 합니다: $2",
17 | "image-alt-text": "원본 그림",
18 | "language-code": "언어 (선택 사항)",
19 | "engine": "OCR 엔진",
20 | "engine-name-google": "구글 클라우드 비전 OCR",
21 | "engine-name-tesseract": "테서랙트 OCR",
22 | "engine-name-transkribus": "트랜스크리버스 OCR",
23 | "engine-not-found-warning": "요청하신 '$1' 엔진을 찾을 수 없습니다. 대신 기본 엔진 '$2'을 사용합니다.",
24 | "engine-invalid-langs-warning": "다음 언어는 잘못되었거나 엔진에서 지원하지 않으므로 무시되었습니다: $1",
25 | "submit": "전체 페이지 변환",
26 | "submit-crop": "변환 영역",
27 | "drag-help": "자르기 도구를 선택하고 사각형을 아래의 그림 위로 드래그하여 페이지의 일부 영역만 변환합니다.",
28 | "drag-mode-move": "드래그하면 그림이 이동합니다",
29 | "drag-mode-move-alt": "'이동' 동작을 나타내는 아이콘입니다.",
30 | "drag-mode-crop": "드래그하면 새로운 자르기 영역이 생성됩니다",
31 | "drag-mode-crop-alt": "'자르기' 동작을 나타내는 아이콘입니다.",
32 | "copy-to-clipboard": "클립보드에 복사",
33 | "copied-to-clipboard": "복사 완료!",
34 | "google-error": "구글 서비스가 오류를 반환했습니다: $1",
35 | "image-retrieval-failed": "그림 검색에 실패하였습니다: $1",
36 | "documentation": "문서",
37 | "api-tooltip": "API 문서 보기",
38 | "version": "버전 $1",
39 | "report-issue": "문제점 보고",
40 | "langs-placeholder": "자동 언어 감지를 위해 비워두세요.",
41 | "langs-param-error": "다음 {{PLURAL:$1|언어}}는 OCR 엔진에서 지원되지 않습니다: $2",
42 | "loading-message": "변환 수행 중...",
43 | "tesseract-options": "테서랙트 옵션",
44 | "tesseract-psm-label": "페이지 분할 방법",
45 | "tesseract-psm-help": "더 나은 다단 지원을 위해 \"산발적인 텍스트\"를 사용해 보세요.",
46 | "tesseract-psm-0": "방향 및 문자 인식(OSD)만 수행합니다.",
47 | "tesseract-psm-1": "OSD를 이용하여 페이지를 자동 분할합니다.",
48 | "tesseract-psm-2": "OSD나 OCR 없이 페이지를 자동 분할합니다. (구현되지 않음)",
49 | "tesseract-psm-3": "OSD 없이 완전히 자동으로 페이지를 분할합니다. (기본값)",
50 | "tesseract-psm-4": "다양한 크기의 단일 텍스트 열을 가정합니다.",
51 | "tesseract-psm-5": "수직으로 정렬된 단일 텍스트 블록을 가정합니다.",
52 | "tesseract-psm-6": "단일 텍스트 블록을 가정합니다.",
53 | "tesseract-psm-7": "그림을 단일 텍스트 줄로 취급합니다.",
54 | "tesseract-psm-8": "그림을 하나의 단어로 취급합니다.",
55 | "tesseract-psm-9": "그림을 원 내부의 하나의 단어로 취급합니다.",
56 | "tesseract-psm-10": "그림을 단일 문자로 취급합니다.",
57 | "tesseract-psm-11": "산발적인 텍스트. 특별한 정렬 없이 텍스트를 가능한 한 많이 찾습니다.",
58 | "tesseract-psm-12": "OSD를 이용한 산발적인 텍스트.",
59 | "tesseract-psm-13": "가공되지 않은 줄. 그림을 단일 텍스트 줄로 취급하여 테서랙트 고유의 해킹을 우회합니다.",
60 | "tesseract-param-error": "'$2'의 값을 가진 '$1' 옵션은 테서랙트에서 지원되지 않습니다. 최댓값: $3",
61 | "tesseract-no-text-error": "테서랙트 엔진이 이 그림으로부터 아무런 텍스트를 반환하지 못했습니다.",
62 | "tesseract-internal-error": "테서랙트 엔진이 내부 오류를 반환했습니다.",
63 | "transkribus-language-code": "언어 모델",
64 | "transkribus-unauthorized-error": "오류 코드 '$1' :: 요청이 허가되지 않음",
65 | "transkribus-default-error": "오류 코드 '$1' :: 요청을 완료할 수 없음, 다시 시도하세요!",
66 | "transkribus-empty-response-error": "트랜스크리버스 API로부터 결과를 파싱하지 못했습니다",
67 | "transkribus-init-process-error": "트랜스크리버스 프로세스를 초기화하는데 실패했습니다",
68 | "transkribus-failed-process-error": "트랜스크리버스 프로세스 실패",
69 | "transkribus-no-lang-error": "선택한 언어가 없습니다",
70 | "transkribus-multiple-lang-error": "여러 언어는 허용되지 않습니다, 하나의 언어만 지정하세요",
71 | "transkribus-browse-public-models": "트랜스크리버스의 모든 공개 언어 모델 둘러보기",
72 | "transkribus-request-for-model": "트랜스크리버스에서 OCR 도구 모델 추가 요청하기",
73 | "transkribus-options": "트랜스크리버스 옵션",
74 | "transkribus-line-label": "줄 감지 모델",
75 | "transkribus-line-id-none-option": "없음",
76 | "transkribus-mixed-line-option": "혼합된 줄 방향",
77 | "transkribus-line-help": "무슨 줄 감지 모델을 사용해야 하는지 모르겠다면 이 부분을 비워두세요",
78 | "transkribus-jobs": "트랜스크리버스 업무",
79 | "transkribus-job-id": "업무 ID",
80 | "transkribus-job-state": "상태",
81 | "transkribus-job-description": "설명",
82 | "transkribus-job-start": "시작됨",
83 | "transkribus-job-end": "완료",
84 | "transkribus-job-waited": "시작 지연 (분)"
85 | }
86 |
--------------------------------------------------------------------------------
/src/EventListener/ExceptionListener.php:
--------------------------------------------------------------------------------
1 | request = $requestStack->getCurrentRequest();
49 | $this->session = $requestStack->getSession();
50 | $this->twig = $twig;
51 | $this->intuition = $intuition;
52 | $this->tesseractLogger = $tesseractLogger;
53 | }
54 |
55 | /**
56 | * @param ExceptionEvent $event Kernel exception event; a 400 response is set on it when the exception is handled.
57 | */
58 | public function onKernelException( ExceptionEvent $event ): void {
59 | $exception = $event->getThrowable();
60 |
61 | // Only handle OcrException and the library's UnsuccessfulCommandException (T282141), and only on the main request.
62 | if ( !( $exception instanceof OcrException || $exception instanceof UnsuccessfulCommandException )
63 | || !$event->isMainRequest()
64 | ) {
65 | return;
66 | }
67 |
68 | $isApi = str_contains( $this->request->getPathInfo(), '/api' ); // substring match anywhere in the path
69 | $params = array_merge(
70 | OcrController::$params,
71 | $this->request->query->all()
72 | );
73 | if ( $exception instanceof UnsuccessfulCommandException ) {
74 | $this->tesseractLogger->critical( $exception->__toString() );
75 | $errorMessage = $this->getMessageForTesseractException( $exception );
76 | } else {
77 | $errorMessage = $this->intuition->msg(
78 | $exception->getI18nKey(),
79 | [ 'variables' => $exception->getI18nParams() ]
80 | );
81 | }
82 |
83 | if ( $isApi ) {
84 | $params['error'] = $errorMessage;
85 | $response = new JsonResponse( $params );
86 | } else {
87 | /** @var FlashBagInterface $flashBag */
88 | $flashBag = $this->session->getBag( 'flashes' );
89 | // @phan-suppress-next-line PhanUndeclaredMethod
90 | $flashBag->add( 'error', $errorMessage );
91 | $response = new Response(
92 | $this->twig->render( 'output.html.twig', $params )
93 | );
94 | }
95 |
96 | // Allow cross-origin requests like we do for successful requests. See T285543
97 | $response->headers->set( 'Access-Control-Allow-Origin', '*' );
98 | $response->setStatusCode( Response::HTTP_BAD_REQUEST ); // applied to both exception types handled above
99 | $event->setResponse( $response );
100 | }
101 |
102 | /**
103 | * Build the user-facing message for a failed Tesseract command. The exception's own text is deliberately not
104 | * passed through because it might expose sensitive details, so a generic localized message is returned.
105 | *
106 | * @param UnsuccessfulCommandException $exc @phan-unused-param
107 | * @return string
108 | */
109 | private function getMessageForTesseractException( UnsuccessfulCommandException $exc ): string {
110 | // TODO: How can we be more specific about what's gone wrong?
111 | $messageKey = 'tesseract-internal-error';
112 | return $this->intuition->msg( $messageKey );
113 | }
114 | }
115 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | ## Requirements ##
2 |
3 | * PHP 7.3+
4 | * [Composer](http://getcomposer.org/)
5 | * [Symfony CLI](https://symfony.com/download)
6 |
7 | If you need to make asset changes:
8 |
9 | * [Node](https://nodejs.org) with the version specified by the `.nvmrc` [nvm](https://github.com/nvm-sh/nvm#installing-and-updating) file.
10 |
11 | ## Installation ##
12 |
13 | * `composer install`
14 | * `npm install`
15 |
16 | ### For Google Cloud Vision Engine ###
17 |
18 | * Add the missing values from `.env` to a `.env.local` file
19 | * Enable the Cloud Vision API at https://console.cloud.google.com/apis/api/vision.googleapis.com/overview
20 | * Create a new Google service account at https://console.cloud.google.com/iam-admin/serviceaccounts (Google gives you 1,000 free lookups per month).
21 | * Give the service account the *Compute Engine Service Account* role.
22 | * Add a new key for the service account, and download the key's JSON file. Nothing needs to be changed in this file.
23 | * Add the path of that file to your `.env.local` as `APP_GOOGLE_KEYFILE`.
24 |
25 | ### For Tesseract OCR Engine ###
26 | * Install [Tesseract](https://tesseract-ocr.github.io) and make sure it's in your `$PATH`
27 |
28 | ### For Transkribus OCR Engine ###
29 |
30 | You can [create a free account](https://readcoop.eu/transkribus/?sc=Transkribus) for Transkribus, and get a small number of free credits.
31 |
32 | You will also need to set the *username* and *password* of your Transkribus account in `.env.local`:
33 |
34 | ```dotenv
35 | APP_TRANSKRIBUS_USERNAME=username
36 | APP_TRANSKRIBUS_PASSWORD=password
37 | ```
38 |
39 | **Note**: You will require sufficient credits in your account to use the Transkribus API.
40 |
41 | ## Run the application ##
42 | * `symfony serve` to start the application
43 | * `npm run watch` if you need to make JS/CSS changes. Compiled assets are not committed.
44 |
45 | ## Using Redis for caching
46 |
47 | The application caches some data.
48 | In development this is done on the filesystem (in the `var/cache/dev/pools/` directory),
49 | and in production in Redis
50 | (the [Toolforge installation](https://wikitech.wikimedia.org/wiki/Help:Toolforge/Redis_for_Toolforge)).
51 |
52 | To test the Redis configuration locally, open an SSH tunnel to Toolforge's Redis server:
53 |
54 | ```console
55 | $ ssh -N -L 6379:redis.svc.tools.eqiad1.wikimedia.cloud:6379 login.toolforge.org
56 | ```
57 |
58 | And set the following in `.env.local`:
59 |
60 | ```dotenv
61 | APP_ENV=prod
62 | REDIS_HOST=localhost
63 | ```
64 |
65 | Then clear the application cache with
66 |
67 | ```console
68 | $ ./bin/console c:c
69 | ```
70 |
71 | Docker Developer Environment
72 | ============================
73 |
74 | _(beta: this is a very raw setup and needs improvements)_
75 |
76 | ### Requirements
77 |
78 | - [Docker installation instructions][docker-install]
79 |
80 | [docker-install]: https://docs.docker.com/install/
81 |
82 | ### Quickstart
83 |
84 | Setup container
85 | ```
86 | ./docker/setup.sh
87 | ```
88 |
89 | Run container
90 | ```
91 | ./docker/run.sh
92 | ```
93 |
94 | ## Structure of models.json
95 |
96 | The engines' model and language information is stored in `/public/models.json`,
97 | from where it's read and returned in the `/api/available_langs` API endpoint.
98 |
99 | OCR engines take zero to many model names (often called 'languages' because
100 | there's a direct mapping to those, but we're moving away from this nomenclature
101 | now because it doesn't always hold true).
102 |
103 | `models.json` is first grouped by engine, and then each engine has a list of models.
104 | These are identified by a 'model code', which is what the user provides in the `langs[]` parameter.
105 | For some engines these are passed through to the actual engine process or API,
106 | but others don't have convenient model names and so we invent them
107 | and add whatever extra info is needed as additional properties within `models.json`.
108 |
109 | In addition to the model code, every model needs to have at least a `title` and `languages` property.
110 |
111 | * `title`: This is what's shown (unlocalized) to the user.
112 | * `languages`: An array of ISO 639 language codes. This is (or will be) what's used to group models when the user is browsing them.
113 |
--------------------------------------------------------------------------------
/i18n/he.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Amire80",
5 | "Ghsuvr",
6 | "Leononon",
7 | "YaronSh"
8 | ]
9 | },
10 | "title": "WikimediaOCR",
11 | "subtitle": "כלי לאחזור טקסט מתמונות סרוקות בוויקישיתוף, לשימוש בוויקיטקסט ובכל מקום אחר.",
12 | "form-heading": "לתמלל תמונה",
13 | "image-url": "כתובת URL של התמונה",
14 | "image-url-help": "נא להכניס כתובת URL של תמונה המאוחסנת בשרת ויקימדיה כמו: $1",
15 | "image-url-error": "קישור התמונה חייב להתחיל {{PLURAL:$1|בשם המתחם הבא|באחד משמות הבאים}} ולהסתיים בסיומת קובץ תקינה: $2",
16 | "image-alt-text": "התמונה המקורית",
17 | "language-code": "שפות (לא חובה)",
18 | "engine": "מנוע זיהוי התווים OCR",
19 | "engine-name-google": "Google Cloud Vision OCR",
20 | "engine-name-tesseract": "Tesseract OCR",
21 | "engine-name-transkribus": "Transkribus OCR",
22 | "engine-not-found-warning": "המנוע המבוקש $1 לא נמצא. נא לנסות את $2 במקום זה.",
23 | "engine-invalid-langs-warning": "השפות הבאות לא תקניות או שלא נתמכות על ידי המנוע ולכן נדחו: $1",
24 | "submit": "לתמלל את כל הדף",
25 | "submit-crop": "אזור תמלול",
26 | "drag-help": "נא לבחור את כלי החיתוך ולגרור את המלבן בתמונה למטה כדי לתמלל רק אזור אחד בדף.",
27 | "drag-mode-move": "גרירה תזיז את התמונה",
28 | "drag-mode-move-alt": "סמל מייצג פעולת 'הזזה'",
29 | "drag-mode-crop": "גרירה תיצור אזור חיתוך חדש",
30 | "drag-mode-crop-alt": "אייקון מייצג פעולת 'חיתוך'",
31 | "copy-to-clipboard": "העתקה ללוח",
32 | "copied-to-clipboard": "הועתק!",
33 | "google-error": "השירות של גוגל החזיר שגיאה: $1",
34 | "image-retrieval-failed": "אחזור התמונה נכשל: $1",
35 | "documentation": "תיעוד",
36 | "api": "API",
37 | "api-tooltip": "ר' את תיעוד ה־API",
38 | "version": "גרסה $1",
39 | "report-issue": "דיווח על טעות",
40 | "langs-placeholder": "יש להשאיר ריק לזיהוי שפה אוטומטי",
41 | "langs-param-error": "{{PLURAL:$1|השפה שציינת אינה נתמכת|השפות שציינת אינן נתמכות}} על־ידי מנוע זיהוי התווים: $2",
42 | "loading-message": "מתבצע זיהוי תווים...",
43 | "tesseract-options": "אפשרויות טסרקט",
44 | "tesseract-psm-label": "שיטת חלקות הדף למקטעים",
45 | "tesseract-psm-help": "יש לנסות את המצב 'טקסט דליל' לתמיכה טובה יותר בריבוי עמודות",
46 | "tesseract-psm-0": "כיוון הדף וזיהוי הכתב בלבד (OSD).",
47 | "tesseract-psm-1": "חלוקת דף אוטומטית למקטעים בשיטת כיוון הטקסט וזיהוי הכתב (OSD).",
48 | "tesseract-psm-2": "חלוקת דף אוטומטית למקטעים, אבל ללא OSD או OCR. (לא ממומש)",
49 | "tesseract-psm-3": "חלוקת דף אוטומטית למקטעים, אבל ללא OSD. (ברירת מחדל)",
50 | "tesseract-psm-4": "להניח עמודה בודדת של טקסט בגדלים משתנים.",
51 | "tesseract-psm-5": "להניח שהטקסט מיושר אנכית באופן אחיד.",
52 | "tesseract-psm-6": "להניח שהטקסט מיושר באופן אחיד.",
53 | "tesseract-psm-7": "להתייחס לתמונה כאל שורת טקסט אחת.",
54 | "tesseract-psm-8": "להתייחס לתמונה כאל מילה אחת.",
55 | "tesseract-psm-9": "להתייחס לתמונה כאל מילה בודדת בעיגול.",
56 | "tesseract-psm-10": "להתייחס לתמונה כאל תו בודד.",
57 | "tesseract-psm-11": "דילול הטקסט. מציאת כמה שיותר טקסט ללא סדר מסוים",
58 | "tesseract-psm-12": "דילול הטקסט בעזרת OSD.",
59 | "tesseract-psm-13": "שורה גולמית. להתייחס לתמונה כשורת טקסט אחת, ולעקוף אפשרויות מיוחדות לטסרקט.",
60 | "tesseract-param-error": "האפשרות $1 עם הערך $2 לא נתמכת על־ידי טסרקט. הערך המרבי הוא $3",
61 | "tesseract-no-text-error": "מנוע Tesseract לא החזיר טקסט לתמונה הזאת.",
62 | "tesseract-internal-error": "מנוע טסרקט החזיר שגיאה פנימית.",
63 | "transkribus-language-code": "מודל שפה",
64 | "transkribus-unauthorized-error": "קוד שגיאה ‚$1’ :: הבקשה לא מאומתת",
65 | "transkribus-default-error": "קוד שגיאה ‚$1’ :: לא ניתן להשלים את הבקשה, נא לנסות שוב!",
66 | "transkribus-empty-response-error": "לא היה אפשר לפענח את התוצאות מ־Transkribus API",
67 | "transkribus-init-process-error": "הפעלת תהליך ה־Transkribus נכשלה",
68 | "transkribus-failed-process-error": "תהליך Transkribus נכשל",
69 | "transkribus-no-lang-error": "לא נבחרה שפה",
70 | "transkribus-multiple-lang-error": "אסור להשתמש בכמה שפות, יש לציין שפה אחת",
71 | "transkribus-browse-public-models": "עיין בכל דגמי השפה הציבוריים עבור Transkribus",
72 | "transkribus-request-for-model": "הגשת בקשה להוספת דגם מ־Transkribus לכלי OCR",
73 | "transkribus-options": "אפשרויות Transkribus",
74 | "transkribus-line-label": "דגם זיהוי קו",
75 | "transkribus-line-id-none-option": "אין",
76 | "transkribus-mixed-line-option": "כיוון קו מעורב",
77 | "transkribus-line-help": "נא להשאיר את זה ריק אם אינך בטוח באיזה דגם זיהוי קו להשתמש",
78 | "transkribus-jobs": "משימות ל־Transkribus",
79 | "transkribus-job-id": "מזהה משימה",
80 | "transkribus-job-state": "מצב",
81 | "transkribus-job-description": "תיאור",
82 | "transkribus-job-start": "התחילה",
83 | "transkribus-job-end": "הסתיימה",
84 | "transkribus-job-waited": "השהיית התחלה (דקות)"
85 | }
86 |
--------------------------------------------------------------------------------
/src/Engine/TesseractEngine.php:
--------------------------------------------------------------------------------
1 | ocr = $tesseractOcr;
38 | }
39 |
40 | /**
41 | * @inheritDoc
42 | */
43 | public static function getId(): string {
44 | return 'tesseract'; // Engine identifier; the value is a fixed literal.
45 | }
46 |
47 | /**
48 | * @inheritDoc
49 | */
50 | public function getResult(
51 | string $imageUrl,
52 | string $invalidLangsMode,
53 | array $crop,
54 | ?array $langs = null
55 | ): EngineResult {
56 | // Check the URL and the requested languages before fetching the image data.
57 | $this->checkImageUrl( $imageUrl );
58 | [ $validLangs, $invalidLangs ] = $this->filterValidLangs( $langs, $invalidLangsMode );
59 | // Built up front so that the invalid-languages warning is reported on every return path below.
60 | $warnings = $invalidLangs ? [ $this->getInvalidLangsWarning( $invalidLangs ) ] : [];
61 |
62 | $image = $this->getImage( $imageUrl, $crop, self::DO_DOWNLOAD_IMAGE );
63 | $this->ocr->imageData( $image->getData(), $image->getSize() );
64 | if ( $validLangs ) {
65 | $this->ocr->lang( ...$validLangs );
66 | }
67 |
68 | // Env vars are passed through by the thiagoalessio/tesseract_ocr package to the tesseract command,
69 | // but when they're loaded from Symfony's .env they aren't actually available (by design),
70 | // so we have to load this one manually. We only process one image at a time, so don't benefit from
71 | // multiple threads. See https://github.com/tesseract-ocr/tesseract/issues/898 for some more info.
72 | putenv( 'OMP_THREAD_LIMIT=1' );
73 | try {
74 | $text = $this->ocr->run();
75 | } catch ( UnsuccessfulCommandException $e ) {
76 | // An UnsuccessfulCommandException is thrown when there's no output, but that's not an
77 | // actual error so we check for it here and just show a warning. The same exception class
78 | // is also used for other things, hence the message check here.
79 | if ( strpos( $e->getMessage(), 'The command did not produce any output' ) !== false ) {
80 | $warnings[] = $this->intuition->msg( 'tesseract-no-text-error' );
81 | return new EngineResult( '', $warnings );
82 | }
83 | throw $e;
84 | }
85 |
86 | return new EngineResult( $text, $warnings );
87 | }
88 |
89 | /**
90 | * Set the page segmentation mode, after validating it against self::MAX_PSM (throws OcrException if invalid).
91 | * @param int $psm
92 | */
93 | public function setPsm( int $psm ): void {
94 | $this->validateOption( 'psm', $psm, self::MAX_PSM );
95 | $this->ocr->psm( $psm );
96 | }
97 |
98 | /**
99 | * List the page segmentation modes that can be offered, each paired with its localized label.
100 | * @return mixed[][] One array per mode, with the keys 'value' (the PSM ID) and 'label' (its message).
101 | */
102 | public function getAvailablePsms(): array {
103 | $psmIds = [ 0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 ];
104 | $toOption = function ( int $psmId ): array {
105 | // The following messages can be used here: 'tesseract-psm-0', 'tesseract-psm-1',
106 | // 'tesseract-psm-3', 'tesseract-psm-4', 'tesseract-psm-5', 'tesseract-psm-6', 'tesseract-psm-7',
107 | // 'tesseract-psm-8', 'tesseract-psm-9', 'tesseract-psm-10', 'tesseract-psm-11', 'tesseract-psm-12',
108 | // 'tesseract-psm-13'
109 | return [
110 | 'value' => $psmId,
111 | 'label' => $this->intuition->msg( 'tesseract-psm-' . $psmId ),
112 | ];
113 | };
114 |
115 | return array_map( $toOption, $psmIds );
116 | }
117 |
118 | /**
119 | * Validates the given option: its value must lie between 0 and $maximum (inclusive).
120 | * @param string $option Option name, used to build the "tesseract-$option-label" message key.
121 | * @param int $given Value to check.
122 | * @param int $maximum Largest accepted value.
123 | * @throws OcrException If the value is negative or greater than $maximum.
124 | */
125 | private function validateOption( string $option, int $given, int $maximum ): void {
126 | if ( $given >= 0 && $given <= $maximum ) {
127 | return;
128 | }
129 |
130 | // Negative values are rejected as well: they would otherwise be passed on to the OCR library unchecked.
131 | $optionLabel = $this->intuition->msg( "tesseract-$option-label" );
132 | throw new OcrException(
133 | 'tesseract-param-error',
134 | [ $optionLabel, $given, $maximum ]
135 | );
136 | }
137 | }
138 |
--------------------------------------------------------------------------------
/i18n/fa.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Darafsh",
5 | "Ebrahim",
6 | "Ebraminio",
7 | "Jeeputer",
8 | "Mfatemi",
9 | "Yoosef Pooranvary"
10 | ]
11 | },
12 | "title": "OCR ویکیمدیا",
13 | "subtitle": "ابزاری برای رونویسی متن از تصاویر اسکنشده در ویکیانبار، برای استفاده در ویکینبشته و جاهای دیگر.",
14 | "form-heading": "رونویسی یک تصویر",
15 | "image-url": "نشانی تصویر",
16 | "image-url-help": "نشانی URL تصویری را که در کارگزار ویکیمدیا میزبانی شده است را وارد کنید، مانند: $1",
17 | "image-url-error": "نشانی URL تصویر باید با {{PLURAL:$1|the following domain name|one of the following domain names}} آغاز شده و با شناسه فایل معتبری پایان یابد: $2",
18 | "image-alt-text": "تصویر اصلی",
19 | "language-code": "زبانها (اختیاری)",
20 | "engine": "موتور OCR",
21 | "engine-name-transkribus": "Transkribus OCR",
22 | "engine-not-found-warning": "موتور درخواستی $1 پیدا نشد. به جای آن از موتور پیشفرض $2 استفاده میشود.",
23 | "engine-invalid-langs-warning": "زبانهای زیر نامعتبر بوده یا توسط موتور پشتیبانی نمیشوند و از آنها چشمپوشی شد: $1",
24 | "submit": "رونویسی تمام صفحه",
25 | "submit-crop": "ناحیه رونویسی",
26 | "drag-help": "ابزار برش را انتخاب کرده و چهارگوشی بر روی تصویر زیر بکشید تا فقط یک ناحیه از صفحه رونویسی شود.",
27 | "drag-mode-move": "با کشیدن تصویر حرکت خواهد کرد.",
28 | "drag-mode-move-alt": "آیکان نشاندهندهٔ عمل «جابجایی»",
29 | "drag-mode-crop": "با کشیدن ناحیه چیدن جدیدی درست خواهد شد",
30 | "drag-mode-crop-alt": "آیکان نشاندهندهٔ عمل «چیدن»",
31 | "copy-to-clipboard": "کپی به کلیپبورد",
32 | "copied-to-clipboard": "کپی شد!",
33 | "google-error": "سرویس گوگل خطایی برگرداند: $1",
34 | "image-retrieval-failed": "بازیابی تصویر ناموفق بود: $1",
35 | "documentation": "مستندات",
36 | "api-tooltip": "اسناد API را ببینید",
37 | "version": "نسخهٔ $1",
38 | "report-issue": "گزارش مشکل",
39 | "langs-placeholder": "برای شناسایی خودکار زبان، خالی بگذارید.",
40 | "langs-param-error": "این {{PLURAL:$1|زبان|زبانها}} توسط موتور OCR پشتیبانی نمیشوند: $2",
41 | "loading-message": "در حال انجام رونویسی...",
42 | "tesseract-options": "گزینههای Tesseract",
43 | "tesseract-psm-label": "روش بخشبندی صفحه",
44 | "tesseract-psm-help": "برای پشتیبانی بهتر از متن چندستونی «متن پراکنده» را انتخاب کن.",
45 | "tesseract-psm-0": "فقط تشخیص جهت و خط (OSD)",
46 | "tesseract-psm-1": "بخشبندی خودکار صفحه با OSD",
47 | "tesseract-psm-2": "بخشبندی خودکار صفحه، ولی بدون استفاده از OSD یا OCR. (هنوز اجرایی نشده است)",
48 | "tesseract-psm-3": "بخشبندی کاملاً خودکار صفحه، ولی بدون OSD. (پیشفرض)",
49 | "tesseract-psm-4": "فرض کن متن تکستونی با اندازههای گوناگون است.",
50 | "tesseract-psm-5": "فرض کن بلوک یکنواختی از متن با چینش عمودی است.",
51 | "tesseract-psm-6": "فرض کن متن یک بلوک یکنواخت است.",
52 | "tesseract-psm-7": "با تصویر مانند یک خط متن رفتار کن.",
53 | "tesseract-psm-8": "با تصویر به عنوان تکواژه رفتار کن.",
54 | "tesseract-psm-9": "با تصویر به عنوان تکواژهای در یک دایره رفتار کن.",
55 | "tesseract-psm-10": "با تصویر به عنوان یک نویسه رفتار کن",
56 | "tesseract-psm-11": "متن پراکنده. تا جایی که ممکن است بدون توجه به ترتیب، متن پیدا کن.",
57 | "tesseract-psm-12": "متن پراکنده با OSD",
58 | "tesseract-psm-13": "خط خام. با تصویر به عنوان یک خط متن رفتار کن و از ترفندهای اختصاصی Tesseract صرف نظر کن.",
59 | "tesseract-param-error": "گزینه $1 با مقدار $2 توسط Tesseract پشتیبانی نمیشود. مقدار بیشینه: $3",
60 | "tesseract-no-text-error": "موتور Tesseract هیچ متنی برای این تصویر بازنگرداند.",
61 | "tesseract-internal-error": "موتور tesseract خطای داخلی برگرداند.",
62 | "transkribus-language-code": "مدل زبانی",
63 | "transkribus-unauthorized-error": "کد خطا ' $1 ' :: درخواست مجاز نیست",
64 | "transkribus-default-error": "کد خطا ' $1 ' :: درخواست تکمیل نشد، دوباره امتحان کنید!",
65 | "transkribus-empty-response-error": "نتیجه بازگردانیشده از API ترنسکریبوس قابل تشخیص و تجزیه نیست",
66 | "transkribus-init-process-error": "فرآیند Transkribus راهاندازی نشد",
67 | "transkribus-failed-process-error": "فرآیند Transkribus ناموفق بود",
68 | "transkribus-no-lang-error": "هیچ زبانی را انتخاب نکردید",
69 | "transkribus-multiple-lang-error": "نمیتوانید چند زبان را انتخاب کنید، پس فقط یکی را برگزینید",
70 | "transkribus-browse-public-models": "مشاهده همه مدلهای زبانی در ترنسکریبوس",
71 | "transkribus-request-for-model": "ثبت درخواست افزودن یک مدل ترنسکریبوس به ابزار رونویسی",
72 | "transkribus-options": "گزینههای ترنسکریبوس",
73 | "transkribus-line-label": "مدل تشخیص خط",
74 | "transkribus-line-id-none-option": "هیچ کدام",
75 | "transkribus-mixed-line-option": "چیدمان مختلط خطوط",
76 | "transkribus-line-help": "اگر نمیدانید از کدام مدل تشخیص خطوط استفاده کنید این مورد را خالی بگذارید",
77 | "transkribus-jobs": "وظایف ترنسکریبوس",
78 | "transkribus-job-id": "وظیفه ID",
79 | "transkribus-job-state": "وضعیت",
80 | "transkribus-job-description": "توضیحات",
81 | "transkribus-job-start": "آغاز شده",
82 | "transkribus-job-end": "پایان",
83 | "transkribus-job-waited": "تأخیر آغاز (دقیقه)"
84 | }
85 |
--------------------------------------------------------------------------------
/i18n/ar.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Dr-Taher",
5 | "Mdktb",
6 | "Meno25",
7 | "Mohanad",
8 | "Mohanad Kh"
9 | ]
10 | },
11 | "title": "ويكيميديا-أداة التعرف على الحروف",
12 | "subtitle": "أداة لنقل النصوص من الصور الممسوحة ضوئيًا على ويكيميديا كومنز، لاستخدامها على ويكي مصدر وأماكن أخرى.",
13 | "form-heading": "نسخ صورة",
14 | "image-url": "عنوان صفحة الشبكة للصورة (URL)",
15 | "image-url-help": "أدخل عنوان صفحة الشبكة لصورة مرفوعة على أحد خوادم ويكيميديا مثل: $1",
16 | "image-url-error": "يجب أن يبدأ عنوان صفحة الشبكة للصورة بـ {{PLURAL:$1|اسم النطاق التالي|أحد أسماء النطاقات التالية}} وينتهي بامتداد ملف صالح: $2",
17 | "image-alt-text": "الصورة الأصلية",
18 | "language-code": "اللغات (اختياري)",
19 | "engine": "أداة التعرف على الحروف",
20 | "engine-name-google": "التعرف الضوئي على الحروف في Google Cloud Vision",
21 | "engine-name-tesseract": "برنامج Tesseract OCR",
22 | "engine-name-transkribus": "ترانسكريبوس OCR",
23 | "engine-not-found-warning": "لم يتم العثور على الأداة المطلوبة ' $1 '. استخدام الأداة الافتراضية \" $2 \" بدلاً من ذلك.",
24 | "engine-invalid-langs-warning": "اللغات التالية غير صالحة أو غير مدعومة من قبل الأداة وجرى تجاهلها: $1",
25 | "submit": "نسخ الصفحة بأكملها",
26 | "submit-crop": "منطقة النسخ",
27 | "drag-help": "حدد أداة الاقتصاص واسحب مستطيلًا على الصورة أدناه لتحديد مساحة واحدة فقط من الصفحة.",
28 | "drag-mode-move": "السحب سيحرك الصورة",
29 | "drag-mode-move-alt": "رمز يمثل إجراء \"النقل\".",
30 | "drag-mode-crop": "سيؤدي السحب إلى إنشاء منطقة اقتصاص جديدة",
31 | "drag-mode-crop-alt": "رمز يمثل إجراء \"الاقتصاص\".",
32 | "copy-to-clipboard": "نسخ إلى الحافظة",
33 | "copied-to-clipboard": "نُسِخت!",
34 | "google-error": "أظهرت خدمة جوجل خطأً: $1",
35 | "image-retrieval-failed": "فشل استرداد الصورة: $1",
36 | "documentation": "التوثيق",
37 | "api": "واجهة برمجة التطبيقات",
38 | "api-tooltip": "عرض وثائق API",
39 | "version": "الإصدار $1",
40 | "report-issue": "أبلغ عن مشكلة",
41 | "langs-placeholder": "اتركه فارغًا للكشف التلقائي عن اللغة.",
42 | "langs-param-error": "التالي {{PLURAL:$1|هذه اللغة|هذه اللغات}} لا يدعمها أداة التعرف على الحروف: $2",
43 | "loading-message": "جاري النسخ...",
44 | "tesseract-options": "خيارات Tesseract",
45 | "tesseract-psm-label": "طريقة تجزئة الصفحة",
46 | "tesseract-psm-help": "جرب \"Sparse text\" للحصول على دعم أفضل للأعمدة المتعددة.",
47 | "tesseract-psm-0": "اكتشاف الاتجاه والنص (OSD) فقط.",
48 | "tesseract-psm-1": "تقسيم تلقائي للصفحة باستخدام OSD.",
49 | "tesseract-psm-2": "تجزئة الصفحة تلقائيًا، ولكن بدون OSD أو OCR. (لم تنفذ)",
50 | "tesseract-psm-3": "تجزئة تلقائية بالكامل للصفحة، ولكن بدون OSD. (افتراضي)",
51 | "tesseract-psm-4": "افترض عمودًا واحدًا من النص ذي الأحجام المتغيرة.",
52 | "tesseract-psm-5": "افترض كتلة نصية واحدة متجانسة، مع محاذاة رأسية.",
53 | "tesseract-psm-6": "افترض وجود كتلة نصية واحدة موحدة.",
54 | "tesseract-psm-7": "تعامل مع الصورة كسطر نصي واحد.",
55 | "tesseract-psm-8": "تعامل مع الصورة ككلمة واحدة.",
56 | "tesseract-psm-9": "تعامل مع الصورة ككلمة واحدة في دائرة.",
57 | "tesseract-psm-10": "تعامل مع الصورة كحرف واحد.",
58 | "tesseract-psm-11": "نص متفرق. ابحث عن أكبر قدر ممكن من النص بدون ترتيب معين.",
59 | "tesseract-psm-12": "نصوص متفرقة مع OSD.",
60 | "tesseract-psm-13": "سطر أولي. تعامل مع الصورة كسطر نصي واحد، متجاوزًا الاختراقات الخاصة بـ Tesseract.",
61 | "tesseract-param-error": "لا تدعم Tesseract خيار \"$1\" بقيمة $2. الحد الأقصى للقيمة: $3",
62 | "tesseract-no-text-error": "لم يرد محرك Tesseract أي نص لهذه الصورة.",
63 | "tesseract-internal-error": "أظهرت أداة tesseract خطأ داخلي.",
64 | "transkribus-language-code": "نموذج اللغة",
65 | "transkribus-unauthorized-error": "رمز الخطأ '$1' :: الطلب غير مصرح به",
66 | "transkribus-default-error": "رمز الخطأ '$1' :: غير قادر على إكمال الطلب، حاول مرة أخرى!",
67 | "transkribus-empty-response-error": "لم يتمكن من تحليل النتيجة من واجهة برمجة تطبيقات Transkribus",
68 | "transkribus-init-process-error": "فشل في تهيئة عملية Transkribus",
69 | "transkribus-failed-process-error": "فشلت عملية Transkribus",
70 | "transkribus-no-lang-error": "لم يتم اختيار اللغة",
71 | "transkribus-multiple-lang-error": "لا يُسمح باستخدام لغات متعددة، حدد لغة واحدة",
72 | "transkribus-browse-public-models": "تصفح جميع نماذج اللغة العامة لـ Transkribus",
73 | "transkribus-request-for-model": "قم بتقديم طلب لإضافة نموذج من Transkribus إلى أداة OCR",
74 | "transkribus-options": "خيارات ترانسكريبوس",
75 | "transkribus-line-label": "نموذج اكتشاف الخط",
76 | "transkribus-line-id-none-option": "لا شيء",
77 | "transkribus-mixed-line-option": "اتجاه الخط المختلط",
78 | "transkribus-line-help": "اتركه فارغًا إذا لم تكن متأكدًا من نموذج اكتشاف الخط الذي يجب استخدامه",
79 | "transkribus-jobs": "وظائف ترانسكريبوس",
80 | "transkribus-job-id": "معرف الوظيفة",
81 | "transkribus-job-state": "الحالة",
82 | "transkribus-job-description": "الوصف",
83 | "transkribus-job-start": "بدأ",
84 | "transkribus-job-end": "انتهى",
85 | "transkribus-job-waited": "تأخير البدء (بالدقائق)"
86 | }
87 |
--------------------------------------------------------------------------------
/i18n/sl.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Eleassar"
5 | ]
6 | },
7 | "title": "WikimediaOCR",
8 | "subtitle": "Prepisovanje besedila s slik",
9 | "form-heading": "Prepis slike",
10 | "image-url": "URL slike",
11 | "image-url-help": "Vstavite URL slike, ki gostuje v strežniku Wikimedie, npr.: $1",
12 | "image-url-error": "URL slike se mora začeti z {{PLURAL:$1|naslednjim domenskim imenom|enim od naslednjih domenskih imen}} in se končati z veljavno datotečno pripono: $2",
13 | "image-alt-text": "Izvorna slika",
14 | "language-code": "Jeziki (neobvezno)",
15 | "engine": "Motor OCR",
16 | "engine-name-google": "OCR Google Cloud Vision",
17 | "engine-name-transkribus": "Transkribus OCR",
18 | "engine-not-found-warning": "Zahtevanega motorja »$1« ni bilo mogoče najti. Namesto tega uporabite privzeti motor »$2«.",
19 | "engine-invalid-langs-warning": "Naslednji jeziki so neveljavni ali jih motor ne podpira in so bili prezrti: $1",
20 | "submit": "Prepiši celotno stran",
21 | "submit-crop": "Prepiši območje",
22 | "drag-help": "Izberite orodje za obrezovanje in povlecite pravokotnik na spodnji sliki, da prepišete samo določeno območje strani.",
23 | "drag-mode-move": "Z vlečenjem boste premaknili sliko",
24 | "drag-mode-move-alt": "Ikona, ki predstavlja dejanje »premakni«.",
25 | "drag-mode-crop": "Z vlečenjem boste določili novo območje za obrez",
26 | "drag-mode-crop-alt": "Ikona, ki predstavlja dejanje »obreži«.",
27 | "copy-to-clipboard": "Kopiraj v odložišče",
28 | "copied-to-clipboard": "Kopirano!",
29 | "google-error": "Googlova storitev je vrnila napako: $1",
30 | "image-retrieval-failed": "Pridobivanje slike ni uspelo: $1",
31 | "documentation": "Dokumentacija",
32 | "api-tooltip": "Oglejte si dokumentacijo API-ja",
33 | "version": "Različica $1",
34 | "report-issue": "Sporočite težavo",
35 | "langs-placeholder": "Za samodejno prepoznavo jezika pustite prazno.",
36 | "langs-param-error": "{{PLURAL:$1|Naslednjega jezika|Naslednjih jezikov}} motor OCR ne podpira: $2",
37 | "tesseract-options": "Možnosti Tesseracta",
38 | "tesseract-psm-label": "Način segmentacije strani",
39 | "tesseract-psm-help": "Za boljšo podporo več stolpcev preizkusite »Sparse text«.",
40 | "tesseract-psm-0": "Samo usmeritev in prepoznava pisave (OSD).",
41 | "tesseract-psm-1": "Samodejna segmentacija strani z OSD.",
42 | "tesseract-psm-2": "Samodejna segmentacija strani, vendar brez OSD ali OCR. (ni implementirano)",
43 | "tesseract-psm-3": "Popolnoma samodejna segmentacija strani, vendar brez OSD. (privzeto)",
44 | "tesseract-psm-4": "Predpostavi en sam stolpec besedila različnih velikosti.",
45 | "tesseract-psm-5": "Predpostavi en sam enotni blok navpično poravnanega besedila.",
46 | "tesseract-psm-6": "Predpostavi en sam enotni blok besedila.",
47 | "tesseract-psm-7": "Sliko obravnavaj kot eno samo vrstico besedila.",
48 | "tesseract-psm-8": "Sliko obravnavaj kot eno samo besedo.",
49 | "tesseract-psm-9": "Sliko obravnavaj kot eno samo besedo v krogu.",
50 | "tesseract-psm-10": "Sliko obravnavaj kot en sam znak.",
51 | "tesseract-psm-11": "Pičlo besedilo. Poišči čim več besedila brez določenega vrstnega reda.",
52 | "tesseract-psm-12": "Pičlo besedilo z OSD.",
53 | "tesseract-psm-13": "Neobdelana vrstica. Obravnavaj sliko kot eno samo vrstico besedila, da se preprečijo artefakti, specifični za Tesseract.",
54 | "tesseract-param-error": "Možnosti »$1« z vrednostjo $2 Tesseract ne podpira. Najvišja vrednost: $3",
55 | "tesseract-internal-error": "Motor Tesseract je vrnil notranjo napako.",
56 | "transkribus-language-code": "Jezikovni model",
57 | "transkribus-unauthorized-error": "Koda napake »$1« :: Zahtevek ni pooblaščen",
58 | "transkribus-default-error": "Koda napake »$1« :: Zahtevka ni mogoče dokončati, poskusite znova!",
59 | "transkribus-empty-response-error": "Rezultata API-ja Transkribus ni bilo mogoče razčleniti",
60 | "transkribus-init-process-error": "Inicializacija postopka Transkribus ni uspela",
61 | "transkribus-failed-process-error": "Postopek Transkribus ni uspel",
62 | "transkribus-no-lang-error": "Izbran ni bil noben jezik",
63 | "transkribus-multiple-lang-error": "Več jezikov ni dovoljenih, določite en jezik",
64 | "transkribus-browse-public-models": "Prebrskajte vse javne jezikovne modele za Transkribus",
65 | "transkribus-request-for-model": "Ustvarite prošnjo za dodajanje modela iz Transkribusa v orodje OCR",
66 | "transkribus-options": "Možnosti Transkribus",
67 | "transkribus-line-label": "Model zaznavanja linij",
68 | "transkribus-line-id-none-option": "Noben",
69 | "transkribus-mixed-line-option": "Mešana usmeritev linij",
70 | "transkribus-line-help": "Če niste prepričani, kateri model zaznavanja linij uporabiti, pustite prazno.",
71 | "transkribus-jobs": "Opravila Transkribus",
72 | "transkribus-job-id": "ID opravila",
73 | "transkribus-job-state": "Stanje",
74 | "transkribus-job-description": "Opis",
75 | "transkribus-job-start": "Začeto",
76 | "transkribus-job-end": "Končano",
77 | "transkribus-job-waited": "Začetna zakasnitev (minute)"
78 | }
79 |
--------------------------------------------------------------------------------
/i18n/krc.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Къарачайлы"
5 | ]
6 | },
7 | "title": "ВикимедиаOCR",
8 | "subtitle": "Суратдан текстни ачыкъла",
9 | "form-heading": "Суратны ачыкъла",
10 | "image-url": "Суратны URL-и",
11 | "image-url-help": "Викимедиа серверде орналгъан суратны URL-ин салыгъы, сёз ючюн: $1",
12 | "image-url-error": "Сурат URL-и {{PLURAL:$1|бу домен ат бла|бу домен атланы бири бла}} башланыргъа керекди эмда джараулу файл кенгериу бла бошалыргъа керекди: $2",
13 | "image-alt-text": "Оригинал сурат",
14 | "language-code": "Тилле (амалсыз тюлдюле)",
15 | "engine": "OCR мотор",
16 | "engine-name-transkribus": "Transkribus OCR",
17 | "engine-not-found-warning": "Изленнген мотор' $1 ' табылалмады. Буну орнуна дефолт мотор ' $2 ' хайырланады.",
18 | "engine-invalid-langs-warning": "Тюбюндеги тилле джараусуздула неда мотор джанындан дагъан алмайдыла, неда джокъгъа саналадыла: $1",
19 | "submit": "Бютеу бетни транскрипция эт",
20 | "submit-crop": "Транскрипция къыр",
21 | "drag-help": "Бетни джангы бир джерин копия этер ючюн къыркъыу адырны сайла эмда тюбюндеги суратда тик тёртмюйюшню тарт.",
22 | "drag-mode-move": "Суратны тартыу, аны орнундан тебдирликди",
23 | "drag-mode-move-alt": "'Ташыу' этиуню кёргюзген белги.",
24 | "drag-mode-crop": "Тартыу, джангы къыркъыу аламны къурлукъду",
25 | "drag-mode-crop-alt": "'Къыркъыу' этиуню кёргюзген белги.",
26 | "copy-to-clipboard": "Алмашдырыу буферге копия эт",
27 | "copied-to-clipboard": "Копия этилди!",
28 | "google-error": "Google къуллукъ халат къайтарды: $1",
29 | "image-retrieval-failed": "Сурат алынамады: $1",
30 | "documentation": "Документация",
31 | "api-tooltip": "API документациягъа къара",
32 | "version": "Версия $1",
33 | "report-issue": "Проблеманы юсюнден билдир",
34 | "langs-placeholder": "Тилни автомат халда айгъакълар ючюн къырны бош къойгъуз.",
35 | "langs-param-error": "Бу {{PLURAL:$1|тил|тилле}}, OCR мотор джанындан дагъан болмайдыла: $2",
36 | "loading-message": "Транскрипция этиле турады...",
37 | "tesseract-options": "Tesseract джарашдырыула",
38 | "tesseract-psm-label": "Бетни бёлюмлеме амал",
39 | "tesseract-psm-help": "Талай багъананы дагъаны ючюн \"Аралыкълы текстни\" сынагъыз.",
40 | "tesseract-psm-0": "Къуру ориентация бла скриптлени табыу (OSD)",
41 | "tesseract-psm-1": "OSD бла автомат халда бетни бёлюмлеме.",
42 | "tesseract-psm-2": "Автомат халда бет бёлюмлеме, алай OSD неда OCR болмагъанлай. (этилмеди)",
43 | "tesseract-psm-3": "Толу автомат халда бет бёлюмлеме, алай OSD тышында. (Дефолт)",
44 | "tesseract-psm-4": "Тюрленнген ёлчемледе джангыз бир текст багъананы баргъа сана.",
45 | "tesseract-psm-5": "Тик халда тюзетилген текстни джангыз бир типли блогу баргъа сана.",
46 | "tesseract-psm-6": "Джангыз бир типли текст блокну баргъа сана.",
47 | "tesseract-psm-7": "Суратха бир текст тизгинча къара.",
48 | "tesseract-psm-8": "Суратха бир джангыз сёзча къара.",
49 | "tesseract-psm-9": "Суратны тёгерек ичинде джангыз сёзча кёр.",
50 | "tesseract-psm-10": "Суратха джангыз символча къара.",
51 | "tesseract-psm-11": "Аралыкълы текст. Белгили низам бла не къадар кёб текст табалсанг таб.",
52 | "tesseract-psm-12": "OSD бла аралыкълы текст.",
53 | "tesseract-psm-13": "Чий тизгин. Tesseract энчи хакланы атлатыб, джангыз текст тизгин кибик къара.",
54 | "tesseract-param-error": "$2 багъагъа ие болгъан \"$1\" сайлама Tesseract джанындан дагъан болмаз. Максимум багъасы: $3",
55 | "tesseract-no-text-error": "Tesseract тебдириучю бу сурат ючюн текстни къайтармады.",
56 | "tesseract-internal-error": "Tesseract механизм ич халатны ызына къайтарды.",
57 | "transkribus-language-code": "Тил модель",
58 | "transkribus-unauthorized-error": "Халатны коду «$1» :: Излем авторизация ётмегенди",
59 | "transkribus-default-error": "Халатны коду «$1» :: Излем тамамланалмайды, джангыдан сынагъыз!",
60 | "transkribus-empty-response-error": "Transkribus API эсеб анализ этилалмады.",
61 | "transkribus-init-process-error": "Transkribus процессни инициализациясы джетишимсиз болду",
62 | "transkribus-failed-process-error": "Transkribus процесс джетишимсиз болду",
63 | "transkribus-no-lang-error": "Сайланнган тил джокъду",
64 | "transkribus-multiple-lang-error": "Талай тил хайырланыргъа болмайды, бирни белгилегиз",
65 | "transkribus-browse-public-models": "Transkribus ючюн бютеу ачыкъ тил моделлега къара",
66 | "transkribus-request-for-model": "OCR адыргъа Transkribus моделин къошаргъа излем эт",
67 | "transkribus-options": "Transkribus Опцияла",
68 | "transkribus-line-label": "Сызны Эслеген Модель",
69 | "transkribus-line-id-none-option": "Джокъду",
70 | "transkribus-mixed-line-option": "Сызны Къатыш Ориентациясы",
71 | "transkribus-line-help": "Къайсы сыз эслеучю моделни хайырланыргъа билмей эсегиз, бош къоюгъуз.",
72 | "transkribus-jobs": "Transkribus Вакансия",
73 | "transkribus-job-id": "Вакансия ID",
74 | "transkribus-job-state": "Хал",
75 | "transkribus-job-description": "Ачыкълау",
76 | "transkribus-job-start": "Башланнганды",
77 | "transkribus-job-end": "Бошалгъанды",
78 | "transkribus-job-waited": "Башлауну кечикгени (минутла)"
79 | }
80 |
--------------------------------------------------------------------------------
/i18n/it.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Ajeje Brazorf",
5 | "Beta16",
6 | "Candalua"
7 | ]
8 | },
9 | "title": "WikimediaOCR",
10 | "subtitle": "Uno strumento per trascrivere testo da immagini scansionate su Wikimedia Commons, per l'uso su Wikisource e altrove.",
11 | "form-heading": "Trascrivi un'immagine",
12 | "image-url": "URL dell'immagine",
13 | "image-url-help": "Inserisci l'URL di un'immagine ospitata su un server Wikimedia come: $1",
14 | "image-url-error": "L'URL dell'immagine deve iniziare con {{PLURAL:$1|il seguente nome di dominio|uno dei seguenti nomi di dominio}} e terminare con un'estensione di file valida: $2",
15 | "image-alt-text": "L'immagine originale",
16 | "language-code": "Lingue (facoltativo)",
17 | "engine": "Motore di OCR",
18 | "engine-name-transkribus": "Transkribus OCR",
19 | "engine-not-found-warning": "Il motore richiesto '$1' non è stato trovato. Al suo posto verrà usato il motore predefinito '$2'.",
20 | "engine-invalid-langs-warning": "Le seguenti lingue non sono valide o non sono supportate dal motore e sono state ignorate: $1",
21 | "submit": "Trascrivi tutta la pagina",
22 | "submit-crop": "Trascrivi un'area",
23 | "drag-help": "Seleziona lo strumento di ritaglio e trascina un rettangolo sull'immagine sottostante per trascrivere solo un'area della pagina.",
24 | "drag-mode-move": "Il trascinamento sposterà l'immagine",
25 | "drag-mode-move-alt": "Icona che rappresenta l'azione 'sposta'.",
26 | "drag-mode-crop": "Il trascinamento creerà una nuova area di ritaglio",
27 | "drag-mode-crop-alt": "Icona che rappresenta l'azione 'ritaglia'.",
28 | "copy-to-clipboard": "Copia negli appunti",
29 | "copied-to-clipboard": "Copiato!",
30 | "google-error": "Il servizio Google ha restituito un errore: $1",
31 | "image-retrieval-failed": "Recupero dell'immagine non riuscito: $1",
32 | "documentation": "Documentazione",
33 | "api-tooltip": "Vedi la documentazione dell'API",
34 | "version": "Versione $1",
35 | "report-issue": "Segnala un problema",
36 | "langs-placeholder": "Lascia vuoto per il rilevamento automatico della lingua.",
37 | "langs-param-error": "{{PLURAL:$1|La lingua seguente non è supportata|Le lingue seguenti non sono supportate}} dal motore OCR: $2",
38 | "tesseract-options": "Opzioni di Tesseract",
39 | "tesseract-psm-label": "Metodo di segmentazione della pagina",
40 | "tesseract-psm-help": "Prova \"Testo sparso\" per un migliore supporto multicolonna.",
41 | "tesseract-psm-0": "Solo rilevamento orientamento e script (OSD).",
42 | "tesseract-psm-1": "Segmentazione automatica della pagina con OSD.",
43 | "tesseract-psm-2": "Segmentazione automatica della pagina, ma nessun OSD o OCR. (non implementato)",
44 | "tesseract-psm-3": "Segmentazione della pagina completamente automatica, ma nessun OSD. (Predefinito)",
45 | "tesseract-psm-4": "Presupponi una singola colonna di testo di dimensioni variabili.",
46 | "tesseract-psm-5": "Presupponi un singolo blocco uniforme di testo allineato verticalmente.",
47 | "tesseract-psm-6": "Presupponi un singolo blocco uniforme di testo.",
48 | "tesseract-psm-7": "Tratta l'immagine come una singola riga di testo.",
49 | "tesseract-psm-8": "Tratta l'immagine come una singola parola.",
50 | "tesseract-psm-9": "Tratta l'immagine come una singola parola in un cerchio.",
51 | "tesseract-psm-10": "Tratta l'immagine come un singolo carattere.",
52 | "tesseract-psm-11": "Testo sparso. Trova più testo possibile senza un ordine particolare.",
53 | "tesseract-psm-12": "Testo sparso con OSD.",
54 | "tesseract-psm-13": "Linea grezza. Tratta l'immagine come una singola riga di testo, aggirando gli hack specifici di Tesseract.",
55 | "tesseract-param-error": "L'opzione '$1' con un valore di $2 non è supportata da Tesseract. Valore massimo: $3",
56 | "tesseract-internal-error": "Il motore Tesseract ha restituito un errore interno.",
57 | "transkribus-language-code": "Modello linguistico",
58 | "transkribus-unauthorized-error": "Codice di errore '$1' :: La richiesta non è autorizzata",
59 | "transkribus-default-error": "Codice di errore '$1' :: Impossibile completare la richiesta, riprova!",
60 | "transkribus-empty-response-error": "Impossibile analizzare il risultato dall'API Transkribus",
61 | "transkribus-init-process-error": "Impossibile inizializzare il processo Transkribus",
62 | "transkribus-failed-process-error": "Il processo Transkribus non è riuscito",
63 | "transkribus-no-lang-error": "Non è stata selezionata alcuna lingua",
64 | "transkribus-multiple-lang-error": "Non sono consentite più lingue, specificare una lingua",
65 | "transkribus-browse-public-models": "Sfoglia tutti i modelli linguistici pubblici per Transkribus",
66 | "transkribus-request-for-model": "Fai una richiesta per aggiungere un modello da Transkribus allo strumento OCR",
67 | "transkribus-line-id-none-option": "Nessuno",
68 | "transkribus-job-id": "ID attività",
69 | "transkribus-job-state": "Stato",
70 | "transkribus-job-description": "Descrizione",
71 | "transkribus-job-start": "Iniziato",
72 | "transkribus-job-end": "Finito",
73 | "transkribus-job-waited": "Ritardo di avvio (minuti)"
74 | }
75 |
--------------------------------------------------------------------------------
/i18n/tr.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Hedda",
5 | "Leo",
6 | "Slickdaddy"
7 | ]
8 | },
9 | "title": "WikimediaOCR",
10 | "subtitle": "Wikimedia Commons'ta taranmış görsellerden metinleri yazıya dökmek ve Vikikaynak ve diğer yerlerde kullanmak için bir araç.",
11 | "form-heading": "Bir görüntüyü metne dönüştürün",
12 | "image-url": "Resim URL'si",
13 | "image-url-help": "Bir Wikimedia sunucusunda barındırılan bir resim URL'si ekleyin, örneğin: $1",
14 | "image-url-error": "Resim URL'si {{PLURAL:$1|şu alan adıyla|şu alan adlarından biriyle}} başlamalı ve geçerli bir dosya uzantısıyla bitmelidir: $2",
15 | "image-alt-text": "Özgün resim",
16 | "language-code": "Diller (isteğe bağlı)",
17 | "engine": "OCR motoru",
18 | "engine-name-transkribus": "Transkribus OCR",
19 | "engine-not-found-warning": "İstenen motor '$1' bulunamadı. Bunun yerine varsayılan motor '$2' kullanılıyor.",
20 | "engine-invalid-langs-warning": "Aşağıdaki diller geçersizdir veya altyapı tarafından desteklenmez ve yok sayılır: $1",
21 | "submit": "Tüm sayfayı transkript et",
22 | "submit-crop": "Transkript alanı",
23 | "drag-help": "Sayfanın yalnızca bir alanını kopyalamak için kırpma aracını seçin ve aşağıdaki resimde bir dikdörtgeni sürükleyin.",
24 | "drag-mode-move": "Sürüklemek görüntüyü hareket ettirir",
25 | "drag-mode-move-alt": "'Taşı' eylemini temsil eden simge.",
26 | "drag-mode-crop": "Sürükleme, yeni bir kırpma alanı oluşturacak",
27 | "drag-mode-crop-alt": "'Kırpma' eylemini temsil eden simge.",
28 | "copy-to-clipboard": "Panoya kopyala",
29 | "copied-to-clipboard": "Kopyalandı!",
30 | "google-error": "Google hizmeti bir hata verdi: $1",
31 | "image-retrieval-failed": "Resim alınamadı: $1",
32 | "documentation": "Belgelendirme",
33 | "api": "API",
34 | "api-tooltip": "API belgelerini görüntüleyin",
35 | "version": "Sürüm $1",
36 | "report-issue": "Sorun bildirin",
37 | "langs-placeholder": "Otomatik dil algılama için boş bırakın.",
38 | "langs-param-error": "Şu {{PLURAL:$1|dil|diller}}, OCR motoru tarafından desteklenmemektedir: $2",
39 | "loading-message": "Transkripsiyon gerçekleştiriliyor...",
40 | "tesseract-options": "Tesseract seçenekleri",
41 | "tesseract-psm-label": "Sayfa bölümleme yöntemi",
42 | "tesseract-psm-help": "Daha iyi çoklu sütun desteği için \"Aralıklı metin\"i deneyin.",
43 | "tesseract-psm-0": "Yalnızca yönlendirme ve komut dosyası algılama (OSD).",
44 | "tesseract-psm-1": "OSD ile otomatik sayfa bölümleme.",
45 | "tesseract-psm-2": "Otomatik sayfa bölümleme ancak OSD veya OCR mevcut değil. (uygulanmadı)",
46 | "tesseract-psm-3": "Tam otomatik sayfa bölümleme ancak OSD mevcut değil. (Varsayılan)",
47 | "tesseract-psm-4": "Değişken boyutlarda tek bir metin sütunu varsayın.",
48 | "tesseract-psm-5": "Dikey olarak hizalanmış tek bir tek tip metin bloğu varsayın.",
49 | "tesseract-psm-6": "Tek bir tek tip metin bloğu varsayın.",
50 | "tesseract-psm-7": "Resmi, tek bir metin satırı olarak ele alın.",
51 | "tesseract-psm-8": "Resmi, tek bir kelime olarak ele alın.",
52 | "tesseract-psm-9": "Resmi bir daire içinde tek bir kelime olarak ele alın.",
53 | "tesseract-psm-10": "Resmi, tek bir karakter olarak ele alın.",
54 | "tesseract-psm-11": "Aralıklı metin. Belirli bir sıra gözetmeksizin mümkün olduğunca fazla metin bulun.",
55 | "tesseract-psm-12": "OSD ile aralıklı metin.",
56 | "tesseract-psm-13": "Ham satır. Görüntüyü, Tesseract'a özgü geçici çözümleri atlayarak tek bir metin satırı olarak ele alın.",
57 | "tesseract-param-error": "$2 değerine sahip '$1' seçeneği Tesseract tarafından desteklenmez. Maksimum değer: $3",
58 | "tesseract-no-text-error": "Tesseract motoru bu görüntü için herhangi bir metin döndürmedi.",
59 | "tesseract-internal-error": "Tesseract motoru dahili bir hata verdi.",
60 | "transkribus-language-code": "Dil Modeli",
61 | "transkribus-unauthorized-error": "Hata Kodu '$1' :: İstek yetkilendirilmedi",
62 | "transkribus-default-error": "Hata Kodu '$1' :: İstek tamamlanamadı, tekrar deneyin!",
63 | "transkribus-empty-response-error": "Transkribus API'sinden sonuç ayrıştırılamadı",
64 | "transkribus-init-process-error": "Transkribus işlemi başlatılamadı",
65 | "transkribus-failed-process-error": "Transkribus işlemi başarısız oldu",
66 | "transkribus-no-lang-error": "Dil seçilmedi",
67 | "transkribus-multiple-lang-error": "Birden fazla dile izin verilmiyor, bir dil belirtin",
68 | "transkribus-browse-public-models": "Transkribus için tüm genel dil modellerine göz atın",
69 | "transkribus-request-for-model": "Transkribus'tan OCR aracına bir model eklemek için istekte bulunun",
70 | "transkribus-options": "Transkribus Seçenekleri",
71 | "transkribus-line-label": "Çizgi Algılama Modeli",
72 | "transkribus-line-id-none-option": "Hiçbiri",
73 | "transkribus-mixed-line-option": "Karışık Çizgi Yönü",
74 | "transkribus-line-help": "Hangi çizgi algılama modelini kullanacağınızdan emin değilseniz boş bırakın",
75 | "transkribus-jobs": "Transkribus İşleri",
76 | "transkribus-job-id": "İş Kimliği",
77 | "transkribus-job-state": "Durum",
78 | "transkribus-job-description": "Açıklama",
79 | "transkribus-job-start": "Başladı",
80 | "transkribus-job-end": "Bitti",
81 | "transkribus-job-waited": "Başlatma gecikmesi (dakika)"
82 | }
83 |
--------------------------------------------------------------------------------
/i18n/ms.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Anakmalaysia",
5 | "Hakimi97",
6 | "Tofeiku"
7 | ]
8 | },
9 | "title": "WikimediaOCR",
10 | "subtitle": "Transkripsikan teks daripada imej",
11 | "form-heading": "Transkripsikan imej",
12 | "image-url": "URL imej",
13 | "image-url-help": "Masukkan URL imej yang dihoskan pada pelayan Wikimedia seperti: $1",
14 | "image-url-error": "URL imej mesti bermula dengan {{PLURAL:$1|nama domain berikut|salah satu daripada nama domain berikut}} dan berakhir dengan sambungan fail yang sah: $2",
15 | "image-alt-text": "Imej asal",
16 | "language-code": "Bahasa (pilihan)",
17 | "engine": "Enjin OCR",
18 | "engine-name-transkribus": "Transkribus OCR",
19 | "engine-not-found-warning": "Enjin yang diminta '$1' tidak ditemui. Menggunakan enjin lalai '$2' sebaliknya.",
20 | "engine-invalid-langs-warning": "Bahasa berikut tidak sah atau tidak disokong oleh enjin dan diabaikan: $1",
21 | "submit": "Transkripsikan keseluruhan halaman",
22 | "submit-crop": "Transkripsikan lingkungan",
23 | "drag-help": "Pilih alat pangkas dan seret segi empat panjang pada imej di bawah untuk menyalin hanya satu lingkungan halaman.",
24 | "drag-mode-move": "Menyeret akan mengalihkan imej",
25 | "drag-mode-move-alt": "Ikon yang mewakili tindakan 'pindah'.",
26 | "drag-mode-crop": "Menyeret akan mewujudkan keluasan pangkas baharu",
27 | "drag-mode-crop-alt": "Ikon yang mewakili tindakan 'pangkas'.",
28 | "copy-to-clipboard": "Salin ke papan keratan (clipboard)",
29 | "copied-to-clipboard": "Disalin!",
30 | "google-error": "Perkhidmatan Google mengembalikan ralat: $1",
31 | "image-retrieval-failed": "Pengambilan semula imej gagal: $1",
32 | "documentation": "Pendokumenan",
33 | "api-tooltip": "Lihat pendokumenan API",
34 | "version": "Versi $1",
35 | "report-issue": "Laporkan masalah",
36 | "langs-placeholder": "Biarkan kosong untuk pengesanan bahasa automatik.",
37 | "langs-param-error": "{{PLURAL:$1|Bahasa|Bahasa-bahasa}} berikut tidak disokong oleh enjin OCR: $2",
38 | "loading-message": "Melakukan transkripsi...",
39 | "tesseract-options": "Pilihan Tesseract",
40 | "tesseract-psm-label": "Kaedah segmen halaman",
41 | "tesseract-psm-help": "Cuba \"Taburkan teks\" untuk sokongan berbilang lajur yang lebih baik.",
42 | "tesseract-psm-0": "Hanya pengesanan orientasi dan skrip (OSD).",
43 | "tesseract-psm-1": "Pensegmenan halaman automatik dengan OSD.",
44 | "tesseract-psm-2": "Pensegmenan halaman automatik, tetapi tiada OSD atau OCR. (tidak dilaksanakan)",
45 | "tesseract-psm-3": "Pensegmenan halaman automatik penuh, tetapi tiada OSD. (Lalai)",
46 | "tesseract-psm-4": "Andaikan satu lajur teks dengan saiz berubah-ubah.",
47 | "tesseract-psm-5": "Andaikan satu blok seragam tunggal teks yang dijajarkan secara menegak.",
48 | "tesseract-psm-6": "Andaikan satu blok seragam tunggal teks.",
49 | "tesseract-psm-7": "Anggap imej sebagai satu baris teks.",
50 | "tesseract-psm-8": "Anggap imej sebagai satu perkataan tunggal.",
51 | "tesseract-psm-9": "Anggap imej sebagai satu perkataan tunggal dalam lingkaran.",
52 | "tesseract-psm-10": "Anggap imej sebagai satu aksara tunggal.",
53 | "tesseract-psm-11": "Taburkan teks. Cari sebanyak teks yang mungkin tanpa urutan tertentu.",
54 | "tesseract-psm-12": "Taburkan teks dengan OSD.",
55 | "tesseract-psm-13": "Barisan mentah. Anggap imej sebagai satu baris teks, memintas penggodaman yang khusus Tesseract.",
56 | "tesseract-param-error": "Pilihan '$1' dengan nilai $2 adalah tidak disokong oleh Tesseract. Nilai maksimum: $3",
57 | "tesseract-no-text-error": "Enjin Tesseract tidak mengembalikan sebarang teks untuk imej ini.",
58 | "tesseract-internal-error": "Enjin Tesseract mengembalikan ralat dalaman.",
59 | "transkribus-language-code": "Model bahasa",
60 | "transkribus-unauthorized-error": "Kod Ralat '$1' :: Permintaan tidak dibenarkan",
61 | "transkribus-default-error": "Kod Ralat '$1' :: Tidak dapat memenuhi permintaan, cuba lagi!",
62 | "transkribus-empty-response-error": "Tidak dapat menghuraikan hasil daripada API Transkribus",
63 | "transkribus-init-process-error": "Gagal memulakan proses Transkribus",
64 | "transkribus-failed-process-error": "Proses Transkribus gagal",
65 | "transkribus-no-lang-error": "Tiada bahasa dipilih",
66 | "transkribus-multiple-lang-error": "Keberbilangan bahasa tidak dibenarkan, khususkan satu bahasa",
67 | "transkribus-browse-public-models": "Semak imbas semua model bahasa awam untuk Transkribus",
68 | "transkribus-request-for-model": "Buat permintaan untuk menambah model daripada Transkribus kepada alat OCR",
69 | "transkribus-options": "Pilihan Transkribus",
70 | "transkribus-line-label": "Model Pengesanan Barisan",
71 | "transkribus-line-id-none-option": "Tiada",
72 | "transkribus-mixed-line-option": "Orientasi Barisan Campuran",
73 | "transkribus-line-help": "Biarkan kosong jika anda tidak pasti model pengesanan barisan mana yang hendak digunakan",
74 | "transkribus-jobs": "Tugasan Transkribus",
75 | "transkribus-job-id": "ID tugasan",
76 | "transkribus-job-state": "Keadaan",
77 | "transkribus-job-description": "Keterangan",
78 | "transkribus-job-start": "Bermula",
79 | "transkribus-job-end": "Selesai",
80 | "transkribus-job-waited": "Kelewatan bermula (minit)"
81 | }
82 |
--------------------------------------------------------------------------------
/src/Engine/TranskribusEngine.php:
--------------------------------------------------------------------------------
1 | 'Balinese Line Detection Model',
24 | ];
25 |
26 | /**
27 | * TranskribusEngine constructor: keeps the Transkribus client and delegates the rest to the parent.
28 | * @param TranskribusClient $transkribusClient Stored in $this->transkribusClient
29 | * @param Intuition $intuition Passed on to the parent constructor
30 | * @param string $projectDir Passed on to the parent constructor
31 | * @param HttpClientInterface $httpClient Passed on to the parent constructor
32 | */
33 | public function __construct(
34 | TranskribusClient $transkribusClient,
35 | Intuition $intuition,
36 | string $projectDir,
37 | HttpClientInterface $httpClient
38 | ) {
39 | parent::__construct( $intuition, $projectDir, $httpClient );
40 |
41 | $this->transkribusClient = $transkribusClient;
42 | }
43 |
44 | /** Identifier of this engine: always the literal string 'transkribus'.
45 | * @inheritDoc
46 | */
47 | public static function getId(): string {
48 | return 'transkribus';
49 | }
50 |
51 | /**
52 |  * List the line detection models declared in the model list.
53 |  *
54 |  * Only model-list entries with a non-empty 'line' value are considered. The result is:
55 |  *  - the model-list keys of those entries when $onlyLineIdLangs is true (checked first),
56 |  *  - the line detection model IDs when only $onlyLineIds is true,
57 |  *  - otherwise a map of line detection model ID => model name.
58 |  *
59 |  * @param bool $onlyLineIds Whether to return only the line detection model IDs
60 |  * @param bool $onlyLineIdLangs Whether to return only the model-list keys that have a line model
61 |  * @return string[] Model-list keys, model IDs, or model names keyed by model ID
62 |  */
63 | public function getValidLineIds( bool $onlyLineIds = false, bool $onlyLineIdLangs = false ): array {
64 |     // Keep the entries that actually name a line detection model.
65 |     $modelsWithLine = [];
66 |     foreach ( $this->getModelList() as $langKey => $modelInfo ) {
67 |         if ( isset( $modelInfo['line'] ) && $modelInfo['line'] !== '' ) {
68 |             $modelsWithLine[$langKey] = $modelInfo;
69 |         }
70 |     }
71 |     $langKeys = array_keys( $modelsWithLine );
72 |
73 |     // The keys are returned exactly as written in the model list.
74 |     if ( $onlyLineIdLangs ) {
75 |         return $langKeys;
76 |     }
77 |
78 |     // Look up each model's name, keyed by its line detection model ID.
79 |     $namesByLineId = [];
80 |     foreach ( $langKeys as $langKey ) {
81 |         $lineId = $modelsWithLine[$langKey]['line'];
82 |         $namesByLineId[$lineId] = $this->getLineIdModelName( $langKey );
83 |     }
84 |
85 |     if ( $onlyLineIds ) {
86 |         return array_keys( $namesByLineId );
87 |     }
88 |     return $namesByLineId;
89 | }
90 |
91 | /**
92 |  * Get the display name of the line detection model registered for a language code.
93 |  * @param string|null $lineIdLang Key into self::LINE_ID_MODEL_NAMES; null or unknown keys are tolerated
94 |  * @return string The registered name, else the code itself ('' for null) instead of a null TypeError
95 |  */
96 | public function getLineIdModelName( ?string $lineIdLang = null ): string {
97 |     return self::LINE_ID_MODEL_NAMES[$lineIdLang ?? ''] ?? (string)$lineIdLang;
98 | }
99 |
100 | /**
101 | * Set the line detection model ID; getResult() passes it to the Transkribus client's initProcess().
102 | * @param int $lineId Value stored in $this->lineId
103 | * @return void
104 | */
105 | public function setLineId( int $lineId ): void {
106 | $this->lineId = $lineId;
107 | }
108 |
109 | /**
110 |  * @inheritDoc
111 |  * Transcribes with the single selected model, polling Transkribus until the status is 'FINISHED'.
112 |  * @throws OcrException E.g. when language filtering leaves no code, or more than one
113 |  */
114 | public function getResult(
115 |     string $imageUrl,
116 |     string $invalidLangsMode,
117 |     array $crop,
118 |     ?array $langs = null
119 | ): EngineResult {
120 |     $this->checkImageUrl( $imageUrl );
121 |
122 |     // Crop corners as "x,y" pairs: (x,y) (x+w,y) (x+w,y+h) (x,y+h); empty string when not cropping.
123 |     $points = '';
124 |     if ( $crop ) {
125 |         $x1 = $crop['x'];
126 |         $y1 = $crop['y'];
127 |         $x2 = $crop['x'] + $crop['width'];
128 |         $y2 = $crop['y'] + $crop['height'];
129 |         $points = "$x1,$y1 $x2,$y1 $x2,$y2 $x1,$y2";
130 |     }
131 |
132 |     [ $validLangs, $invalidLangs ] = $this->filterValidLangs( $langs, $invalidLangsMode );
133 |     if ( !$validLangs ) {
134 |         throw new OcrException( 'transkribus-no-lang-error' );
135 |     }
136 |     if ( count( $validLangs ) > 1 ) {
137 |         throw new OcrException( 'transkribus-multiple-lang-error' );
138 |     }
139 |     // reset() yields the single remaining code even if filtering did not leave it at index 0.
140 |     $modelInfo = $this->getModelList()[reset( $validLangs )];
141 |     $htrModelId = (int)$modelInfo['htr'];
142 |     $image = $this->getImage( $imageUrl, $crop, self::DO_DOWNLOAD_IMAGE );
143 |     $processId = $this->transkribusClient->initProcess( $image, $htrModelId, $this->lineId, $points );
144 |
145 |     // Fetch at least once and sleep only between polls, so a finished job returns without delay.
146 |     $resText = $this->transkribusClient->retrieveProcessResult( $processId );
147 |     while ( $this->transkribusClient->processStatus !== 'FINISHED' ) {
148 |         sleep( 2 );
149 |         $resText = $this->transkribusClient->retrieveProcessResult( $processId );
150 |     }
151 |
152 |     $warnings = $invalidLangs ? [ $this->getInvalidLangsWarning( $invalidLangs ) ] : [];
153 |     return new EngineResult( $resText, $warnings );
154 | }
155 | }
156 |
--------------------------------------------------------------------------------
/i18n/bn.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Bodhisattwa",
5 | "Tahmid",
6 | "ZI Jony",
7 | "আফতাবুজ্জামান"
8 | ]
9 | },
10 | "title": "উইকিমিডিয়া ওসিআর",
11 | "subtitle": "চিত্র থেকে পাঠ্য আহরণ করুন",
12 | "form-heading": "ছবি ট্রান্সক্রাইব করুন",
13 | "image-url": "চিত্রের ইউআরএল",
14 | "image-url-help": "উইকিমিডিয়া সার্ভারে হোস্ট করা একটি চিত্রের ইউআরএল সন্নিবেশ করুন যেমন: $1",
15 | "image-url-error": "ছবির ইউআরএল {{PLURAL:$1|নিম্নোক্ত ডোমেইন নাম|নিম্নোক্ত ডোমেইন নামগুলোর কোনো একটি}} দিয়ে শুরু হতে হবে এবং বৈধ ফাইল এক্সটেনশন দিয়ে শেষ হতে হবে: $2",
16 | "image-alt-text": "প্রকৃত চিত্র",
17 | "language-code": "ভাষা (ঐচ্ছিক)",
18 | "engine": "ওসিআর ইঞ্জিন",
19 | "engine-name-google": "গুগল ক্লাউড ভিশন ওসিআর",
20 | "engine-name-tesseract": "টেসারেক্ট ওসিআর",
21 | "engine-name-transkribus": "ট্রান্সক্রিবাস ওসিআর",
22 | "engine-not-found-warning": "অনুরোধকৃত ইঞ্জিন '$1' খুঁজে পাওয়া যায়নি। এর পরিবর্তে পূর্বনির্ধারিত ইঞ্জিন '$2' ব্যবহার করা হচ্ছে।",
23 | "engine-invalid-langs-warning": "নিম্নলিখিত ভাষাগুলি অবৈধ বা ইঞ্জিন দ্বারা সমর্থিত নয় এবং উপেক্ষা করা হয়েছে: $1",
24 | "submit": "পুরো পাতা থেকে আহরণ করুন",
25 | "submit-crop": "আহরণের এলাকা",
26 | "drag-help": "পৃষ্ঠার শুধুমাত্র অংশবিশেষ ট্রান্সক্রাইব করতে ক্রপ টুলটি নির্বাচন করুন এবং নিচের ছবির উপর একটি আয়তক্ষেত্র আঁকুন।",
27 | "drag-mode-move": "ড্র্যাগ করলে ছবিটি সরে যাবে",
28 | "drag-mode-move-alt": "'সরানো' ক্রিয়ার প্রতিনিধিত্বকারী আইকন।",
29 | "drag-mode-crop": "ড্র্যাগ করলে নতুন ক্রপ এলাকা তৈরি হবে",
30 | "drag-mode-crop-alt": "'ক্রপ' ক্রিয়ার প্রতিনিধিত্বকারী আইকন।",
31 | "copy-to-clipboard": "ক্লিপবোর্ডে অনুলিপি করুন",
32 | "copied-to-clipboard": "অনুলিপি করা হয়েছে!",
33 | "google-error": "গুগল পরিষেবা একটি ত্রুটি প্রদান করেছে: $1",
34 | "image-retrieval-failed": "ছবি পুনরুদ্ধার ব্যর্থ হয়েছে: $1",
35 | "documentation": "নথিপত্র",
36 | "api": "এপিআই",
37 | "api-tooltip": "এপিআই নথিপত্র দেখুন",
38 | "version": "সংস্করণ $1",
39 | "report-issue": "সমস্যা প্রতিবেদন করুন",
40 | "langs-placeholder": "স্বয়ংক্রিয়ভাবে ভাষা সনাক্তকরণের জন্য ফাঁকা রাখুন।",
41 | "langs-param-error": "নিম্নলিখিত {{PLURAL:$1|ভাষাটি|ভাষাগুলি}} ওসিআর ইঞ্জিন দ্বারা সমর্থিত নয়: $2",
42 | "loading-message": "ট্রান্সক্রাইব করা হচ্ছে...",
43 | "tesseract-options": "টেসারেক্ট বিকল্প",
44 | "tesseract-psm-label": "পৃষ্ঠা বিভাজনের পদ্ধতি",
45 | "tesseract-psm-help": "আরও ভালো বহু-কলাম সহায়তার জন্য \"ছড়ানো বিক্ষিপ্ত পাঠ্য\" নির্বাচন করুন।",
46 | "tesseract-psm-0": "শুধু দিক এবং লিপি নির্ধারণ (ওএসডি)।",
47 | "tesseract-psm-1": "ওএসডি দিয়ে স্বয়ংক্রিয় পৃষ্ঠা বিভাজন।",
48 | "tesseract-psm-2": "স্বয়ংক্রিয় পৃষ্ঠা বিভাজন, কিন্তু ওএসডি বা ওসিআর ছাড়া। (বাস্তবায়িত হয়নি)",
49 | "tesseract-psm-3": "সম্পূর্ণ স্বয়ংক্রিয় পৃষ্ঠা বিভাজন, কিন্তু ওএসডি ছাড়া। (পূর্বনির্ধারিত)",
50 | "tesseract-psm-4": "পরিবর্তনশীল আকারযুক্ত পাঠ্যের একটি একক কলাম ধরে নিন।",
51 | "tesseract-psm-5": "উল্লম্বভাবে সারিবদ্ধ পাঠ্যের একটি একক অভিন্ন ব্লক ধরে নিন।",
52 | "tesseract-psm-6": "পাঠ্যের একটি একক অভিন্ন ব্লক ধরে নিন।",
53 | "tesseract-psm-7": "চিত্রটিকে একটি একক পাঠ্য লাইন হিসেবে বিবেচনা করুন।",
54 | "tesseract-psm-8": "চিত্রটিকে একটি শব্দ হিসেবে বিবেচনা করুন।",
55 | "tesseract-psm-9": "চিত্রটিকে একটি বৃত্তে একটি শব্দ হিসেবে বিবেচনা করুন।",
56 | "tesseract-psm-10": "চিত্রটিকে একটি একক অক্ষর হিসেবে বিবেচনা করুন।",
57 | "tesseract-psm-11": "ছড়ানো বিক্ষিপ্ত পাঠ্য। কোনও নির্দিষ্ট ক্রম ছাড়াই যতটা সম্ভব পাঠ্য খুঁজুন।",
58 | "tesseract-psm-12": "ওএসডিসহ ছড়ানো বিক্ষিপ্ত পাঠ্য।",
59 | "tesseract-psm-13": "পাঠ্যের লাইন। টেসারেক্টের নিজস্ব হ্যাকগুলিকে বাদ দিয়ে চিত্রটিকে পাঠ্যের একটি একক লাইন হিসাবে বিবেচনা করুন।",
60 | "tesseract-param-error": "টেসারেক্টে '$1' অপশনে $2 মান ব্যবহার করা যাবে না। সর্বোচ্চ মান: $3",
61 | "tesseract-no-text-error": "টেসারেক্ট ইঞ্জিন এই ছবির জন্য কোনও লেখা প্রদান করেনি।",
62 | "tesseract-internal-error": "টেসারেক্ট ইঞ্জিনে একটি অভ্যন্তরীণ ত্রুটি হয়েছে।",
63 | "transkribus-language-code": "ভাষা মডেল",
64 | "transkribus-unauthorized-error": "ত্রুটি কোড '$1' :: অনুরোধটি অনুমোদিত নয়।",
65 | "transkribus-default-error": "ত্রুটি কোড '$1' :: অনুরোধ সম্পন্ন করা যায়নি, আবার চেষ্টা করুন!",
66 | "transkribus-empty-response-error": "ট্রান্সক্রিবাস এপিআই থেকে ফলাফল পার্স করা যায়নি",
67 | "transkribus-init-process-error": "ট্রান্সক্রিবাস প্রক্রিয়া আরম্ভকরণ ব্যর্থ হয়েছে",
68 | "transkribus-failed-process-error": "ট্রান্সক্রিবাস প্রক্রিয়া ব্যর্থ হয়েছে",
69 | "transkribus-no-lang-error": "কোনও ভাষা নির্বাচন করা হয়নি",
70 | "transkribus-multiple-lang-error": "একাধিক ভাষা অনুমোদিত নয়, একটি ভাষা নির্দিষ্ট করুন",
71 | "transkribus-browse-public-models": "ট্রান্সক্রিবাসের সকল পাবলিক ভাষার মডেল ব্রাউজ করুন",
72 | "transkribus-request-for-model": "ট্রান্সক্রিবাস থেকে ওসিআর সরঞ্জামে একটি মডেল যোগ করার জন্য অনুরোধ করুন",
73 | "transkribus-options": "ট্রান্সক্রিবাস বিকল্প",
74 | "transkribus-line-label": "রেখা সনাক্তকরণ মডেল",
75 | "transkribus-line-id-none-option": "কোনোটিই নয়",
76 | "transkribus-mixed-line-option": "মিশ্র রেখা অভিমুখায়ন",
77 | "transkribus-line-help": "কোন লাইন সনাক্তকরণ মডেল ব্যবহার করবেন তা নিশ্চিত না হলে খালি রাখুন",
78 | "transkribus-jobs": "ট্রান্সক্রিবাস জবস",
79 | "transkribus-job-id": "কাজের আইডি",
80 | "transkribus-job-state": "স্থিতি",
81 | "transkribus-job-description": "বিবরণ",
82 | "transkribus-job-start": "শুরু হয়েছে",
83 | "transkribus-job-end": "সম্পন্ন হয়েছে",
84 | "transkribus-job-waited": "শুরুর বিলম্ব (মিনিট)"
85 | }
86 |
--------------------------------------------------------------------------------
/i18n/en.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {},
3 | "title": "WikimediaOCR",
4 | "subtitle": "A tool to transcribe text from scanned images on Wikimedia Commons, for use on Wikisource and elsewhere.",
5 | "form-heading": "Transcribe an image",
6 | "image-url": "Image URL",
7 | "image-url-help": "Insert an image URL hosted on a Wikimedia server such as: $1",
8 | "image-url-error": "Image URL must begin with {{PLURAL:$1|the following domain name|one of the following domain names}} and end with a valid file extension: $2",
9 | "image-alt-text": "The original image",
10 | "language-code": "Languages (optional)",
11 | "engine": "OCR engine",
12 | "engine-name-google": "Google Cloud Vision OCR",
13 | "engine-name-tesseract": "Tesseract OCR",
14 | "engine-name-transkribus": "Transkribus OCR",
15 | "engine-not-found-warning": "The requested engine '$1' was not found. Using the default engine '$2' instead.",
16 | "engine-invalid-langs-warning": "The following languages are invalid or not supported by the engine and were ignored: $1",
17 | "submit": "Transcribe whole page",
18 | "submit-crop": "Transcribe area",
19 | "drag-help": "Select the crop tool and drag a rectangle on the image below to transcribe only one area of the page.",
20 | "drag-mode-move": "Dragging will move the image",
21 | "drag-mode-move-alt": "Icon representing the 'move' action.",
22 | "drag-mode-crop": "Dragging will create a new crop area",
23 | "drag-mode-crop-alt": "Icon representing the 'crop' action.",
24 | "copy-to-clipboard": "Copy to clipboard",
25 | "copied-to-clipboard": "Copied!",
26 | "google-error": "The Google service returned an error: $1",
27 | "image-retrieval-failed": "Image retrieval failed: $1",
28 | "documentation": "Documentation",
29 | "api": "API",
30 | "api-tooltip": "View the API documentation",
31 | "version": "Version $1",
32 | "report-issue": "Report an issue",
33 | "langs-placeholder": "Leave blank for automatic language detection.",
34 | "langs-param-error": "The following {{PLURAL:$1|language is|languages are}} not supported by the OCR engine: $2",
35 | "loading-message": "Performing transcription...",
36 | "tesseract-options": "Tesseract options",
37 | "tesseract-psm-label": "Page segmentation method",
38 | "tesseract-psm-help": "Try \"Sparse text\" for better multi-column support.",
39 | "tesseract-psm-0": "Orientation and script detection (OSD) only.",
40 | "tesseract-psm-1": "Automatic page segmentation with OSD.",
41 | "tesseract-psm-2": "Automatic page segmentation, but no OSD or OCR. (not implemented)",
42 | "tesseract-psm-3": "Fully automatic page segmentation, but no OSD. (Default)",
43 | "tesseract-psm-4": "Assume a single column of text of variable sizes.",
44 | "tesseract-psm-5": "Assume a single uniform block of vertically aligned text.",
45 | "tesseract-psm-6": "Assume a single uniform block of text.",
46 | "tesseract-psm-7": "Treat the image as a single text line.",
47 | "tesseract-psm-8": "Treat the image as a single word.",
48 | "tesseract-psm-9": "Treat the image as a single word in a circle.",
49 | "tesseract-psm-10": "Treat the image as a single character.",
50 | "tesseract-psm-11": "Sparse text. Find as much text as possible in no particular order.",
51 | "tesseract-psm-12": "Sparse text with OSD.",
52 | "tesseract-psm-13": "Raw line. Treat the image as a single text line, bypassing hacks that are Tesseract-specific.",
53 | "tesseract-param-error": "The '$1' option with a value of $2 is not supported by Tesseract. Maximum value: $3",
54 | "tesseract-no-text-error": "The Tesseract engine did not return any text for this image.",
55 | "tesseract-internal-error": "The Tesseract engine returned an internal error.",
56 | "transkribus-language-code": "Language Model",
57 | "transkribus-unauthorized-error": "Error Code '$1' :: The request is not authorized",
58 | "transkribus-default-error": "Error Code '$1' :: Unable to complete request, try again!",
59 | "transkribus-empty-response-error": "Could not parse result from Transkribus API",
60 | "transkribus-init-process-error": "Failed to initialize Transkribus process",
61 | "transkribus-failed-process-error": "Transkribus process failed",
62 | "transkribus-no-lang-error": "No language was selected",
63 | "transkribus-multiple-lang-error": "Multiple languages are not allowed, specify one language",
64 | "transkribus-browse-public-models": "Browse all public language models for Transkribus",
65 | "transkribus-request-for-model": "Make a request to add a model from Transkribus to the OCR tool",
66 | "transkribus-options": "Transkribus Options",
67 | "transkribus-line-label": "Line Detection Model",
68 | "transkribus-line-id-none-option": "None",
69 | "transkribus-mixed-line-option": "Mixed Line Orientation",
70 | "transkribus-line-help": "Leave empty if you are not sure of which line detection model to use",
71 | "transkribus-jobs": "Transkribus Jobs",
72 | "transkribus-job-id": "Job ID",
73 | "transkribus-job-state": "State",
74 | "transkribus-job-description": "Description",
75 | "transkribus-job-start": "Started",
76 | "transkribus-job-end": "Finished",
77 | "transkribus-job-waited": "Start delay (minutes)"
78 | }
79 |
--------------------------------------------------------------------------------
/i18n/ia.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "McDutchie"
5 | ]
6 | },
7 | "title": "WikimediaOCR",
8 | "subtitle": "Transcriber texto de imagines",
9 | "form-heading": "Transcriber un imagine",
10 | "image-url": "URL del imagine",
11 | "image-url-help": "Insere un URL de imagine albergate sur un servitor de Wikimedia como: $1",
12 | "image-url-error": "Le adresse URL del imagine debe comenciar con {{PLURAL:$1|le sequente nomine|un del sequente nomines}} de dominio e terminar in un extension de nomine de file valide: $2",
13 | "image-alt-text": "Le imagine original",
14 | "language-code": "Linguas (optional)",
15 | "engine": "Motor OCR",
16 | "engine-name-transkribus": "OCR de Transkribus",
17 | "engine-not-found-warning": "Le motor '$1' requestate non ha essite trovate. Le motor predefinite '$2' es usate in su loco.",
18 | "engine-invalid-langs-warning": "Le sequente linguas non es valide o non es supportate per le motor e ha essite ignorate: $1",
19 | "submit": "Transcriber tote le pagina",
20 | "submit-crop": "Transcriber un area",
21 | "drag-help": "Selige le utensile de taliar e trahe un rectangulo sur le imagine hic infra pro transcriber solmente un area del pagina.",
22 | "drag-mode-move": "Traher displaciara le imagine",
23 | "drag-mode-move-alt": "Icone representante le action 'displaciar'.",
24 | "drag-mode-crop": "Traher creara un nove area de retalio",
25 | "drag-mode-crop-alt": "Icone representante le action 'taliar'.",
26 | "copy-to-clipboard": "Copiar al area de transferentia",
27 | "copied-to-clipboard": "Copiate!",
28 | "google-error": "Le servicio de Google ha restituite un error: $1",
29 | "image-retrieval-failed": "Recuperation de imagine fallite: $1",
30 | "documentation": "Documentation",
31 | "api-tooltip": "Vider le documentation del API",
32 | "version": "Version $1",
33 | "report-issue": "Reportar un problema",
34 | "langs-placeholder": "Lassar vacue pro detection automatic de lingua.",
35 | "langs-param-error": "Le sequente lingua{{PLURAL:$1||s}} non es supportate per le motor OCR: $2",
36 | "loading-message": "Transcription in curso…",
37 | "tesseract-options": "Optiones de Tesseract",
38 | "tesseract-psm-label": "Methodo de segmentation de pagina",
39 | "tesseract-psm-help": "Essaya \"Texto sparse\" pro un melior gestion de plure columnas.",
40 | "tesseract-psm-0": "Detection de orientation e de scriptura (OSD) solmente.",
41 | "tesseract-psm-1": "Segmentation automatic de paginas con OSD.",
42 | "tesseract-psm-2": "Segmentation automatic de paginas, ma sin OSD e sin OCR. (non implementate)",
43 | "tesseract-psm-3": "Segmentation de paginas totalmente automatic, ma sin OSD. (Option predefinite)",
44 | "tesseract-psm-4": "Assumer un sol columna de texto de varie grandores.",
45 | "tesseract-psm-5": "Assumer un sol bloco uniforme de texto alineate verticalmente.",
46 | "tesseract-psm-6": "Assumer un sol bloco uniforme de texto.",
47 | "tesseract-psm-7": "Tractar le imagine como un sol linea de texto.",
48 | "tesseract-psm-8": "Tractar le imagine como un sol parola.",
49 | "tesseract-psm-9": "Tractar le imagine como un sol parola in un circulo.",
50 | "tesseract-psm-10": "Tractar le imagine como un sol character.",
51 | "tesseract-psm-11": "Texto sparse. Trovar tante texto como possibile sin ordine particular.",
52 | "tesseract-psm-12": "Texto sparse con OSD.",
53 | "tesseract-psm-13": "Linea brute. Tractar le imagine como un sol linea de texto, contornante le trucos que es specific a Tesseract.",
54 | "tesseract-param-error": "Le option '$1' con un valor de $2 non es supportate per Tesseract. Valor maxime: $3",
55 | "tesseract-no-text-error": "Le motor Tesseract non ha restituite alcun texto pro iste imagine.",
56 | "tesseract-internal-error": "Le motor Tesseract indicava un error interne.",
57 | "transkribus-language-code": "Modello de lingua",
58 | "transkribus-unauthorized-error": "Codice de error '$1' :: Le requesta non es autorisate",
59 | "transkribus-default-error": "Codice de error '$1' :: Non pote terminar le requesta, tenta lo de novo!",
60 | "transkribus-empty-response-error": "Non poteva interpretar le resultato del API de Transkribus",
61 | "transkribus-init-process-error": "Non poteva initialisar le processo Transkribus",
62 | "transkribus-failed-process-error": "Processo Transkribus fallite",
63 | "transkribus-no-lang-error": "Necun lingua ha essite seligite",
64 | "transkribus-multiple-lang-error": "Plure linguas non es permittite; specifica solo un lingua",
65 | "transkribus-browse-public-models": "Percurrer tote le modellos public de lingua pro Transkribus",
66 | "transkribus-request-for-model": "Facer un demanda pro adder un modello de Transkribus al utensile OCR",
67 | "transkribus-options": "Optiones de Transkribus",
68 | "transkribus-line-label": "Modello de detection de linea",
69 | "transkribus-line-id-none-option": "Necun",
70 | "transkribus-mixed-line-option": "Orientation de linea mixte",
71 | "transkribus-line-help": "Lassa vacue si tu non es secur de qual modello de detection de linea utilisar",
72 | "transkribus-jobs": "Travalios de Transkribus",
73 | "transkribus-job-id": "ID del travalio",
74 | "transkribus-job-state": "Stato",
75 | "transkribus-job-description": "Description",
76 | "transkribus-job-start": "Initiate",
77 | "transkribus-job-end": "Finite",
78 | "transkribus-job-waited": "Demora de initio (minutas)"
79 | }
80 |
--------------------------------------------------------------------------------
/i18n/mk.json:
--------------------------------------------------------------------------------
1 | {
2 | "@metadata": {
3 | "authors": [
4 | "Bjankuloski06"
5 | ]
6 | },
7 | "title": "ОПЗ на Викимедија",
8 | "subtitle": "Препис на текст од слики",
9 | "form-heading": "Препис на слика",
10 | "image-url": "URL на сликата",
11 | "image-url-help": "Ова мора да почнува со едно од следниве доменски имиња и да биде целосна URL кон фактичката сликовна податотека: $1",
12 | "image-url-error": "URL-то на сликата мора да почнува со {{PLURAL:$1|следново доменско име|едно од следниве доменски имиња}} и да завршува со важечка податотечна наставка: $2",
13 | "image-alt-text": "Изворната слика.",
14 | "language-code": "Јазици (незадолж.)",
15 | "engine": "ОПЗ-погон",
16 | "engine-name-google": "ОПЗ на Google Cloud Vision",
17 | "engine-name-tesseract": "ОПЗ на Tesseract",
18 | "engine-name-transkribus": "Transkribus OCR",
19 | "engine-not-found-warning": "Не го најдов побараниот погон „$1“. Ќе го користам основно зададениот погон „$2“.",
20 | "engine-invalid-langs-warning": "Следниве јазици се неважечки или не се поддржани од погонот, па затоа се занемаруваат: $1",
21 | "submit": "Препиши ја целата страница",
22 | "submit-crop": "Препиши го подрачјето",
23 | "drag-help": "Изберете ја алатката за кастрење и повлечете правоаголник врз долуприкажаната слика за да се препише само тој дел од неа.",
24 | "drag-mode-move": "Повлекувањето ќе ја помести сликата",
25 | "drag-mode-move-alt": "Икона за дејството „поместување“.",
26 | "drag-mode-crop": "Повлекувањето ќе направи ново скастрено подрачје",
27 | "drag-mode-crop-alt": "Икона за дејството „кастрење“.",
28 | "copy-to-clipboard": "Копирај во меѓускладот",
29 | "copied-to-clipboard": "Ископирано!",
30 | "google-error": "Службата на Гугл даде грешка: $1",
31 | "image-retrieval-failed": "Не успеав да ја добијам сликата: $1",
32 | "documentation": "Документација",
33 | "api": "Приложник",
34 | "api-tooltip": "Погл. документација на приложникот",
35 | "version": "Верзија $1",
36 | "report-issue": "Пријави проблем",
37 | "langs-placeholder": "Оставете празно за автоматско утврдување на јазикот.",
38 | "langs-param-error": "{{PLURAL:$1|Следниов јазик не е поддржан|Следниве јазици не се поддржани}} од погонот на OCR: $2",
39 | "loading-message": "Вршам препис...",
40 | "tesseract-options": "Можности на Tesseract",
41 | "tesseract-psm-label": "Начин на разделување на страницата",
42 | "tesseract-psm-help": "Пробајте „Редок текст“ за подобра повеќестолбна поддршка.",
43 | "tesseract-psm-0": "Само препознавање на насока и писмо (ПНП).",
44 | "tesseract-psm-1": "Автоматско разделување на страницата со ПНП.",
45 | "tesseract-psm-2": "Автоматско разделување на страницата, но без ПНП или ОПЗ. (не е спроведено)",
46 | "tesseract-psm-3": "Наполно автоматско разделување на страницата, но без ПНП. (По основно)",
47 | "tesseract-psm-4": "Претпостави единечен столб од текст со променливи големини.",
48 | "tesseract-psm-5": "Претпостави единечен еднообразен блок на вертикално подреден текст.",
49 | "tesseract-psm-6": "Претпостави единечен еднообразен блок од текст.",
50 | "tesseract-psm-7": "Сметај ја сликата за еден ред текст.",
51 | "tesseract-psm-8": "Сметај ја сликата за еден збор.",
52 | "tesseract-psm-9": "Сметај ја сликата за еден збор во кружница.",
53 | "tesseract-psm-10": "Сметај ја сликата за еден знак.",
54 | "tesseract-psm-11": "Разреден текст. Најди што повеќе текст по било кој редослед.",
55 | "tesseract-psm-12": "Разреден текст со ПНП.",
56 | "tesseract-psm-13": "Сиров ред. Сметај ја сликата за еден ред текст, заобиколувајќи ги можностите особени за Tesseract.",
57 | "tesseract-param-error": "Можноста „$1“ со вредност $2 не е поддржана од Tesseract. Најголема допуштена вредност: $3",
58 | "tesseract-no-text-error": "Погонот Tesseract не даде никаков текст за оваа слика.",
59 | "tesseract-internal-error": "Погонот Tesseract даде внатрешна грешка.",
60 | "transkribus-language-code": "Јазичен модел",
61 | "transkribus-unauthorized-error": "Грешка „$1“ :: Барањето не е овластено",
62 | "transkribus-default-error": "Грешка „$1“ :: Не можам да го исполнам барањето. Пробајте повторно!",
63 | "transkribus-empty-response-error": "Не можев да го расчленам исходот од приложникот на Transkribus",
64 | "transkribus-init-process-error": "Не можев да ја покренам постапката на Transkribus",
65 | "transkribus-failed-process-error": "Постапката на Transkribus не успеа",
66 | "transkribus-no-lang-error": "Немате избрано јазик",
67 | "transkribus-multiple-lang-error": "Не се дозволени повеќе јазици. Укажете еден",
68 | "transkribus-browse-public-models": "Прелистајте ги сите јавни јазични модели за Transkribus",
69 | "transkribus-request-for-model": "Поднесете барање за додавање на модел од Transkribus кон алатката за оптичко препознавање",
70 | "transkribus-options": "Можности за Transkribus",
71 | "transkribus-line-label": "Модел за откривање редови",
72 | "transkribus-line-id-none-option": "Нема",
73 | "transkribus-mixed-line-option": "Мешана насоченост на редовите",
74 | "transkribus-line-help": "Оставете го празно ако не сте сигурни кој модел за откривање редови треба да се користи",
75 | "transkribus-jobs": "Задачи на Transkribus",
76 | "transkribus-job-id": "Назнака на задачата",
77 | "transkribus-job-state": "Состојба",
78 | "transkribus-job-description": "Опис",
79 | "transkribus-job-start": "Започнато",
80 | "transkribus-job-end": "Завршено",
81 | "transkribus-job-waited": "Одложување на почетокот (минути)"
82 | }
83 |
--------------------------------------------------------------------------------