├── scripts ├── __init__.py ├── da-langs.py ├── all-namespaces.py └── scripts_utils.py ├── .github ├── PULL_REQUEST_TEMPLATE.md ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── template-add.md │ ├── template-modification.md │ └── template-dump.md ├── dependabot.yml └── workflows │ ├── quality.yml │ └── tests.yml.disabled ├── tests ├── data │ ├── fr │ │ ├── no section.wiki │ │ ├── gèlent.wiki │ │ ├── colligeait.wiki │ │ ├── venoient.wiki │ │ ├── chacune.wiki │ │ ├── djed.wiki │ │ ├── Bogotanais.wiki │ │ ├── 5E.wiki │ │ ├── -aux.wiki │ │ ├── corollaires.wiki │ │ ├── corps portant.wiki │ │ ├── π.wiki │ │ ├── encyclopædie.wiki │ │ ├── mutiner.wiki │ │ ├── minutes.wiki │ │ ├── acrologie.wiki │ │ └── 42.wiki │ ├── de │ │ ├── daß.wiki │ │ ├── Informationsverlusts.wiki │ │ ├── kartel.wiki │ │ ├── trage.wiki │ │ └── @.wiki │ ├── eo │ │ ├── ekamus.wiki │ │ ├── 💀.wiki │ │ ├── ♍.wiki │ │ ├── Teodoriko.wiki │ │ ├── latina.wiki │ │ ├── alkazabo.wiki │ │ ├── kaskedo.wiki │ │ ├── komputilo.wiki │ │ └── luko.wiki │ ├── sv │ │ ├── dufvor.wiki │ │ ├── harmonierar.wiki │ │ ├── auto.wiki │ │ └── -hörning.wiki │ ├── en │ │ ├── memoized.wiki │ │ ├── 42.wiki │ │ ├── humans.wiki │ │ └── Acanthis.wiki │ ├── pt │ │ ├── tenui-.wiki │ │ ├── Ku.wiki │ │ ├── sublist.wiki │ │ ├── ensimesmariam.wiki │ │ ├── nomenclaturar.wiki │ │ ├── quebrar galho.wiki │ │ ├── tique-taque.wiki │ │ ├── não tenho trocado.wiki │ │ ├── ababalhar.wiki │ │ ├── anões.wiki │ │ ├── 6.wiki │ │ ├── COPOM.wiki │ │ ├── neo-.wiki │ │ ├── UTC.wiki │ │ ├── galium.wiki │ │ ├── paulista.wiki │ │ ├── etc.wiki │ │ ├── dezassete.wiki │ │ ├── algo.wiki │ │ ├── ũa.wiki │ │ ├── cabrum.wiki │ │ ├── objetiva.wiki │ │ ├── -a.wiki │ │ ├── alguém.wiki │ │ ├── baiano.wiki │ │ └── giro-.wiki │ ├── no │ │ ├── gjente.wiki │ │ ├── vg..wiki │ │ ├── sviger-.wiki │ │ ├── ØNH.wiki │ │ ├── Øyvind.wiki │ │ ├── funnet.wiki │ │ ├── Kiberg.wiki │ │ ├── uten.wiki │ │ ├── NS.wiki │ │ ├── rasshol.wiki │ │ ├── bak lås og slå.wiki │ │ ├── slå to fluer i en smekk.wiki │ 
│ ├── tolvte.wiki │ │ ├── lumpen.wiki │ │ ├── -bar.wiki │ │ ├── verken.wiki │ │ ├── konsentrasjon.wiki │ │ ├── liksom.wiki │ │ ├── én svale gjør ingen sommer.wiki │ │ ├── seg.wiki │ │ ├── krokodille.wiki │ │ ├── aberrasjon.wiki │ │ ├── bare.wiki │ │ └── et.wiki │ ├── ro │ │ ├── MHz.wiki │ │ ├── frumoasă.wiki │ │ ├── portocale.wiki │ │ ├── aventurierul.wiki │ │ ├── paronim.wiki │ │ └── Lama.wiki │ ├── da │ │ ├── ▶.wiki │ │ ├── -ør.wiki │ │ ├── disse.wiki │ │ ├── PMV.wiki │ │ ├── et.wiki │ │ ├── skulle.wiki │ │ ├── godt nytår.wiki │ │ ├── bakterie.wiki │ │ ├── tolvte.wiki │ │ ├── søm.wiki │ │ ├── til.wiki │ │ ├── jørme.wiki │ │ ├── tyv.wiki │ │ ├── her.wiki │ │ └── mus.wiki │ ├── ca │ │ ├── Mn..wiki │ │ ├── expertes.wiki │ │ ├── halloweeniana.wiki │ │ ├── -ass-.wiki │ │ ├── PMF.wiki │ │ ├── AFI.wiki │ │ ├── hivernacle.wiki │ │ ├── pen.wiki │ │ ├── Castell.wiki │ │ ├── ch.wiki │ │ ├── -itzar.wiki │ │ ├── disset.wiki │ │ └── bio-.wiki │ ├── zh │ │ ├── 七講八講.wiki │ │ └── 稍後.wiki │ ├── es │ │ ├── Mús..wiki │ │ ├── buena.wiki │ │ ├── hala.wiki │ │ ├── ruego.wiki │ │ ├── zzz.wiki │ │ ├── uni-.wiki │ │ ├── extenuado.wiki │ │ ├── también.wiki │ │ ├── -acho.wiki │ │ ├── buque_mercante.wiki │ │ ├── los.wiki │ │ └── hocico.wiki │ ├── it │ │ ├── muratrici.wiki │ │ ├── debolmente.wiki │ │ ├── brillantino.wiki │ │ ├── modalità Goblin.wiki │ │ ├── lettore.wiki │ │ └── condividere.wiki │ ├── ja │ │ ├── みてる.wiki │ │ ├── 有する.wiki │ │ ├── 駐.wiki │ │ ├── 併.wiki │ │ └── V.wiki │ └── el │ │ ├── -αίικο.wiki │ │ ├── τσιγγάνα.wiki │ │ └── -ης.wiki ├── test_5_gen_dict.py ├── test_4_get_word.py ├── update-data.py ├── test_2_utils.py └── test_zh.py ├── .well-known └── funding-manifest-urls ├── wikidict ├── cover.png ├── caches │ ├── svg.gz │ └── __init__.py ├── lang │ ├── en │ │ ├── template_adapters.py │ │ └── template_overrides.py │ ├── ja │ │ ├── template_adapters.py │ │ └── variant_handlers.py │ ├── ro │ │ ├── template_overrides.py │ │ └── variant_handlers.py │ ├── pt │ │ ├── 
template_adapters.py │ │ └── variant_handlers.py │ ├── ca │ │ ├── template_adapters.py │ │ └── variant_handlers.py │ ├── sv │ │ ├── variant_handlers.py │ │ └── __init__.py │ ├── no │ │ └── variant_handlers.py │ ├── it │ │ └── variant_handlers.py │ ├── de │ │ └── variant_handlers.py │ ├── eo │ │ └── variant_handlers.py │ ├── defaults.py │ ├── fr │ │ ├── template_adapters.py │ │ └── template_overrides.py │ └── ru │ │ └── variant_handlers.py ├── __init__.py ├── stubs.py ├── svg.py ├── namespaces.py ├── gen_dict.py ├── show_pos.py └── constants.py ├── requirements-tests.txt ├── add-to-cache.py ├── .gitattributes ├── check.sh ├── requirements.txt ├── .gitignore ├── log-analyzer.py ├── LICENSE ├── server.py ├── pyproject.toml └── DEBUG.md /scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Fixes # 2 | -------------------------------------------------------------------------------- /tests/data/fr/no section.wiki: -------------------------------------------------------------------------------- 1 | {{voir|Hafnium}} 2 | -------------------------------------------------------------------------------- /.well-known/funding-manifest-urls: -------------------------------------------------------------------------------- 1 | https://www.tiger-222.fr/funding.json 2 | 3 | -------------------------------------------------------------------------------- /wikidict/cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reader-dict/monolingual/HEAD/wikidict/cover.png -------------------------------------------------------------------------------- /tests/data/de/daß.wiki: -------------------------------------------------------------------------------- 1 | == daß 
({{Sprache|Deutsch}}) == 2 | {{Alte Schreibweise|dass|Reform 1996}} 3 | -------------------------------------------------------------------------------- /wikidict/caches/svg.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reader-dict/monolingual/HEAD/wikidict/caches/svg.gz -------------------------------------------------------------------------------- /tests/data/eo/ekamus.wiki: -------------------------------------------------------------------------------- 1 | =={{Lingvo|eo}}== 2 | ==={{Vortospeco|verba formo|eo}}=== 3 | 4 | {{form-eo}} 5 | -------------------------------------------------------------------------------- /tests/data/sv/dufvor.wiki: -------------------------------------------------------------------------------- 1 | ==Svenska== 2 | ===Substantiv=== 3 | '''dufvor''' 4 | #{{böjning|sv|subst|dufva}} 5 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [BoboTiG] 4 | patreon: mschoentgen 5 | -------------------------------------------------------------------------------- /tests/data/sv/harmonierar.wiki: -------------------------------------------------------------------------------- 1 | ==Svenska== 2 | ===Verb=== 3 | '''harmonierar''' 4 | #{{böjning|sv|verb|harmoniera}} 5 | -------------------------------------------------------------------------------- /tests/data/en/memoized.wiki: -------------------------------------------------------------------------------- 1 | ==English== 2 | 3 | ===Verb=== 4 | {{head|en|verb form}} 5 | 6 | # {{infl of|en|memoize||ed-form}} 7 | -------------------------------------------------------------------------------- /wikidict/lang/en/template_adapters.py: -------------------------------------------------------------------------------- 1 | adapters = { 2 | 
"Template:taxlinknew": lambda _: "{{#if:{{{1|}}}|''{{{1}}}''|}}", 3 | } 4 | -------------------------------------------------------------------------------- /tests/data/pt/tenui-.wiki: -------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | ==Antepositivo== 3 | '''te.nu.i-''' 4 | # {{varort|tenu-|pt}} 5 | [[Categoria:Antepositivo (Português)]] 6 | -------------------------------------------------------------------------------- /tests/data/no/gjente.wiki: -------------------------------------------------------------------------------- 1 | ==Norsk== 2 | ===Subjektiv=== 3 | '''{{PAGENAME}}''' {{f}} {{norm|høgnorsk}} 4 | 5 | #[[jente]] 6 | 7 | [[kategori:Høgnorsk]] 8 | -------------------------------------------------------------------------------- /tests/data/pt/Ku.wiki: -------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | [[Categoria:Símbolo (Português)]] 3 | '''{{PAGENAME}}''' ''símbolo'' 4 | 5 | # símbolo químico do [[kurtschatóvio]] 6 | -------------------------------------------------------------------------------- /tests/data/ro/MHz.wiki: -------------------------------------------------------------------------------- 1 | {{Ajutor|Traduceri în alte limbi}} 2 | =={{limba|conv}}== 3 | {{-simbol-|conv}} 4 | '''MHz''' 5 | #[[simbol]] pentru [[megahertz]] 6 | -------------------------------------------------------------------------------- /tests/data/eo/💀.wiki: -------------------------------------------------------------------------------- 1 | == translingva == 2 | === Signo === 3 | {{livs|mul|KK}} 4 | {{signo|des=SKULL}} 5 | {{k|mul|T: BILD.}} 6 | 7 | ===={{Signifoj}}==== 8 | # morto 9 | -------------------------------------------------------------------------------- /tests/data/no/vg..wiki: -------------------------------------------------------------------------------- 1 | ==Norsk== 2 | ===Forkortelse=== 3 | {{infl|no|fork}} 4 | 5 | # forkortelse for 
''[[videregående]]''/''[[videregåande]]'' 6 | 7 | [[Kategori:Forkortelser i norsk]] 8 | -------------------------------------------------------------------------------- /wikidict/lang/ja/template_adapters.py: -------------------------------------------------------------------------------- 1 | adapters = { 2 | "テンプレート:wikipedia-s": lambda _: "[[{{{1}}}{{#if: {{{label|}}} | {{!}}{{{label}}} | {{#if: {{{2|}}} | {{!}}{{{2|}}} }} }}]]", 3 | } 4 | -------------------------------------------------------------------------------- /tests/data/no/sviger-.wiki: -------------------------------------------------------------------------------- 1 | ==Norsk== 2 | ===Prefiks=== 3 | {{no-pref}} 4 | 5 | #som befinner seg i inngiftet [[familie]] 6 | 7 | ====Se også==== 8 | *[[:Kategori:Ord i norsk som starter på «sviger-»]] 9 | -------------------------------------------------------------------------------- /tests/data/no/ØNH.wiki: -------------------------------------------------------------------------------- 1 | ==Norsk== 2 | ===Forkortelse=== 3 | {{infl|no|fork}} 4 | 5 | ====Forklaring==== 6 | # forkortelse for ''[[øre]]-[[nese]]-[[hals]]'' 7 | 8 | [[Kategori:Forkortelser i norsk]] 9 | 10 | -------------------------------------------------------------------------------- /tests/data/da/▶.wiki: -------------------------------------------------------------------------------- 1 | {{=mul=}} 2 | {{-descr-}} 3 | En trekant som peger til højre 4 | {{-symb-|mul}} 5 | {{pn}} 6 | # knap som bruges til at afspille en video, lyd el. 
musik 7 | {{-also2-}} 8 | {{mul-media-buttons}} 9 | -------------------------------------------------------------------------------- /tests/data/pt/sublist.wiki: -------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | ==Adjetivo== 3 | '''多山''' 4 | * '''Romanização''' 5 | ** '''[[Pinyin]]''': duo1 shan1 6 | 7 | # [[montanhoso]] 8 | #* ''A ros'''a''', a alun'''a''', a vid'''a''', lam'''a'''.'' 9 | -------------------------------------------------------------------------------- /requirements-tests.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | mobi==0.4.1 3 | mypy==1.19.1 4 | pytest==9.0.2 5 | pytest-dependency==0.6.0 6 | responses==0.25.8 7 | ruff==0.14.10 8 | ruff-api==0.2.1 9 | types-requests==2.32.4.20250913 10 | -------------------------------------------------------------------------------- /tests/data/ca/Mn..wiki: -------------------------------------------------------------------------------- 1 | {{vegeu|mn|Mn|MN}} 2 | 3 | == {{-ca-}} == 4 | {{-pronafi-}} Vegeu {{m|ca|mossèn}} 5 | 6 | === Abreviatura === 7 | {{entrada|ca|abreviatura}} 8 | 9 | # [[mossèn]] com a tractament davant el nom 10 | -------------------------------------------------------------------------------- /tests/data/pt/ensimesmariam.wiki: -------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | =={{Forma verbal|pt}}== 3 | '''ensimesmariam''' 4 | # [[terceira pessoa]] do [[plural]] do [[futuro do pretérito]] do verbo '''[[ensimesmar]]''' 5 | 6 | {{aviso.flexão|ensimesmar}} 7 | -------------------------------------------------------------------------------- /tests/data/pt/nomenclaturar.wiki: -------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | ==Verbal== 3 | {{oxítona|no|men|cla|tu|rar}} 4 | # fazer a [[nomenclatura]] de 5 | 6 | ===Conjugação=== 7 | {{conj/pt|nomenclatur|ar}} 8 | 9 | 
[[Categoria:Verbo (Português)]] 10 | -------------------------------------------------------------------------------- /tests/data/da/-ør.wiki: -------------------------------------------------------------------------------- 1 | {{=da=}} 2 | {{-etym-}} 3 | Fra [[:w:fransk (sprog)|fransk]]: [[-eur]], af [[:w:latin (sprog)|latin]] [[-ator]]. 4 | {{-end-|da}} 5 | {{pn}} 6 | #Betegner den, der udfører et arbejde. 7 | #:''For eksempel [[gravør]]; den der graverer.'' 8 | -------------------------------------------------------------------------------- /tests/data/da/disse.wiki: -------------------------------------------------------------------------------- 1 | {{=da=}} 2 | {{-dem-pronom-|da}} 3 | {{pn}} 4 | # Flertal af [[denne]] 5 | {{-trans-}} 6 | * {{en}}: {{trad|en|these}} 7 | * {{de}}: {{trad|de|diese}} 8 | {{-noun-|da}} 9 | {{pn}} 10 | # ikke noget 11 | #: ''ikke en disse'' 12 | -------------------------------------------------------------------------------- /tests/data/ro/frumoasă.wiki: -------------------------------------------------------------------------------- 1 | =={{limba|ron}}== 2 | {{-etimologie-}} 3 | Din ''frumos''. 4 | {{-pronunție-}} 5 | * {{AFI}}: {{AFI|/fru'mo̯a.sə/}} 6 | {{-adjectiv-}} 7 | #''forma de feminin singular pentru'' [[frumos]]. 8 | [[Categorie:{{flex. 
adj}} {{ron}}]] 9 | -------------------------------------------------------------------------------- /tests/data/eo/♍.wiki: -------------------------------------------------------------------------------- 1 | {{character info}} 2 | =={{Lingvo|mul}}== 3 | ==={{Vortospeco|signo|mul}}=== 4 | 5 | ===={{Signifoj}}==== 6 | # {{k|mul|F: astrologio}} zodiaka signo de [[Virgulino]] (''[[Virgo]]'') 7 | 8 | {{nevideblafinodesekcio}} 9 | {{emojibox}} 10 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/template-add.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: New template 3 | about: Issue to request the support for a new template. 4 | title: '[LOCALE] Support "xxx" template' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | - Wiktionary page: 11 | - Model link: 12 | -------------------------------------------------------------------------------- /tests/data/no/Øyvind.wiki: -------------------------------------------------------------------------------- 1 | ==Norsk== 2 | ===Egennavn=== 3 | {{no-egen}} 4 | 5 | # [[norsk|Norsk]] mannsnavn 6 | [[Kategori:no:Mannsnavn]] 7 | 8 | ====Andre former==== 9 | *[[Øivind]] 10 | 11 | Navnet er kjent pga uttrykket: "Øyvind het han og gråt da han ble født" 12 | -------------------------------------------------------------------------------- /tests/data/ro/portocale.wiki: -------------------------------------------------------------------------------- 1 | =={{limba|ron}}== 2 | {{-etimologie-}} 3 | Din ''portocală''. 4 | {{-pronunție-}} 5 | * {{AFI}}: {{AFI|/por.toˈka.le/}} 6 | {{-substantiv-}} 7 | #''forma de plural nearticulat pentru'' '''[[portocală]]'''. 8 | [[Categorie:{{flex. 
subst}} {{ron}}]] 9 | -------------------------------------------------------------------------------- /tests/data/ca/expertes.wiki: -------------------------------------------------------------------------------- 1 | == {{-ca-}} == 2 | {{ca-pron|è}} 3 | 4 | === Adjectiu === 5 | {{ca-adj-forma|fp}} 6 | 7 | # {{forma-p|ca|experta}} 8 | 9 | === Nom === 10 | {{ca-nom-forma|fp}} 11 | 12 | # {{forma-p|ca|experta}} 13 | 14 | === Miscel·lània === 15 | * {{ca-sil}} 16 | -------------------------------------------------------------------------------- /tests/data/ro/aventurierul.wiki: -------------------------------------------------------------------------------- 1 | =={{limba|ron}}== 2 | {{-etimologie-}} 3 | Din ''aventurier''. 4 | {{-pronunție-}} 5 | * {{AFI}}: {{AFI|/a.ven.tu.riˈe.rul/}} 6 | {{-substantiv-}} 7 | #''forma de singular articulat pentru'' '''[[aventurier]]'''. 8 | [[Categorie:{{flex. subst}} {{ron}}]] 9 | -------------------------------------------------------------------------------- /add-to-cache.py: -------------------------------------------------------------------------------- 1 | from wikidict import caches 2 | 3 | # Regexp for search & replace: 4 | # `- \[ \] \d+-\d+-\d+ \d+:\d+:\d+ WARNING:wikidict\.svg:\d+ \[new SVG\] ` 5 | new_data: dict[str, str] = dict([ 6 | 7 | ]) 8 | print(len(new_data)) 9 | caches.expand_cache_file("svg", new_data) 10 | -------------------------------------------------------------------------------- /tests/data/da/PMV.wiki: -------------------------------------------------------------------------------- 1 | {{=da=}} 2 | {{-alt-}} 3 | [[pmv]] 4 | {{-noun-|da}} 5 | {{pn}} {{c}} 6 | # {{militær|engelsk}} {{abbr of|lang=da|pansret mandskabsvogn}} 7 | {{-trans-}} 8 | {{trans-top|Oversættelser}} 9 | *{{en}}: {{t|en|APC}} 10 | {{trans-bottom}} 11 | {{-ref-}} 12 | *{{DDO}} 13 | -------------------------------------------------------------------------------- /tests/data/no/funnet.wiki: 
-------------------------------------------------------------------------------- 1 | ==Norsk== 2 | ===Substantiv=== 3 | '''{{PAGENAME}}''' {{norm|nb=ja|nrm=ja|nn=ja}} 4 | 5 | #{{no-sub-bøyningsform|be|funn|nb=ja|nrm=ja|nn=ja}} 6 | 7 | ===Verb=== 8 | '''{{PAGENAME}}''' {{norm|nb=ja|nrm=ja}} 9 | #{{no-verb-bøyningsform|pret|finne|nb=ja|nrm=ja}} 10 | -------------------------------------------------------------------------------- /tests/data/no/Kiberg.wiki: -------------------------------------------------------------------------------- 1 | ==Norsk== 2 | 3 | ===Ordklasse=== 4 | substantiv, egennavn 5 | 6 | # et tettsted i [[Vardø]] kommune i [[Finnmark]] 7 | 8 | ===Demonymer (innbyggernavn)=== 9 | *Bokmål: [[kibergsværing]] 10 | *Nynorsk: [[kibergsværing]] 11 | 12 | [[kategori:Tettsteder i Finnmark]] 13 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Language aware diff headers 2 | # https://tekin.co.uk/2020/10/better-git-diff-output-for-ruby-python-elixir-and-more 3 | # https://gist.github.com/tekin/12500956bd56784728e490d8cef9cb81 4 | # https://github.com/git/git/blob/master/userdiff.c 5 | *.md diff=markdown 6 | *.py diff=python 7 | 8 | # Binary data types 9 | *.zip binary 10 | -------------------------------------------------------------------------------- /tests/data/zh/七講八講.wiki: -------------------------------------------------------------------------------- 1 | ==漢語== 2 | {{zh-forms|s=七讲八讲}} 3 | 4 | ===發音=== 5 | {{zh-pron 6 | |mn=xm,twt,ph:chhit-kóng#-poeh-kóng/twk:chhit-kóng#-peh-kóng 7 | |w=sh:7chiq kaon paq kaon 8 | |cat=v 9 | }} 10 | 11 | ===動詞=== 12 | {{zh-verb}} 13 | 14 | # {{lb|zh|漳泉話|吳語}} [[亂講]]、[[胡說]] 15 | # {{lb|zh|柳州官話}} 用各種方式解釋 16 | 17 | ====同義詞==== 18 | {{zh-dial|亂說}} 19 | -------------------------------------------------------------------------------- /tests/data/es/Mús..wiki: 
-------------------------------------------------------------------------------- 1 | == {{lengua|es}} == 2 | {{pron-graf}} 3 | 4 | === Etimología 1 === 5 | {{etimología}}. 6 | 7 | ==== {{abreviatura|es}} ==== 8 | ;1: ''Abreviatura lexicográfica convencional de la palabra'' [[música]]. 9 | 10 | ==== Traducciones ==== 11 | {{trad-arriba}} 12 | {{trad-abajo}} 13 | 14 | == Referencias y notas == 15 | 16 | -------------------------------------------------------------------------------- /check.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Small script to ensure quality checks pass before submitting a commit/PR. 4 | # 5 | [ -f ./venv/bin/python ] && python_exec='./venv/bin/python' || python_exec='python' 6 | $python_exec -m ruff format wikidict tests scripts 7 | $python_exec -m ruff check --fix --unsafe-fixes wikidict tests scripts 8 | $python_exec -m mypy wikidict scripts tests 9 | -------------------------------------------------------------------------------- /tests/data/pt/quebrar galho.wiki: -------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | ==Expressões== 3 | '''quebrar galho''' 4 | # resolver uma situação difícil ou complicada 5 | 6 | ==Ver também== 7 | ===No Wikcionário=== 8 | {{verTambém.Ini}} 9 | * [[desgalhar]] 10 | * [[esgalhar]] 11 | {{verTambém.NovaColuna}} 12 | * [[esnocar]] 13 | {{verTambém.Fim}} 14 | 15 | [[Categoria:Expressão (Português)]] 16 | -------------------------------------------------------------------------------- /tests/data/eo/Teodoriko.wiki: -------------------------------------------------------------------------------- 1 | =={{Lingvo|eo}}== 2 | {{Etimologio}} 3 | : El la itala. 4 | 5 | ==={{Vortospeco|persona nomo|eo}}, {{g|m}}=== 6 | 7 | {{Ekzemploj}} 8 | :[1] '''''Teodoriko''''', dirita la Granda, estis reĝo de ostrogotoj ekde 474 kaj reĝo de Italujo ekde 493 ĝis 526, dua ĝermana reĝo de Romo. 
9 | 10 | ===={{Tradukoj}}==== 11 | # [[Théodoric]], [[Thierry]]. 12 | -------------------------------------------------------------------------------- /tests/data/pt/tique-taque.wiki: -------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | ==Onomatopeia== 3 | '''{{grifar|ti}}.que-{{grifar|ta}}.que''' 4 | # imitativa do som compassado do mecanismo de um relógio a trabalhar 5 | 6 | ===Sinônimos=== 7 | * [[tic-tac]] 8 | 9 | ==Verbetes derivados== 10 | * [[tiquetaquear]] 11 | 12 | [[Categoria:Onomatopeia (Português)]] 13 | 14 | [[en:tique-taque]] 15 | [[zh:tique-taque]] 16 | -------------------------------------------------------------------------------- /tests/data/eo/latina.wiki: -------------------------------------------------------------------------------- 1 | =={{Lingvo|eo}}== 2 | 3 | {{Etimologio}} 4 | De [[Latino]] 5 | 6 | === Adjektivo === 7 | #rilata al [[Latino]]. 8 | 9 | ===={{Tradukoj}}==== 10 | {{trad-eko}} 11 | * angla: {{t|en|Latin}} 12 | * franca: {{t|fr|latin}} 13 | {{trad-mezo}} 14 | * germana: {{t|de|lateinisch}} 15 | {{trad-fino}} 16 | 17 | {{Referencoj}} 18 | * {{ref-Majstro|latina}} 19 | 20 | [[Kategorio:Radiko latin']] 21 | -------------------------------------------------------------------------------- /tests/data/fr/gèlent.wiki: -------------------------------------------------------------------------------- 1 | == {{langue|fr}} == 2 | === {{S|verbe|fr|flexion}} === 3 | {{fr-verbe-flexion|geler|impers=oui|ind.p.3p=oui|sub.p.3p=oui}} 4 | '''gèlent''' {{pron|ʒɛl|fr}} 5 | # ''Troisième personne du pluriel du présent de l’indicatif de'' [[geler]]. 6 | # ''Troisième personne du pluriel du présent du subjonctif de'' [[geler]]. 
7 | 8 | === {{S|prononciation}} === 9 | ==== {{S|homophones|fr}} ==== 10 | * [[gel]] 11 | -------------------------------------------------------------------------------- /wikidict/lang/ro/template_overrides.py: -------------------------------------------------------------------------------- 1 | from ... import utils 2 | 3 | 4 | def template_trad(args: tuple[str, ...]) -> str: 5 | """ 6 | Temporary until #2541 is fixed. 7 | 8 | >>> template_trad(("trad", "el", "παρα")) 9 | 'παρα' 10 | """ 11 | parts = list(args[1:]) 12 | utils.extract_keywords_from(parts) 13 | return parts[1] 14 | 15 | 16 | overrides = { 17 | "trad": template_trad, 18 | } 19 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | # GitHub Actions 4 | - package-ecosystem: github-actions 5 | directory: / 6 | schedule: 7 | interval: weekly 8 | assignees: 9 | - BoboTiG 10 | 11 | # Python requirements 12 | - package-ecosystem: pip 13 | directory: / 14 | schedule: 15 | interval: weekly 16 | assignees: 17 | - BoboTiG 18 | labels: 19 | - dependencies 20 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | docopt==0.6.2 2 | Jinja2==3.1.6 3 | marisa-trie==1.3.1 4 | mediawiki-langcodes==0.2.18 # for up-to-date langs data, required by wikitextprocessor 5 | mistune==3.1.4 # for DictFile reading 6 | pyglossary==5.2.1 7 | python-idzip==0.3.10 8 | requests==2.32.5 9 | rich==14.2.0 10 | scour==0.38.2 11 | wikitextparser==0.56.4 12 | git+https://github.com/reader-dict/wikitextprocessor.git@e571b26e7737fd848a0cb41e717837b8101ad4af 13 | -------------------------------------------------------------------------------- /tests/data/it/muratrici.wiki: 
-------------------------------------------------------------------------------- 1 | == {{-it-}} == 2 | {{-sost form-|it}} 3 | {{Pn}} ''f pl'' {{Tabs|muratore|muratori|muratrice|muratore|f2=muratora|fp2=muratrici}} 4 | 5 | # femminile plurale alternativo di [[muratore]] 6 | 7 | {{-sill-}} 8 | ; mu | ra | trì | ci 9 | 10 | {{-etim-}} 11 | {{Etim-link|muratore}} 12 | 13 | {{-trad-}} 14 | {{Trad1|}} 15 | :*{{en}}: 16 | {{Trad2}} 17 | -------------------------------------------------------------------------------- /tests/data/ca/halloweeniana.wiki: -------------------------------------------------------------------------------- 1 | == {{-ca-}} == 2 | {{ca-pron|hhālōwiniāna}} 3 | 4 | === Adjectiu === 5 | {{ca-adj-forma|f}} 6 | 7 | # {{forma-f|ca|halloweenià}} 8 | 9 | === Miscel·lània === 10 | * {{ca-sil|ha·llo·wee·ni·a·na}} 11 | 12 | == {{-es-}} == 13 | {{es-pron|jalogüiniana}} 14 | 15 | === Adjectiu === 16 | {{es-adj-forma|f}} 17 | 18 | # {{forma-f|es|halloweeniano}} 19 | 20 | === Miscel·lània === 21 | * {{es-sil|ha·llo·wee·nia·na}} 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/template-modification.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Template modification 3 | about: Issue to request a change in a template handler. 
4 | title: '[LOCALE] Fix support for "xxx" template' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | - Wiktionary page: 11 | 12 | Wikicode: 13 | ``` 14 | 15 | ``` 16 | 17 | Output: 18 | ``` 19 | 20 | ``` 21 | 22 | Expected: 23 | ``` 24 | 25 | ``` 26 | 27 | --- 28 | 29 | Model link, if any: 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Project stuff 2 | *.sha256 3 | *.bz2 4 | *.df 5 | *.json 6 | *.log 7 | *.mobi 8 | *.xml 9 | *.zip 10 | dict-data.* 11 | sections.txt 12 | templates.txt 13 | /data/ 14 | tests/data/*/*/ 15 | 16 | # Project stuff - update-bot.py 17 | apicache-py3 18 | pywikibot.lwp 19 | throttle.ctrl 20 | 21 | # Python stuff 22 | .coverage* 23 | __pycache__/ 24 | .mypy_cache/ 25 | .pytest_cache/ 26 | .ruff_cache/ 27 | venv/ 28 | 29 | # macOS 30 | .DS_Store 31 | -------------------------------------------------------------------------------- /tests/data/es/buena.wiki: -------------------------------------------------------------------------------- 1 | {{desambiguación|Buena}} 2 | 3 | == {{lengua|es}} == 4 | {{pron-graf|fone=ˈbwe.na|pron=estándar}} 5 | 6 | === Forma adjetiva === 7 | ;1: {{forma adjetivo|bueno|femenino}}. 8 | 9 | === {{interjección|es}} === 10 | ;2: Expresión de saludo. [[hola|Hola]]. 11 | {{uso|coloquial}} 12 | {{ámbito|Chile}} 13 | ;3: Úsase para expresar satisfacción u aprobación. 
14 | {{uso|coloquial}} 15 | {{ámbito|Chile}} 16 | 17 | == Referencias y notas == 18 | 19 | -------------------------------------------------------------------------------- /wikidict/lang/pt/template_adapters.py: -------------------------------------------------------------------------------- 1 | adapters = { 2 | "Predefinição:etimo2": lambda body: body.replace(":{{#seigual", "{{#seigual", count=1).replace( 3 | '{{#se:{{codlingua-codwmf|{{{1}}}}}| ([[:{{codlingua-codwmf|{{{1}}}}}:{{{2}}}|{{codlingua-codwmf|{{{1}}}}}]])}}', 4 | "", 5 | ), 6 | "Predefinição:étimo junção": lambda body: body.replace(":De", "De", count=1), 7 | } 8 | -------------------------------------------------------------------------------- /tests/data/fr/colligeait.wiki: -------------------------------------------------------------------------------- 1 | == {{langue|fr}} == 2 | === {{S|verbe|fr|flexion}} === 3 | {{fr-verbe-flexion|colliger|ind.i.3s=oui}} 4 | '''colligeait''' {{pron|kɔ.li.ʒɛ|fr}} 5 | # ''Troisième personne du singulier de l’indicatif imparfait du verbe'' [[colliger]]. 6 | #* {{exemple | lang=fr 7 | | Chazelle '''colligeait''' les prospectus de librairie, les affiches à lithographies et à dessins ; mais il ne souscrivait à rien. 8 | | source=Honoré de Balzac, ''Les Employés'', édition définitive}} 9 | -------------------------------------------------------------------------------- /tests/data/da/et.wiki: -------------------------------------------------------------------------------- 1 | {{=da=}} 2 | {{-art-|da}} 3 | {{pn}} 4 | #intetkøn af [[en]] 5 | #:''[[jeg|Jeg]] [[ønske]]r [[mig]] [[et]] [[skib]]'' 6 | 7 | {{=et=}} 8 | {{-conj-|et}} 9 | {{pn}} 10 | #[[at]] 11 | 12 | {{=fr=}} 13 | {{-conj-|fr}} 14 | {{pn}} 15 | #[[og]] 16 | {{-rel-}} 17 | # et .. et. 18 | #* [[såvel]] .. [[som]]. 
19 | 20 | 21 | {{=la=}} 22 | {{-conj-|la}} 23 | {{pn}} 24 | #[[og]] 25 | 26 | {{=tr=}} 27 | {{-noun-|tr}} 28 | {{pn}} 29 | # [[kød]] 30 | [[Kategori:Mad på tyrkisk]] 31 | -------------------------------------------------------------------------------- /tests/data/pt/não tenho trocado.wiki: -------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | ==Frase== 3 | '''não tenho trocado''' 4 | # usado por prestador de serviço para informar que não tem dinheiro amiúde que possa servir de troco ao valor pago por cliente 5 | # usado por cliente de serviço para informar que não tem dinheiro amiúde que possa servir de diferença ao valor maior pretendido para devolução pelo prestador de serviço quando este não tem o valor em moeda exato para devolver ao cliente 6 | 7 | [[Categoria:Livro de frases (Português)]] 8 | -------------------------------------------------------------------------------- /tests/data/es/hala.wiki: -------------------------------------------------------------------------------- 1 | == {{lengua|es}} == 2 | === Etimología === 3 | {{etimología|incierta}}. {{etimología|expresiva}}. 4 | {{pron-graf|acentuación=grave|audio=LL-Q1321_(spa)-Rodelar-ala.wav|división=ha - la|fone=ˈa.la|homófono=ala|longitud_silábica=2|número_letras=4}} 5 | 6 | === Forma verbal === 7 | ;1: {{forma verbo|halar|m=indicativo|p=3s|t=presente}}. 8 | ;2: {{forma verbo|halar|afirmativo=sí|p=2s|t=imperativo}}. 9 | 10 | === {{interjección|es}} === 11 | ;1: Expresión para demandar prisa o sorpresa. 
12 | {{sinónimo|ala|alá}} 13 | -------------------------------------------------------------------------------- /tests/data/zh/稍後.wiki: -------------------------------------------------------------------------------- 1 | ==漢語== 2 | {{zh-forms|s=稍后}} 3 | 4 | ===發音=== 5 | {{zh-pron 6 | |m=shāohòu 7 | |c=saau2 hau6 8 | |cat=adv,v 9 | }} 10 | 11 | ===副詞=== 12 | {{head|zh|副詞}} 13 | 14 | # 在[[短暫]]的[[時間]]之後 15 | #: {{zh-x|網絡 錯誤,請 稍後 重{chóng}試。 |}} 16 | 17 | ===動詞=== 18 | {{zh-verb}} 19 | 20 | # {{misspelling of|zh|稍候}} 21 | #* {{zh-x|之後,開始 漫長 的 音樂 等待,然後 出現 語音:「現在 客服 全部 忙線 中,請 稍後,我們 盡快 為 您 服務……」||ref='''2015'''年,洪繡巒《行家這樣做好服務》,台北:時報文化,{{ISBN|978-957-13-6308-0}},page 67}} 22 | 23 | ====引文==== 24 | * {{seemoreCites|zh}} 25 | 26 | {{C|zh|時間}} 27 | -------------------------------------------------------------------------------- /tests/data/ca/-ass-.wiki: -------------------------------------------------------------------------------- 1 | == {{-ca-}} == 2 | {{-etimologia-}} Del sufix {{m|ca|-às}} amb valor augmentatiu. 3 | 4 | === Infix === 5 | {{entrada|ca|infix|lema=-ass-}} 6 | 7 | # Infix que afegeix un [[matís]] [[augmentatiu]]. 8 | 9 | ==== Notes d'us ==== 10 | * Se sol utilitzar per formar verbs a partir de verbs de la primera conjugació resultant la terminació ''-assar''. 11 | * Alguns verbs acabats en ''-assar'' estan formats per un radical en ''-às'', ''-assa'', ''-aç'' més la desinència ''-ar''. 
12 | 13 | {{-der-}} 14 | {{vegeu-der-afix|ca}} 15 | -------------------------------------------------------------------------------- /tests/data/ca/PMF.wiki: -------------------------------------------------------------------------------- 1 | == {{-ca-}} == 2 | {{ca-pron|péémaéfa|rima=}} 3 | 4 | === Sigles === 5 | {{sigles|ca|nc|f-p}} 6 | 7 | # {{sigles de|ca|[[preguntes]] [[més]] [[freqüents]]}} 8 | 9 | {{-trad-}} 10 | {{t-inici}} 11 | * {{en}} : {{trad|en|FAQ}} 12 | * {{es}} : {{trad|es|PMF|f}} 13 | * {{fi}} : {{trad|fi|UKK}} 14 | * {{fr}} : {{trad|fr|FAQ|f}} 15 | {{t-final}} 16 | 17 | == {{-es-}} == 18 | {{es-pron|/ˌpeˈe.meˌe.fe/|rima=}} 19 | 20 | === Sigles === 21 | {{sigles|es|nc|f-p}} 22 | 23 | # {{sigles de|es|[[preguntas]] [[más]] [[frecuentes]]}} 24 | -------------------------------------------------------------------------------- /tests/data/de/Informationsverlusts.wiki: -------------------------------------------------------------------------------- 1 | == Informationsverlusts ({{Sprache|Deutsch}}) == 2 | === {{Wortart|Deklinierte Form|Deutsch}} === 3 | 4 | {{Nebenformen}} 5 | :[[Informationsverlustes]] 6 | 7 | {{Worttrennung}} 8 | :In·for·ma·ti·ons·ver·lusts 9 | 10 | {{Aussprache}} 11 | :{{IPA}} {{Lautschrift|ɪnfɔʁmaˈt͡si̯oːnsfɛɐ̯ˌlʊst͡s}} 12 | :{{Hörbeispiele}} {{Audio|De-Informationsverlusts.ogg}} 13 | 14 | {{Grammatische Merkmale}} 15 | *Genitiv Singular des Substantivs '''[[Informationsverlust]]''' 16 | 17 | {{Grundformverweis Dekl|Informationsverlust}} 18 | -------------------------------------------------------------------------------- /tests/data/no/uten.wiki: -------------------------------------------------------------------------------- 1 | ==Norsk== 2 | ===Preposisjon=== 3 | {{nb-prep}} 4 | #som ikke har;som [[mangler]] 5 | 6 | ====Andre skrivemåter==== 7 | {{nynorsk|utan}} 8 | 9 | ====Uttale==== 10 | (Nord-Trøndelag) IPA: ['ʉː.tⁿn̩] 11 | 12 | ====Antonymer==== 13 | *[[med]] 14 | 15 | ====Oversettelser==== 16 | {{overs-topp|som ikke har}} 17 
| *{{overs|en|without}} 18 | *{{overs|eo|sen}} 19 | *{{overs|fr|sans}} 20 | *{{overs|ia|sin}} 21 | {{overs-midt}} 22 | *{{overs|pl|bez}} 23 | *{{overs|es|sin}} 24 | *{{overs|sv|utan}} 25 | *{{overs|de|ohne}} 26 | {{overs-bunn}} 27 | -------------------------------------------------------------------------------- /tests/data/da/skulle.wiki: -------------------------------------------------------------------------------- 1 | {{=da=}} 2 | {{-verb-|da}} 3 | {{pn}} ''([[modalverbum]])'' 4 | #Er nødt til at gøre. Forpligtet til at gøre. 5 | #:''Du skal bruge sikkerhedssele.'' 6 | {{-decl-}} 7 | {{da-verb|-|skulle|skal|skulle|har|skullet}} 8 | {{-syn-}} 9 | * [[måtte]] 10 | * [[burde]] 11 | {{-trans-}} 12 | {{(}} 13 | *{{en}}: {{t|en|must}}, {{t|en|shall}}, {{t|en|have to}} 14 | *{{fr}}: {{trad|fr|devoir}} 15 | {{-}} 16 | *{{nl}}: {{trad|nl|moeten}} 17 | *{{sv}}: {{t|sv|skola}}, {{t|sv|måste}}, {{t|sv|böra}} 18 | {{)}} 19 | 20 | [[Kategori:Modalverber på dansk]] 21 | -------------------------------------------------------------------------------- /tests/data/pt/ababalhar.wiki: -------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | ==Verbo== 3 | {{oxítona|a|ba|ba|lhar}}, ''transitivo direto'' 4 | # {{escopo|pt|Popular}} [[babar]]; [[conspurcar]] 5 | ===Conjugação=== 6 | {{conj/pt|ababalh|ar}} 7 | =={{Etimologia|pt}}== 8 | : De [[baba]]. 
9 | =={{Pronúncia|pt}}== 10 | ===Brasil=== 11 | ====Paulistana==== 12 | * AFI: /a.ba.ba.ˈʎa''ɾ''/ 13 | * SAMPA: /a.ba.ba."Lar/ 14 | ====Carioca==== 15 | * AFI: /a.ba.ba.ˈʎax/ 16 | * SAMPA: /a.ba.ba."Lax/ 17 | ==Ver também== 18 | ===No Wikcionário=== 19 | * [[ababalho]] 20 | 21 | [[Categoria:Verbo (Português)]] 22 | -------------------------------------------------------------------------------- /log-analyzer.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | file = Path(sys.argv[1]) 5 | with file.open("rt", encoding="utf-8") as fh: 6 | jobs = {} 7 | for line in fh: 8 | if not line.startswith("INFO:wikidict.render"): 9 | continue 10 | job, word = line.split(" ", 1) 11 | job = job.split(":")[-1] 12 | if "Job done." in word: 13 | jobs.pop(job, None) 14 | else: 15 | jobs[job] = word.strip() 16 | 17 | main_proc = list(jobs.keys())[0] 18 | jobs.pop(main_proc, None) 19 | print(jobs) 20 | -------------------------------------------------------------------------------- /wikidict/lang/ca/template_adapters.py: -------------------------------------------------------------------------------- 1 | adapters = { 2 | "Plantilla:-etimologia-": lambda body: body.replace(':*Etimologia: ', ""), 3 | "Plantilla:etim-comp": lambda body: body.replace(":* '''Etimologia:''' ", ""), 4 | **dict.fromkeys( 5 | {"Plantilla:etim-fpref", "Plantilla:etim-fsuf", "Plantilla:etim-lang"}, 6 | lambda body: body.replace(':* Etimologia: ', ""), 7 | ), 8 | "Plantilla:etimologia": lambda body: body.replace(":*'''Etimologia:''' ", ""), 9 | } 10 | -------------------------------------------------------------------------------- /tests/data/es/ruego.wiki: -------------------------------------------------------------------------------- 1 | {{desambiguación||}} 2 | == {{lengua|es}} == 3 | {{pron-graf}} 4 | 5 | === Etimología 1 === 6 | {{etimología}}. 
7 | 8 | ==== {{sustantivo masculino|es}} ==== 9 | {{es.sust}} 10 | ;1: {{plm|súplica}}, [[petición]] hecha con el fin de alcanzar lo que se pide.{{DLC1914|908}} 11 | 12 | ==== Traducciones ==== 13 | {{trad-arriba}} 14 | {{trad-abajo}} 15 | 16 | === Forma flexiva === 17 | ==== Forma verbal ==== 18 | ;1: {{forma verbo|rogar|p=1s|t=presente|m=indicativo}}. 19 | 20 | == Referencias y notas == 21 | 22 | -------------------------------------------------------------------------------- /tests/data/pt/anões.wiki: -------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | [[Categoria:Forma de substantivo (Português)]] 3 | ==Substantivo== 4 | '''anões''' ''masculino '' 5 | # [[plural]] [[de]] '''[[anão]]''' 6 | #* ''Sempre a tinham sentido como um património, alguns talvez mesmo a considerassem em reserva para a casta e judiosa decisão de um dia, sempre tinham existido naquele encantamento dos sete '''anões''', ouvindo-a um pouco com o coração quando ela vinha pelo caminho cimentado sob as ramadas de lusalite, gritando de longe " é tarde, é tarde ", como o coelho do, país das maravilhas. 
'' - Agustina Bessa Luiz - OS INCURAVEIS 7 | -------------------------------------------------------------------------------- /tests/data/ca/AFI.wiki: -------------------------------------------------------------------------------- 1 | {{vegeu|afi|afí}} 2 | 3 | == {{-ca-}} == 4 | {{ca-pron}} 5 | 6 | === Sigles === 7 | {{sigles|ca|np}} 8 | 9 | # {{marca|ca|masculí}} {{sigles de|ca|[[alfabet|Alfabet]] [[fonètic|Fonètic]] [[internacional|Internacional]]}} 10 | # {{marca|ca|femení}} {{sigles de|ca|[[associació|Associació]] [[fonètica|Fonètica]] [[internacional|Internacional]]}} 11 | 12 | {{-trad-}} 13 | {{t-inici}} 14 | * {{de}}: {{trad|de|IPA}} 15 | * {{en}}: {{trad|en|IPA}} 16 | * {{es}}: {{trad|es|AFI}} 17 | * {{fr}}: {{trad|fr|API}} 18 | {{t-final}} 19 | 20 | === Vegeu també === 21 | * {{Viquipèdia}} 22 | -------------------------------------------------------------------------------- /tests/data/ja/みてる.wiki: -------------------------------------------------------------------------------- 1 | == {{ja}} == 2 | [[Category:{{ja}}|みてる]] 3 | 4 | ===動詞 見てる・{{contraction}}=== 5 | [[Category:{{ja}}_{{contraction}}]] 6 | #「[[みる|み]][[て]][[いる]]」の音便 7 | 8 | === {{verb}}(中国地方) === 9 | [[Category:{{ja}}_{{verb}}|みてる]] 10 | [[Category:{{ja}}_中国方言|みてる]] 11 | # [[なくなる]]、[[消失]]する 12 | #*「ジュースがみてる。」 13 | 14 | === {{verb}}(土佐) === 15 | [[Category:{{ja}}_土佐弁|みてる]] 16 | # [[しぬ]]、[[くさる]] 17 | 18 | == 古典{{ja}} == 19 | [[Category:{{ojp}}|みてる]] 20 | 21 | 満てる・充てる 22 | # [[みちた]] 23 | #* 憐れみ満てる心 24 | 25 | ==== {{etym}} ==== 26 | 動詞[[みつ]] 27 | 28 | ==== {{rel}} ==== 29 | * {{syn}}:[[みちる]] 30 | -------------------------------------------------------------------------------- /tests/data/no/NS.wiki: -------------------------------------------------------------------------------- 1 | {{se også|nS|ns}} 2 | {{wikipediaartikkel}} 3 | 4 | ==Norsk== 5 | ===Initialord=== 6 | {{no-fork}} 7 | 8 | #''initialord for'' partiet [[:w:Nasjonal Samling|Nasjonal Samling]] 9 | #''initialord for'' [[:w:Norsk 
Standard|Norsk Standard]] 10 | 11 | ==Nederlandsk== 12 | ===Initialord=== 13 | {{infl|nl|fork}} 14 | 15 | #''initialord for'' jernbaneselskapet [[:w:Nederlandse Spoorwegen|Nederlandse Spoorwegen]] 16 | 17 | ==Tysk== 18 | ===Forkortelser=== 19 | {{infl|de|fork}} 20 | 21 | #''forkortelse for'' ''[[Nationalsozialismus]]'', norsk [[nasjonalsosialisme]] eller [[nazisme]] 22 | -------------------------------------------------------------------------------- /tests/data/pt/6.wiki: -------------------------------------------------------------------------------- 1 | {{caracteres info|hex=0036|nome=DIGIT SIX|morse=-....|anterior=5|próximo=7}} 2 | 3 | ={{-pt-}}= 4 | ==Forma de pronome== 5 | '''6''' 6 | # {{escopo|pt|Internetês}} [[cês]] 7 | 8 | [[Categoria:Forma de pronome (Português)]] 9 | 10 | 11 | ={{-mult-}}= 12 | =={{Símbolo|mult}}== 13 | '''6''' 14 | # [[algarismo]] [[indo-arábico]] que representa o [[numeral]] [[seis]] 15 | 16 | ==Ver também== 17 | ===No Wikcionário=== 18 | {{Algarismos indo-arábicos}} 19 | {{números indo-arábicos}} 20 | 21 | [[Categoria:Algarismo indo-arábico]] 22 | [[Categoria:Numeral cardinal (Multilíngue)|# 000006]] 23 | -------------------------------------------------------------------------------- /tests/data/da/godt nytår.wiki: -------------------------------------------------------------------------------- 1 | {{=da=}} 2 | {{-frase-|da}} 3 | {{pn}} 4 | #En hilsen der siges omkring den 1. januar. 
from collections import defaultdict


def render_variant(tpl: str, parts: list[str], data: defaultdict[str, str], word: str) -> str:
    """
    Pick the target word out of a variant template's positional arguments.

    Templates whose name ends with "avledning" yield the second argument,
    every other template yields the last one.

    >>> render_variant("böjning", ["sv", "subst", "boll"], defaultdict(str), "")
    'boll'
    >>> render_variant("avledning", ["sv", "abnorm", "adj"], defaultdict(str), "")
    'abnorm'
    """
    if tpl.endswith("avledning"):
        return parts[1]
    return parts[-1]


# Every supported variant template is rendered by the same function.
handlers = {name: render_variant for name in ("avledning", "böjning")}
[[Categoria:Acrônimo (Português)]] 19 | [[Categoria:Sigla (Português)]] 20 | [[Categoria:Economia (Português)]] 21 | [[Categoria:Segurança (Português)]] 22 | -------------------------------------------------------------------------------- /tests/data/pt/neo-.wiki: -------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | ==Prefixo== 3 | '''neo-''' 4 | # exprime a ideia de ''[[novo]]'' 5 | {{verTambém.Ini}} 6 | * '''Nota:''' Liga-se por hífen ao morfema seguinte quando este começa por '''vogal''', '''h''', '''r''' ou '''s'''. 7 | *: '''[[e.g.]]:''' [[neo-árico]], [[neo-helénico]], [[neo-impressionismo]], [[neo-realismo]], [[neófito]], [[neófobo]], etc 8 | {{verTambém.Fim}} 9 | 10 | ===Sinônimo=== 11 | * [[novi-]] 12 | 13 | =={{etimologia|pt}}== 14 | {{etimo2|grc|νέος|pt}} 15 | 16 | ==Ver também== 17 | ===No Wikcionário=== 18 | {{verTambém.Ini}} 19 | * [[nuper-]] 20 | {{verTambém.NovaColuna}} 21 | * [[recém-]] 22 | {{verTambém.Fim}} 23 | 24 | [[Categoria:Prefixo (Português)]] 25 | -------------------------------------------------------------------------------- /tests/data/fr/venoient.wiki: -------------------------------------------------------------------------------- 1 | == {{langue|fr}} == 2 | === {{S|verbe|fr|flexion}} === 3 | '''venoient''' {{pron||fr}} 4 | # ''Ancienne forme de la troisième personne du pluriel de l’indicatif imparfait du verbe'' [[venir]] (on écrit maintenant ''[[venaient]]''). 5 | #* {{exemple | lang=fr 6 | | Ils trouvèrent nos bons Chrétiens et Néophites, lors qu’ils '''venoient''' de décabaner pour enfoncer davantage dans les bois, et qu’ils étoient chargez de femmes, d’enfans et de bagage; ce qui ne leur donna pas le loisir de se mettre en défense. 
7 | | source=''Marie de l’Incarnation Ursuline, ''Correspondance'', Abbaye Saint-Pierre de Solesmes, 1971, page 325}} 8 | 9 | === {{S|anagrammes}} === 10 | {{voir anagrammes|fr}} 11 | -------------------------------------------------------------------------------- /tests/data/da/bakterie.wiki: -------------------------------------------------------------------------------- 1 | {{også|Bakterie}} 2 | {{=da=}} 3 | {{-noun-|da}} 4 | {{pn}} {{c}} 5 | # (mikrobiologi) en [[encellet]] [[mikroskopisk]] [[organisme]] uden [[cellekerne]] 6 | {{-etym-}} 7 | : fra latin ''bacterium'', latinisering af græsk ''bakterion'' (βακτήριον - lille stav), diminutiv af ''baktron'' (βάκτρον - stav) 8 | {{-syn-}} 9 | * [[bacille]] (ældre sprogbrug) 10 | {{-decl-}} 11 | {{da-noun|en|bakterie|bakterien|bakterier|bakterierne}} 12 | {{-trans-}} 13 | {{(}} 14 | :*{{en}}: {{trad|en|bacterium}} 15 | :*{{el}}: {{trad|el|βακτηρίδιο}} 16 | :*{{nl}}: {{trad|nl|bacterie}} 17 | {{-}} 18 | :*{{pl}}: {{t|pl|bakteria|f}} 19 | :*{{sv}}: {{t|sv|bakterie|c}} 20 | :*{{de}}: {{t|de|Bakterie|f}} 21 | {{)}} 22 | -------------------------------------------------------------------------------- /tests/data/da/tolvte.wiki: -------------------------------------------------------------------------------- 1 | {{=da=}} 2 | {{-etym-}} 3 | Fra {{etyl|non|da}} {{term|tolfti|lang=non}}. 4 | {{-pronun-}} 5 | *{{IPA|/ˈtɔldə/|lang=da}} 6 | {{da-ord-numbers}} 7 | {{-seq-num-|da}} 8 | {{pn}} (''forkortes'': 12.) 
9 | # nummer [[tolv]] i rækken 10 | #* før: [[ellevte]] 11 | #* efter: [[trettende]] 12 | {{-trans-}} 13 | {{(}} 14 | * {{en}}: {{t|en|twelfth}} 15 | * {{fr}}: {{t|fr|douzième}} 16 | * {{el}}: {{t|el|δωδέκατος}} 17 | * {{it}}: {{t|it|dodicesimo}} 18 | * {{O|ku|duwanzdehemîn}} 19 | * {{nl}}: {{t|nl|twaalfde}} 20 | {{-}} 21 | * {{pt}}: {{t|pt|duodécimo}} 22 | * {{es}}: {{t|es|duodécimo}} 23 | * {{O|ru|двенадцатый}} 24 | * {{O|sv|tolfte}} 25 | * {{O|de|zwölfte}} 26 | {{)}} 27 | {{-ref-}} 28 | *{{DDO}} 29 | -------------------------------------------------------------------------------- /tests/data/pt/UTC.wiki: -------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | ==Sigla== 3 | '''''UTC''''' 4 | # {{escopo|pt|estrangeirismo}} ver [[TUC]] 5 | 6 | ==Ver também== 7 | ===Na Wikipédia=== 8 | * [[w:Tempo Universal Coordenado|Artigo sobre UTC na Wikipédia]] 9 | 10 | ==Ligações externas== 11 | * [http://www.zenite.nu/menu02/hora.shtml Breves comentários sobre UTC e fusos horários] 12 | 13 | [[Categoria:Sigla (Português)]] 14 | 15 | 16 | ={{-en-}}= 17 | ==Sigla== 18 | '''UTC''' 19 | # ''Universal Time Coordinated'': [[TUC]] (Tempo Universal Coordenado) 20 | 21 | =={{etimologia|en}}== 22 | : Formado a partir de ''Universal Time'', pela adição do modificador ''Coordinated''. 23 | 24 | [[Categoria:Sigla (Inglês)]] 25 | [[Categoria:Tempo (Inglês)]] 26 | -------------------------------------------------------------------------------- /tests/data/pt/galium.wiki: -------------------------------------------------------------------------------- 1 | {{confundir|gálio|gallium}} 2 | 3 | ={{-pt-}}= 4 | {{flex.pt|ms=galium|mp=galiuns}} 5 | {{paroxítona|ga|lium}} 6 | # planta do gênero ''[[wikispecies:Galium|Galium]]''. De entre elas o [[amor-de-hortelão]], (''[[wikispecies:Galium aparine|G. aparine]]'') 7 | 8 | =={{etimologia|pt}}== 9 | : Do nome do gênero ao que pertence a planta, ''[[wikispecies:Galium|Galium]]''. 
Pelo grego γάλιον, (galion), (planta [[galião]], ''[[wikispecies:Galium verum|G. verum]]''), de γάλα, (gala), ([[leite]], por ser usada para [[coalhar]] o leite). 10 | 11 | ==Ver também== 12 | ===Na Wikipédia=== 13 | * [https://pt.wikipedia.org/wiki/Galium galium] 14 | 15 | [[Categoria:Substantivo (Português)]] 16 | [[Categoria:Botânica (Português)]] 17 | -------------------------------------------------------------------------------- /tests/data/fr/chacune.wiki: -------------------------------------------------------------------------------- 1 | == {{langue|fr}} == 2 | === {{S|pronom indéfini|fr|flexion}} === 3 | {{fr-accord-ind|m=chacun|pm=ʃa.kœ̃|pf=ʃa.kyn}} 4 | {{fr-accord-un|ms=chacun|pron=ʃa.k}} 5 | '''chacune''' {{pron|ʃa.kyn|fr}} {{s}} 6 | # ''Féminin (singulier) de'' [[chacun]]. 7 | #* {{exemple | lang=fr 8 | | Lino est apparu par la porte d'entrée, en se frottant les mains l'une contre l'autre pour les réchauffer. De la buée sortait de sa bouche à '''chacune''' de ses respirations. 9 | | source=Mélisa Godet, ''Les Augustins'', éd. Jean-Claude Lattès, 2014}} 10 | 11 | === {{S|prononciation}} === 12 | * {{écouter|lang=fr|France (Brétigny-sur-Orge)||audio=LL-Q150 (fra)-Pamputt-chacune.wav}} 13 | 14 | === {{S|anagrammes}} === 15 | {{voir anagrammes|fr}} 16 | -------------------------------------------------------------------------------- /tests/data/fr/djed.wiki: -------------------------------------------------------------------------------- 1 | == {{langue|fr}} == 2 | === {{S|étymologie}} === 3 | : {{ébauche-étym|fr}} 4 | 5 | === {{S|nom|fr}} === 6 | {{fr-rég|dʒɛd}} 7 | [[Fichier:Pilastro-djed in feience, prov. sconosciuta 01.JPG|vignette|Amulette en forme de '''djed''' conservée à Milan.]] 8 | '''djed''' {{pron|dʒɛd|fr}} {{m}} 9 | # {{lexique|archéologie|fr}} Objet sculpté et peint de l’Égypte ancienne datant de la période thinite dont le sens reste discuté. 
R11 10 | #* {{exemple|lang=fr}} 11 | 12 | ==== {{S|variantes}} ==== 13 | * [[zed]] (ancienne appellation) 14 | 15 | ==== {{S|traductions}} ==== 16 | {{trad-début}} 17 | * {{T|de}} : {{trad-|de|Djed-Pfeiler}} 18 | * {{T|en}} : {{trad+|en|djed}} 19 | {{trad-fin}} 20 | 21 | === {{S|voir aussi}} === 22 | * {{WP|pilier Djed}} 23 | -------------------------------------------------------------------------------- /tests/data/no/rasshol.wiki: -------------------------------------------------------------------------------- 1 | ==Norsk== 2 | ===Substantiv=== 3 | {{nn-sub|n}} 4 | 5 | # [[anus]]; brukt som [[skjellsord]] 6 | 7 | ====Andre former==== 8 | {{andre former|rasshòl|nn=ja|nb=nei|nrm=nei}} 9 | {{andre former|rasshull|nb=ja|nrm=ja}} 10 | {{andre former|rasshøl|nb=ja|nrm=nei}} 11 | 12 | ====Uttale==== 13 | {{uttale mangler|språk=no}} 14 | {{lydfil mangler|språk=no}} 15 | 16 | ====Grammatikk==== 17 | {{nn-sub-n1}} 18 | 19 | ====Oversettelser==== 20 | {{overs-topp}} 21 | * {{overs|en|asshole}} 22 | {{overs-bunn}} 23 | 24 | ===Interjeksjon=== 25 | {{infl|no|inter}} 26 | 27 | # {{kontekst|brukt som skjellsord|språk=no}} [[utropsord]] med samme betydning som substantivet 28 | 29 | [[Kategori:no:Skjellsord]] 30 | 31 | ===Referanser=== 32 | * {{R:Bokmålsordboka-Nynorskordboka}} 33 | -------------------------------------------------------------------------------- /wikidict/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Finally decent dictionaries based on Wiktionary for your beloved eBook reader.
3 | 4 | You can always get the latest version of this module at: 5 | https://github.com/reader-dict/monolingual 6 | """ 7 | 8 | __version__ = "5.0.0" 9 | __author__ = "Mickaël Schoentgen" 10 | __copyright__ = f""" 11 | Copyright (c) 2020-2025, {__author__} 12 | 13 | Permission to use, copy, modify, and distribute this software and its 14 | documentation for any purpose and without fee or royalty is hereby 15 | granted, provided that the above copyright notice appear in all copies 16 | and that both that copyright notice and this permission notice appear 17 | in supporting documentation or portions thereof, including 18 | modifications, that you make. 19 | """ 20 | -------------------------------------------------------------------------------- /.github/workflows/quality.yml: -------------------------------------------------------------------------------- 1 | name: Quality 2 | 3 | on: 4 | - pull_request 5 | - workflow_dispatch 6 | 7 | concurrency: 8 | group: ${{ github.ref }}-${{ github.workflow }}-${{ github.event_name }}-${{ github.event_name != 'pull_request' && github.sha || '' }} 9 | cancel-in-progress: true 10 | 11 | jobs: 12 | tests: 13 | runs-on: ubuntu-latest 14 | timeout-minutes: 3 15 | 16 | steps: 17 | - uses: actions/checkout@v6 18 | 19 | - name: Set up Python 20 | uses: actions/setup-python@v6 21 | with: 22 | python-version: "3.13" 23 | cache: pip 24 | 25 | - name: Install requirements 26 | run: python -m pip install -r requirements-tests.txt 27 | 28 | - name: Quality checks, and linters 29 | run: ./check.sh 30 | -------------------------------------------------------------------------------- /tests/data/no/bak lås og slå.wiki: -------------------------------------------------------------------------------- 1 | ==Norsk== 2 | ===Frase=== 3 | {{no-frase}} 4 | 5 | # {{kontekst|om straffedømt|språk=no}} [[i fengsel]] 6 | #: ''Den mistenkte ble satt bak '''lås og slå'''.'' 7 | 8 | ====Oversettelser==== 9 | {{overs-topp|i fengsel}} 10 | * {{overs|da|bag 
tremmer}} 11 | * {{overs|en|behind bars}} 12 | * {{overs|fi|telkien takana}}, {{o|fi|kalterien takana}}, {{o|fi|vankilassa}} 13 | * {{overs|fr|derrière les barreaux}} 14 | * {{overs|it|dietro le sbarre}} 15 | {{overs-midt}} 16 | * {{overs|nl|achter de tralies}} 17 | * {{overs|ru|за решёткой|sc=Cyrl|tr=za rešótkoj}} 18 | * {{overs|es|entre rejas}} 19 | * {{overs|sv|bakom lås och bom}} 20 | * {{overs|de|hinter Gitter}} 21 | {{overs-bunn}} 22 | 23 | ===Referanser=== 24 | * {{R:Bokmålsordboka-Nynorskordboka}} 25 | * {{R:NAOB|slå}} 26 | -------------------------------------------------------------------------------- /tests/data/fr/Bogotanais.wiki: -------------------------------------------------------------------------------- 1 | {{voir|bogotanais}} 2 | 3 | == {{langue|fr}} == 4 | === {{S|étymologie}} === 5 | : Du nom [[Bogota]] avec le préfixe [[-ais]]. 6 | 7 | === {{S|nom|fr}} === 8 | {{fr-inv|bɔ.ɡɔ.ta.nɛ|sp=1}} 9 | '''Bogotanais''' {{pron|bɔ.ɡɔ.ta.nɛ|fr}} {{m}} {{équiv-pour|une femme|Bogotanaise|lang=fr}} {{sp}} 10 | # Habitant de [[Bogota]]. 11 | #* {{exemple |Tous font le plein. Notamment le week-end, lors des mariages célébrés dans l’église baroque Santa Barbara et lors du traditionnel marché aux puces, rendez-vous dominical des '''Bogotanais'''. 
from collections import defaultdict


def render_variant(tpl: str, parts: list[str], data: defaultdict[str, str], word: str) -> str:
    """
    Pick the target word out of a variant template's positional arguments.

    Templates whose name contains "forma-conj" yield the first argument,
    every other template yields the last one.

    >>> render_variant("ca-forma-conj", ["abacallanar", "1", "pres", "ind"], defaultdict(str), "abacallan")
    'abacallanar'
    >>> render_variant("forma-f", ["ca", "-à"], defaultdict(str), "-ana")
    '-à'
    >>> render_variant("forma-p", ["ca", "-alla"], defaultdict(str), "-alles")
    '-alla'
    """
    if "forma-conj" in tpl:
        return parts[0]
    return parts[-1]


# Every supported variant template is rendered by the same function.
handlers = {
    name: render_variant
    for name in (
        "ca-forma-conj",
        "forma-conj",
        "forma-f",
        "forma-p",
    )
}
@dataclass(slots=True)
class Word:
    """Record holding the data kept for one word.

    Every container field defaults to a fresh empty list/dict and
    ``is_variant`` defaults to ``False``, so ``Word()`` builds an empty record.
    Fields are declared in constructor order; reordering them would change the
    positional ``__init__`` signature.
    """

    # NOTE(review): the meaning of each field is only visible here through its
    # name and type — confirm against the code that fills these records.
    pronunciations: list[str] = field(default_factory=list)
    genders: list[str] = field(default_factory=list)
    etymology: list[Definition] = field(default_factory=list)
    # Keyed by str; presumably one entry per part of speech — TODO confirm.
    definitions: Definitions = field(default_factory=dict)
    variants: list[str] = field(default_factory=list)
    reverse_variants: list[str] = field(default_factory=list)
    is_variant: bool = False
21 | 22 | ===See also=== 23 | *{{l|en|42-gon}} 24 | 25 | ===Anagrams=== 26 | * {{anagrams|en|a=24|2/4}} 27 | 28 | [[Category:Hindu-Arabic script]] 29 | {{C|en|Dominoes}} 30 | -------------------------------------------------------------------------------- /tests/data/en/humans.wiki: -------------------------------------------------------------------------------- 1 | {{also|Humans|humāns}} 2 | ==English== 3 | 4 | ===Pronunciation=== 5 | {{rfp|en}} 6 | * {{audio|en|LL-Q1860 (eng)-Vealhurl-humans.wav|a=Southern England}} 7 | 8 | ===Noun=== 9 | {{head|en|noun form}} 10 | 11 | # {{plural of|en|human}} 12 | 13 | ===Verb=== 14 | {{head|en|verb form}} 15 | 16 | # {{infl of|en|human||s-verb-form}} 17 | 18 | ===Anagrams=== 19 | * {{anagrams|en|a=ahmnsu|Hamsun|Husman|Shuman|husman}} 20 | 21 | ==Catalan== 22 | 23 | ===Pronunciation=== 24 | * {{ca-IPA}} 25 | * {{rhymes|ca|ans|s=2}} 26 | 27 | ===Noun=== 28 | {{head|ca|noun form}} 29 | 30 | # {{plural of|ca|humà}} 31 | 32 | ==Latin== 33 | 34 | ===Etymology=== 35 | Present active participle of {{m|la|humō||bury}} 36 | 37 | ===Participle=== 38 | {{la-part|humāns}} 39 | 40 | # [[burying]] 41 | 42 | ====Declension==== 43 | {{la-adecl|humāns<3-P+>}} 44 | -------------------------------------------------------------------------------- /tests/data/it/debolmente.wiki: -------------------------------------------------------------------------------- 1 | == {{-it-}} == 2 | {{-avv-|it}} 3 | {{Pn}} 4 | # in maniera [[debole]], con [[debolezza]] 5 | 6 | {{-sill-}} 7 | ; de | bol | mén | te 8 | 9 | {{-pron-}} 10 | {{IPA|/debolˈmente/}} 11 | 12 | {{-etim-}} 13 | composto dall'[[aggettivo]] [[debole]] e dal [[suffisso]] [[-mente]] 14 | 15 | {{-sin-}} 16 | * [[fragilmente]], [[fiaccamente]], [[mollemente]] 17 | * [[lievemente]], [[scarsamente]] 18 | * [[stancamente]] 19 | 20 | {{-ant-}} 21 | * [[vigorosamente]], [[energicamente]] 22 | 23 | {{-trad-}} 24 | {{Trad1|}} 25 | :*{{fr}}: 26 | :*{{en}}: [[weakly]] 27 | :* {{la}}: [[ieiune]], [[infirme]], 
[[molliter]], [[tenuiter]] 28 | :*{{es}}: 29 | :*{{ru}}: [[слабо]] 30 | :*{{de}}: 31 | {{Trad2}} 32 | 33 | {{-ref-}} 34 | * {{Fonte|dizit}} 35 | * {{Fonte|hoep}} 36 | * {{Fonte|sin-co}} 37 | 38 | -------------------------------------------------------------------------------- /tests/data/pt/paulista.wiki: -------------------------------------------------------------------------------- 1 | {{confundir|paulistano}} 2 | 3 | ={{-pt-}}= 4 | ==Adjetivo== 5 | {{flex.pt|s=paulista|p=paulistas}} 6 | {{paroxítona|pau|lis|ta}}, {{c2g}} 7 | # diz-se de [[pessoa]] de origem do Estado de São Paulo, Brasil 8 | # diz-se de artigo ou objeto do Estado de São Paulo 9 | 10 | ==Substantivo== 11 | {{paroxítona|pau|lis|ta}}, {{c2g}} 12 | # [[pessoa]] de origem do Estado de São Paulo, Brasil 13 | # artigo ou objeto do Estado de São Paulo 14 | 15 | =={{Pronúncia|pt}}== 16 | ===Brasil=== 17 | ====Paulistana e Caipira==== 18 | * [[AFI]]: {{IPA|/paw.ˈlis.tə/|id=pt}} 19 | * [[SAMPA]]: /paw."lis.ta/ 20 | 21 | ==Ver também== 22 | ===No Wikcionário=== 23 | * [[paulistano]] 24 | 25 | ===Na Wikipédia=== 26 | * [[w:São Paulo|São Paulo]] 27 | 28 | [[Categoria:Adjetivo (Português)]] 29 | [[Categoria:Substantivo (Português)]] 30 | [[Categoria:Gentílico (Português)]] 31 | -------------------------------------------------------------------------------- /tests/data/pt/etc.wiki: -------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | ==Abreviatura== 3 | {{dica de uso|1='''etc''' equivale a "e outras coisas.", logo, não se deve utilizar a conjunção e ou vírgula antes de etc. 4 | *Exemplo: 5 | **correto: amor, felicidade etc. 6 | **errado: amor, felicidade, e etc. 7 | 8 | 9 | Além disso, não deve ser usado para dizer "e outras pessoas". 
Para esses casos, usa-se [[et al.]], abreviatura de "et alii" ("e outras pessoas", em latim) 10 | }} 11 | '''etc''' 12 | # [[abreviação]] do latim ''et cetera'', que significa "e outros", "e os restantes" e "e outras coisas mais" 13 | 14 | ==={{-varort-}}=== 15 | * [[&c]] 16 | 17 | ===Tradução=== 18 | {{tradini}} 19 | * {{trad|de|usw.|etc.}} 20 | {{tradmeio}} 21 | * {{trad|en|etc.}} 22 | {{tradfim}} 23 | 24 | [[Categoria:Abreviatura (Português)]] 25 | -------------------------------------------------------------------------------- /tests/data/ca/hivernacle.wiki: -------------------------------------------------------------------------------- 1 | == {{-ca-}} == 2 | {{ca-pron}} 3 | {{-etimologia-}} {{Del-lang|la|ca|hībernāculum}}, de {{m|la|hībernō|t=hivernar}}. 4 | 5 | === Nom === 6 | {{ca-nom|m}} 7 | 8 | # Cobert per a protegir plantes del vent o del fred extrem. 9 | #: {{ex-cit|ca|—Segur que la teva família no... voldrà fer servir aquest lloc? —No, ningú ve a seure a l''''hivernacle''' als matins. —D'acord. |ref=Alice Oseman (trad. Lluís Delgado), ''Heartstopper 2: La meva persona favorita'' (pàg. 340), jul. 2020.}} 10 | 11 | {{-comp-}} 12 | * [[efecte hivernacle]] 13 | 14 | {{-trad-}} 15 | {{t-inici}} 16 | * {{en}}: {{trad|en|conservatory}} 17 | * {{es}}: {{trad|es|invernadero}} 18 | * {{fr}}: {{trad|fr|serre}} 19 | * {{it}}: {{trad|it|serra}} 20 | {{t-final}} 21 | 22 | === Miscel·lània === 23 | * {{ca-sil}} 24 | 25 | === Vegeu també === 26 | * {{Viquipèdia}} 27 | * {{ca-dicc|diec|gdlc|dnv|dcvb}} 28 | -------------------------------------------------------------------------------- /tests/data/de/kartel.wiki: -------------------------------------------------------------------------------- 1 | == kartel ({{Sprache|Deutsch}}) == 2 | === {{Wortart|Konjugierte Form|Deutsch}} === 3 | 4 | {{Nebenformen}} 5 | :''2. Person Singular Imperativ Präsens Aktiv:'' [[kartle]] 6 | :''1. Person Singular Indikativ Präsens Aktiv:'' [[kartle]] 7 | :''2. 
Person Singular Imperativ Präsens Aktiv:'' [[kartele]] 8 | :''1. Person Singular Indikativ Präsens Aktiv:'' [[kartele]] 9 | 10 | {{Worttrennung}} 11 | :kar·tel 12 | 13 | {{Aussprache}} 14 | :{{IPA}} {{Lautschrift|ˈkaʁtl̩}} 15 | :{{Hörbeispiele}} {{Audio|De-kartel.ogg}} 16 | :{{Reime}} {{Reim|aʁtl̩|Deutsch}} 17 | 18 | {{Grammatische Merkmale}} 19 | *2. Person Singular Imperativ Präsens Aktiv des Verbs '''[[karteln]]''' 20 | *1. Person Singular Indikativ Präsens Aktiv des Verbs '''[[karteln]]''' 21 | 22 | {{Grundformverweis Konj|karteln}} 23 | 24 | {{Ähnlichkeiten 1|Anagramme=[[erkalt]], [[kalter]], [[kartle]], [[rektal]]}} 25 | -------------------------------------------------------------------------------- /wikidict/lang/en/template_overrides.py: -------------------------------------------------------------------------------- 1 | def code(kind: str, value: str) -> str: 2 | """ 3 | >>> code("html", "") 4 | '' 5 | >>> code("html", "") 6 | '</span>' 7 | 8 | >>> code("js", "(65535).toString(16) === 'ffff'") 9 | "(65535).toString(16) === 'ffff'" 10 | >>> code("js", "=(65535).toString(16) === 'ffff'") 11 | "(65535).toString(16) === 'ffff'" 12 | >>> code("js", "==(65535).toString(16) === 'ffff'") 13 | "=(65535).toString(16) === 'ffff'" 14 | """ 15 | from html import escape 16 | 17 | if not value: 18 | return "" 19 | if value[0] == "=": 20 | value = value[1:] 21 | if kind == "html": 22 | value = escape(value) 23 | return f"{value}" 24 | 25 | 26 | overrides = { 27 | "code": lambda args: code(args[1], args[2]), 28 | } 29 | -------------------------------------------------------------------------------- /tests/data/no/slå to fluer i en smekk.wiki: -------------------------------------------------------------------------------- 1 | ==Norsk== 2 | ===Idiom=== 3 | {{no-verb|tittel=[[slå]] [[to]] [[fluer]] [[i]] [[en]] [[smekk]]}} 4 | 5 | #{{idiomatisk|no}} få [[gjøre|gjort]] to ting med én [[handling]] 6 | 7 | ====Andre former==== 8 | {{andre former|slå to fluer i én 
smekk|nn=nei|nb=ja|nrm=ja}} 9 | {{andre former|slå to fluer i ett smekk|nn=nei|nb=ja|nrm=ja}} 10 | {{andre former|slå to fluger i ein smekk|nn=ja|nb=nei|nrm=nei}} 11 | {{andre former|slå to fluger i eitt smekk|nn=ja|nb=nei|nrm=nei}} 12 | 13 | ====Oversettelser==== 14 | {{overs-topp|to gjerninger i én}} 15 | *{{overs|da|slå to fluer med ét smæk}} 16 | *{{overs|en|kill two birds with one stone}} 17 | *{{overs|nl|twee vliegen in een klap slaan}} 18 | {{overs-midt}} 19 | *{{overs|sv|slå två flugor i en smäll}} 20 | *{{overs|de|zwei Fliegen mit einer Klappe schlagen}} 21 | {{overs-bunn}} 22 | 23 | [[Kategori:Idiom med dyr i norsk]] 24 | -------------------------------------------------------------------------------- /tests/data/no/tolvte.wiki: -------------------------------------------------------------------------------- 1 | ==Norsk== 2 | ===Tallord=== 3 | {{no-tall}} 4 | 5 | # ordenstallet til [[tolv]] 6 | 7 | ====Etymologi==== 8 | Fra {{etyl|non|no}} {{term|tolfti|språk=non}}; {{suffiks|tolv|te|språk=no}} 9 | 10 | ====Oversettelser==== 11 | {{overs-topp|Ordenstall}} 12 | *{{overs|da|tolvte}} 13 | *{{overs|en|twelfth}} 14 | *{{overs|fr|douzième|m|f}} 15 | *{{overs|it|dodicesimo|m}}, {{o|it|dodicesima|f}} 16 | *{{overs|nl|twaalfde}} 17 | {{overs-midt}} 18 | *{{overs|pt|duodécimo|m}}, {{o|pt|duodécima|f}} 19 | *{{overs|es|duodécimo|m}}, {{o|es|duodécima|f}} 20 | *{{overs|sv|tolfte}} 21 | *{{overs|de|zwölfte}} 22 | {{overs-bunn}} 23 | 24 | ===Referanser=== 25 | *{{R:NAOB}} 26 | *{{R:Bokmålsordboka-Nynorskordboka}} 27 | 28 | ==Dansk== 29 | ===Tallord=== 30 | {{da-tall}} 31 | 32 | #[[#Norsk|tolvte]] 33 | 34 | ====Etymologi==== 35 | Fra {{etyl|non|da}} {{term|tolfti|språk=non}}; {{suffiks|tolv|te|språk=da}} 36 | -------------------------------------------------------------------------------- /tests/data/no/lumpen.wiki: -------------------------------------------------------------------------------- 1 | ==Norsk== 2 | ===Adjektiv=== 3 | {{no-adj}} 4 | 5 | # tarvelig, nedrig 
6 | 7 | ====Etymologi==== 8 | {{etymologi mangler|språk=no}} 9 | 10 | ====Uttale==== 11 | {{uttale mangler|språk=no}} 12 | {{lydfil mangler|språk=no}} 13 | 14 | ====Grammatikk==== 15 | {{no-adj-a5-a4|lump}} 16 | {{ordbank|NY}} 17 | 18 | ====Oversettelser==== 19 | {{overs-topp|sjofel}} 20 | * {{overs|da|lumpen}} 21 | * {{overs-mangler|en}} 22 | * {{overs-mangler|fi}} 23 | * {{overs-mangler|fr}} 24 | * {{overs-mangler|it}} 25 | {{overs-midt}} 26 | * {{overs-mangler|nl}} 27 | * {{overs-mangler|ru}} 28 | * {{overs-mangler|es}} 29 | * {{overs|sv|lumpen}} 30 | * {{overs-mangler|de}} 31 | {{overs-bunn}} 32 | 33 | ===Referanser=== 34 | * {{R:NAOB}} 35 | * {{R:Bokmålsordboka-Nynorskordboka}} 36 | 37 | ===Substantiv=== 38 | '''{{PAGENAME}}''' {{norm|nb=ja|nrm=ja|nn=ja}} 39 | 40 | #{{no-sub-bøyningsform|be|lump}} {{norm|nb=ja|nrm=ja|nn=ja}} 41 | -------------------------------------------------------------------------------- /tests/data/de/trage.wiki: -------------------------------------------------------------------------------- 1 | {{Siehe auch|[[Trage]], [[träge]]}} 2 | 3 | == trage ({{Sprache|Deutsch}}) == 4 | === {{Wortart|Konjugierte Form|Deutsch}} === 5 | 6 | {{Nebenformen}} 7 | :''Imperativ Singular:'' [[trag]] 8 | 9 | {{Worttrennung}} 10 | :tra·ge 11 | 12 | {{Aussprache}} 13 | :{{IPA}} {{Lautschrift|ˈtʁaːɡə}} 14 | :{{Hörbeispiele}} {{Audio|De-trage.ogg}} 15 | :{{Reime}} {{Reim|aːɡə|Deutsch}} 16 | 17 | {{Grammatische Merkmale}} 18 | * 1. Person Singular Indikativ Präsens Aktiv des Verbs '''[[tragen]]''' 19 | * 1. Person Singular Konjunktiv I Präsens Aktiv des Verbs '''[[tragen]]''' 20 | * 3. Person Singular Konjunktiv I Präsens Aktiv des Verbs '''[[tragen]]''' 21 | * 2. 
Person Singular Imperativ Präsens Aktiv des Verbs '''[[tragen]]''' 22 | 23 | {{Grundformverweis Konj|tragen}} 24 | 25 | {{Ähnlichkeiten 1|[[trabe]], [[träge]]|Anagramme=[[Egart]], [[Ertag]], [[garet]], [[garte]], [[gerat]], [[Grate]], [[raget]], [[ragte]], [[Terga]]}} 26 | -------------------------------------------------------------------------------- /tests/data/da/søm.wiki: -------------------------------------------------------------------------------- 1 | {{=da=}} 2 | ===Etymologi 1=== 3 | Fra oldnordisk [[saumr]], fra [[sýja]] (''at sy''). 4 | ====Substantiv==== 5 | {{infl|da|noun|g=c}} 6 | # [[sammensyning]] 7 | {{-decl-}} 8 | {{da-noun-infl|stem=sømm|en|e}} 9 | {{-trans-}} 10 | {{top}} 11 | * {{en}}: {{t|en|seam}} 12 | * {{nl}}: {{t|nl|zoom}} 13 | {{midt}} 14 | * {{sv}}: {{t|sv|söm}} 15 | *{{O|de|Saum|m}} 16 | {{bund}} 17 | 18 | ===Etymologi 2=== 19 | Fra oldnordisk [[saumr]] {{m}}. 20 | 21 | ====Substantiv==== 22 | {{infl|da|noun|g=n}} 23 | # [[spids]] [[metal]]pind med et [[hoved]], beregnet til at [[sammenføje]] træstykker til [[hinanden]] 24 | {{-rel-}} 25 | * [[partisansøm]] 26 | {{-decl-}} 27 | {{da-noun-infl|stem=sømm|et}} 28 | {{-trans-}} 29 | {{top}} 30 | *{{O|en|nail}} 31 | *{{O|fr|clou|m}} 32 | *{{O|nl|spijker}} 33 | {{midt}} 34 | * {{sv}}: {{t|sv|spik|c}}, {{t|sv|söm|c}} 35 | *{{O|cs|hřebík|m}} 36 | *{{O|de|Nagel|m}} 37 | {{bund}} 38 | {{-ref-}} 39 | *{{DDO}} 40 | -------------------------------------------------------------------------------- /scripts/da-langs.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from scripts_utils import get_content 4 | 5 | # Primary 6 | data = get_content("https://da.wiktionary.org/wiki/Modul:lang/data?action=raw") 7 | pattern = re.compile(r'data\["([^"]+)"\]\s+=\s+\{\s+name\s+=\s+"([^"]+)",') 8 | langs = re.findall(pattern, data) 9 | 10 | # Missing langs 11 | langs.append(("enm", "middelengelsk")) 12 | langs.append(("otk", "oldtyrkisk")) 13 | 
langs.append(("syr", "assyrisk")) 14 | 15 | # Aliases 16 | data = get_content("https://da.wiktionary.org/wiki/Bruger:PolyBot~dawiktionary/Languages?action=raw") 17 | pattern = re.compile(r"^\| (\w+)\|\|\|\|(\w+)\|\|", flags=re.MULTILINE) 18 | langs.extend(re.findall(pattern, data)) 19 | 20 | known_langs: set[str] = set() 21 | print("langs = {") 22 | for key, name in sorted(langs): 23 | if key in known_langs: 24 | continue 25 | known_langs.add(key) 26 | print(f' "{key}": "{name.lower()}",') 27 | print(f"}} # {len(langs):,}") 28 | -------------------------------------------------------------------------------- /tests/data/ca/pen.wiki: -------------------------------------------------------------------------------- 1 | {{vegeu|PEN}} 2 | 3 | == {{-ca-}} == 4 | 5 | === Verb === 6 | {{ca-verb-forma|penar}} 7 | 8 | # {{marca-nocat|ca|balear|alguerès}} {{ca-forma-conj|penar|1|pres|ind}} 9 | 10 | {{-var-}} 11 | * [[peno]], [[pene]], [[peni]] 12 | 13 | === Miscel·lània === 14 | * {{ca-sil}} 15 | 16 | == {{-en-}} == 17 | [[Fitxer:Ballpoint-pen-parts.jpg|miniatura|right|Pen (bolígraf)]] 18 | {{pronafi|en|/pɛn/}} {{àudio simple|En-us-pen.ogg|àudio (EUA)}} 19 | 20 | === Nom === 21 | {{en-nom}} 22 | 23 | # [[ploma]] 24 | # [[bolígraf]] 25 | # Àrea encerclada utilitzada per a contenir animals domèstics, especialment [[bestiar]]; [[corral]]. 
26 | 27 | == {{-rmq-x-ca-}} == 28 | {{rmq-x-ca-pron}} 29 | 30 | === Verb === 31 | {{entrada|rmq-x-ca|verb-forma}} 32 | 33 | # {{forma-conj|rmq-x-ca|penar|forma obliqua|imp}} 34 | 35 | == {{-nl-}} == 36 | {{pronafi|nl|/pɛn/}} 37 | 38 | === Nom === 39 | {{entrada|nl|nom|plural|pennen}} 40 | 41 | # [[bolígraf]] 42 | -------------------------------------------------------------------------------- /tests/data/fr/5E.wiki: -------------------------------------------------------------------------------- 1 | {{voir|5e}} 2 | 3 | == {{langue|conv}} == 4 | 5 | === {{S|symbole|conv}} === 6 | * Code {{lien|AITA|fr}} de la compagnie d’aviation {{lien|SGA Airlines|fr}} ''({{lang|en|Siam General Aviation Company Limited}}'', {{lang|th|บริษัท สยาม เจนเนอรัล เอวิเอชั่น จำกัด}}). 7 | 8 | == {{langue|en}} == 9 | 10 | === {{S|étymologie}} === 11 | : {{laé|en|nom}} Abréviation de ''5° {{lien|East|en}}''. 12 | : {{laé|en|nom propre}} {{date|2014}} Abréviation de ''{{lien|fifth|en}} {{lien|edition|en}}'' (« cinquième édition »). 13 | 14 | === {{S|nom|en}} === 15 | '''5E''' {{pron||en}} {{sigle|en}} 16 | # {{lexique|géographie|en}} {{lien|méridien|fr|dif=Méridien}} situé à {{unité|5|[[°]]}} de {{lien|longitude|fr}} {{lien|est|fr|nom}}. 17 | #* {{exemple|lang=en}} 18 | 19 | === {{S|nom propre|en}} === 20 | '''5E''' {{pron|ˈfaɪv ˈiː|en}} {{sigle|en}} 21 | # {{lexique|jeux de rôle|en}} Cinquième édition du jeu de rôle sur table ''{{w|Donjons et Dragons}}''. 
22 | -------------------------------------------------------------------------------- /tests/data/pt/dezassete.wiki: -------------------------------------------------------------------------------- 1 | {{grafiaPtpt|dezessete}} 2 | 3 | ={{-pt-}}= 4 | ==Numeral== 5 | {{paroxítona|de|zas|se|te}}, {{gramática|card}} 6 | # vide [[dezessete]] 7 | 8 | ==Substantivo== 9 | {{flex.pt|ms=dezassete|mp=dezassetes}} 10 | {{paroxítona|de|zas|se|te}}, {{gramática|m}} 11 | # o [[número]] dezassete (17, XVII) 12 | # nota correspondente a dezassete valores 13 | #* ''Mãe, tive '''dezassete''' valores no teste de Língua Portuguesa.'' 14 | # pessoa ou [[coisa]] que apresenta o número dezassete numa ordenação 15 | #* ''Olha, o '''dezassete''' não veio hoje...'' 16 | 17 | =={{etimologia|pt}}== 18 | : Contração do latim vulgar ''[[decem#Latim|decem]]'' + ''[[ac]]'' + ''[[septem]]''. 19 | 20 | =={{pronúncia|pt}}== 21 | ===Portugal=== 22 | * AFI: {{AFI|/dɨ.zɐ.ˈsɛ.tɨ/}} 23 | 24 | ==Ver também== 25 | ===Na Wikipédia=== 26 | * {{interwiki|w|pt|dezessete}} 27 | 28 | [[Categoria:Substantivo (Português)]] 29 | [[Categoria:Numeral cardinal (Português)|# 000017]] 30 | -------------------------------------------------------------------------------- /tests/data/no/-bar.wiki: -------------------------------------------------------------------------------- 1 | {{se også|bar|bär|Bär}} 2 | ==Norsk== 3 | ===Suffiks=== 4 | {{no-suff}} 5 | 6 | # [[suffiks]] som lager [[adjektiv]] av [[substantiv]] (''[[fruktbar]]''), [[verb]] (''[[sammenlignbar]]'') og [[adjektiv]] (''[[åpenbar]]'') 7 | 8 | ====Etymologi==== 9 | Fra {{opphav|nedertysk}}, egentlig «bærende» 10 | 11 | ====Se også==== 12 | *[[:Kategori:Ord i norsk dannet av suffikset «-bar»]] 13 | 14 | ====Oversettelser==== 15 | {{overs-topp|suffisk som lager adjektiv av substantiv, verb eller adjektiv}} 16 | *{{overs|en|-able}} 17 | *{{overs|fr|-able}} 18 | {{overs-midt}} 19 | *{{overs|sv|-bar}} 20 | *{{overs|de|-bar}} 21 | {{overs-bunn}} 22 | 23 | ==Tysk== 24 
| ===Suffiks=== 25 | {{infl|de|suffiks}} 26 | 27 | # [[suffiks]] som lager [[adjektiv]] av [[substantiv]] - som ''[[fruchtbar]]'' («fruktbar») - og [[verb]] - som ''[[vergleichbar]]'' («sammenlignbar» ) 28 | 29 | ====Se også==== 30 | *[[:Kategori:Ord i tysk som er dannet av suffikset «-bar»]] 31 | -------------------------------------------------------------------------------- /tests/data/it/brillantino.wiki: -------------------------------------------------------------------------------- 1 | =={{-it-}}== 2 | {{-sost-|it}} 3 | {{Pn}} ''m'' {{Linkp|brillantini}} 4 | # piccolo [[foglietto]] di materiale [[lucido]] e [[riflettente]] usato come [[ornamento]] per abiti 5 | # {{Est}} [[glitter]] 6 | 7 | {{-sill-}} 8 | ; bril | lan | tì | no 9 | 10 | {{-etim-}} 11 | da [[brillare]] 12 | 13 | {{-verb form-}} 14 | {{Pn}} 15 | # terza persona plurale del congiuntivo presente di [[brillantare]] 16 | # terza persona plurale dell'imperativo di [[brillantare]] 17 | 18 | {{-etim-}} 19 | {{Etim-link|brillantare}} 20 | 21 | {{-trad-}} 22 | {{Trad1|piccolo foglietto di materiale lucido}} 23 | :* {{en}}: 24 | {{Trad2}} 25 | 26 | {{Trad1|glitter}} 27 | :* {{en}}: [[glitter]] 28 | {{Trad2}} 29 | 30 | {{Trad1|terza persona plurale del congiuntivo presente di brillantare}} 31 | :* {{en}}: 32 | {{Trad2}} 33 | 34 | {{Trad1|terza persona plurale dell'imperativo di brillantare}} 35 | :* {{en}}: 36 | {{Trad2}} 37 | 38 | {{-ref-}} 39 | * {{Fonte|hoep}} 40 | -------------------------------------------------------------------------------- /tests/data/pt/algo.wiki: -------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | ==Advérbio== 3 | {{paroxítona|al|go}}, {{datação|897|pt}} 4 | # um [[pouco]], de [[certo]] [[modo]] 5 | #* ''Isto me parece '''algo''' estranho.'' 6 | 7 | ==Pronome== 8 | {{paroxítona|al|go}}, ''pronome indefinido substantivo'' 9 | # [[objeto]] (não-identificado) de que se fala 10 | #* '''''Algo''' assim eu nunca vi.'' 11 | 12 | 
===Tradução=== 13 | {{tradini}} 14 | * {{trad|de|ein wenig|etwas}} 15 | * {{trad|fr|quelque}} 16 | * {{trad|en|something|somewhat}} 17 | {{tradmeio}} 18 | * {{trad|sv|något}} 19 | * {{trad|ur|کچھ}} 20 | {{tradfim}} 21 | 22 | ===Verbetes derivados=== 23 | * [[nalgo]] 24 | 25 | =={{pronúncia|pt}}== 26 | ===Brasil=== 27 | ====Paulistana e Caipira==== 28 | * [[AFI]]: /ˈaw.gu/ 29 | * [[SAMPA]]: /"aw.gu/ 30 | 31 | ===Portugal=== 32 | * AFI: {{AFI|/ˈaɫ.ɡu/}} 33 | 34 | ==Anagramas== 35 | # [[galo]] 36 | # [[gola]] 37 | # [[lago]] 38 | 39 | [[Categoria:Advérbio (Português)]] 40 | [[Categoria:Pronome (Português)]] 41 | -------------------------------------------------------------------------------- /tests/data/es/uni-.wiki: -------------------------------------------------------------------------------- 1 | == {{lengua|es}} == 2 | {{pron-graf|p1=unir|palt1=uní}} 3 | 4 | === Etimología 1 === 5 | {{etimología|la|uni-}}, {{etim|la|unus}}. 6 | 7 | ==== {{prefijo|es}} ==== 8 | ;1: {{impropia|{{plm|elemento compositivo}} que significa}} uno. un [[único]], relativo a [[uno]] [[solo]]. 
9 | {{ejemplo|[[unifamiliar]], [[unidimensional]], [[uniforme]].}} 10 | {{sinónimo|mono-|nota1=griego}} 11 | {{antónimo|multi-|pluri-|poli-|nota3=griego}} 12 | {{relacionado|bi-|di-|tri-|cuatri-|cuadri-|tetra-|quinque-|penta-|hexa-|hepta-|octa-|octo-|nona-|deca-}} (prefijos de número o cantidad) 13 | 14 | ==== Véase también ==== 15 | * [[:Categoría:ES:Palabras con el prefijo uni-|Palabras con el prefijo ''uni-'' en ''Wikcionario'']] 16 | {{w|Prefijos del español}} 17 | 18 | ==== Traducciones ==== 19 | {{nota traducción afijo|pre}} 20 | {{trad-arriba}} 21 | {{t|en|a1=1|t1=uni-}} 22 | {{t|it|a1=1|t1=uni-}} 23 | {{trad-abajo}} 24 | 25 | == Referencias y notas == 26 | 27 | -------------------------------------------------------------------------------- /wikidict/caches/__init__.py: -------------------------------------------------------------------------------- 1 | import json 2 | from gzip import compress, decompress 3 | from pathlib import Path 4 | 5 | CACHE_PATH = Path(__file__).parent 6 | 7 | 8 | def load_cache_file(kind: str) -> dict[str, str]: 9 | file = CACHE_PATH / f"{kind}.gz" 10 | contents: dict[str, str] = json.loads(decompress(file.read_bytes())) 11 | return contents 12 | 13 | 14 | def expand_cache_file(kind: str, values: dict[str, str]) -> None: 15 | contents = load_cache_file(kind) 16 | contents |= values 17 | save_cache_file(kind, contents) 18 | 19 | 20 | def save_cache_file(kind: str, contents: dict[str, str]) -> None: 21 | file = CACHE_PATH / f"{kind}.gz" 22 | file.write_bytes( 23 | compress( 24 | json.dumps( 25 | contents, 26 | check_circular=False, 27 | ensure_ascii=False, 28 | indent=0, 29 | sort_keys=True, 30 | ).encode() 31 | ) 32 | ) 33 | -------------------------------------------------------------------------------- /tests/data/eo/alkazabo.wiki: -------------------------------------------------------------------------------- 1 | {{Vorto de la semajno|45|2011}}{{Vorto de la semajno|45|2012}}{{Vorto de la semajno|45|2013}} 2 | 3 | 
=={{Lingvo|eo}}== 4 | 5 | ==={{Vortospeco|substantivo|eo}}=== 6 | {{Deklinacio-eo}} 7 | {{bildodek|Almeria Alcazaba (fcm).jpg|'''alkazabo''' de Almería}} 8 | 9 | {{Deveno}} 10 | : el la [[andalus-araba]] ''[[alqaṣába]]'', kaj tiu ĉi el la klasika [[araba]] ''[[qaṣabah]]'', [[قصبة]] 11 | 12 | ===={{Signifoj}}==== 13 | # {{k|eo|F: historio|arkitekturo|militado}} fortikita konstruaĵaro; [[citadelo]] [[aŭ]] [[palaco]] [[de]] [[araba]] [[ĉefo]] en [[Nord-Afriko]] kaj [[Suda-Hispanio]] [http://vortaro.net/#Alkazabo Alkazabo en vortaro.net] 14 | 15 | ===={{Tradukoj}}==== 16 | {{trad-eko}} 17 | * franca: {{t|fr|alcazaba|f}}, {{t|fr|citadelle|f}}, casbah 18 | * germana: {{t|de|Alcazaba|f}}, {{t|de|Festung|f}} 19 | {{trad-mezo}} 20 | * hispana: {{t|es|alcazaba|f}} 21 | * itala: {{t|it|alcazaba|f}}, {{t|it|cittadella|f}} 22 | {{trad-fino}} 23 | 24 | {{Referencoj}} 25 | -------------------------------------------------------------------------------- /wikidict/lang/it/variant_handlers.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | 4 | def render_variant(tpl: str, parts: list[str], data: defaultdict[str, str], word: str) -> str: 5 | """ 6 | >>> render_variant("flexion", ["foo"], defaultdict(str), "") 7 | 'foo' 8 | >>> render_variant("flexion", ["salumiere#Sostantivo", "salumiere"], defaultdict(str), "") 9 | 'salumiere' 10 | 11 | >>> render_variant("tabs", ["muratore", "muratori", "muratrice", "muratore"], defaultdict(str, {"f2": "muratora", "fp2": "muratrici"}), "") 12 | 'muratore' 13 | >>> render_variant("Tabs", [], defaultdict(str, {"f": "tradotta", "m": "tradotto", "mp": "tradotti", "fp": "tradotte"}), "") 14 | 'tradotto' 15 | """ 16 | return parts[-1] if tpl == "flexion" else data["m"] or parts[0] 17 | 18 | 19 | handlers = { 20 | **dict.fromkeys( 21 | { 22 | "flexion", 23 | "tabs", 24 | "Tabs", 25 | }, 26 | render_variant, 27 | ), 28 | } 29 | 
-------------------------------------------------------------------------------- /wikidict/lang/ro/variant_handlers.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | 4 | def render_variant(tpl: str, parts: list[str], data: defaultdict[str, str], word: str) -> str: 5 | """ 6 | >>> render_variant("adj form of", ["ro", "frumos", "", "m", "p"], defaultdict(str), "") 7 | 'frumos' 8 | >>> render_variant("forma de vocativ singular pentru", ["a", "word"], defaultdict(str), "") 9 | 'word' 10 | """ 11 | return parts[1] if "adj form of" in tpl else parts[-1] 12 | 13 | 14 | def render_reverse_variant(tpl: str, parts: list[str], data: defaultdict[str, str], word: str) -> str: 15 | """ 16 | >>> render_reverse_variant("rev-flexion", ["pietrele"], defaultdict(str), "piatră") 17 | 'pietrele' 18 | """ 19 | return parts[0] 20 | 21 | 22 | handlers = { 23 | **dict.fromkeys( 24 | { 25 | "adj form of", 26 | "flexion", 27 | }, 28 | render_variant, 29 | ), 30 | "rev-flexion": render_reverse_variant, 31 | } 32 | -------------------------------------------------------------------------------- /tests/test_5_gen_dict.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from unittest.mock import patch 3 | 4 | import pytest 5 | 6 | from wikidict import context, gen_dict 7 | 8 | 9 | @pytest.fixture(scope="module", autouse=True) 10 | def setup_lua_ctx() -> None: 11 | with patch.dict("os.environ", {"CWD": str(Path(context.__file__).parent.parent)}): 12 | assert context.reset("fr") 13 | 14 | 15 | @pytest.mark.webtest 16 | @pytest.mark.parametrize( 17 | "locale, words", 18 | [ 19 | ("fr", "logiciel"), # Single word 20 | ("fr", "base,logiciel"), # Multiple words 21 | ("fr", "cercle unité"), # Accentued word + space 22 | ("fr:fr", "logiciel"), # Sublang falsy 23 | ("fr:it", "glielo"), # Another lang 24 | ], 25 | ) 26 | def test_gen_dict(locale: str, words: 
str, tmp_path: Path) -> None: 27 | for format in ["dictfile", "df", "dictorg", "kobo", "dicthtml", "kindle", "mobi", "stardict"]: 28 | assert gen_dict.main(locale, words, tmp_path, format=format) == 0 29 | -------------------------------------------------------------------------------- /tests/data/es/extenuado.wiki: -------------------------------------------------------------------------------- 1 | == {{lengua|es}} == 2 | {{pron-graf}} 3 | 4 | === Etimología 1 === 5 | {{etimología}}. 6 | 7 | ==== {{adjetivo|es}} ==== 8 | {{es.adj}} 9 | ;1: {{plm|cansado}}, [[debilitado]]. 10 | {{ejemplo|La naturaleza parecía suspirar y rendirse ''extenuada''.|título=La Última Niebla|capítulo=La Historia de María Griselda|fecha=1987|editorial=Revista VEA|páginas=81|c=libro|a=María Luisa Bombal}} 11 | ;2: Se dice de un individuo: sin energía, debido a un gran esfuerzo físico o mental. 12 | {{ejemplo|—Ana María, más vale no seguir el viaje. Los caballos están ''extenuados''. El coche no tiene faroles. Esperemos que amanezca.|título=La Última Niebla/La Amortajada|capítulo=La Amortajada|fecha=2012|editorial=Planeta|páginas=145|c=libro|a=María Luisa Bombal}} 13 | 14 | ==== Traducciones ==== 15 | {{trad-arriba}} 16 | {{trad-abajo}} 17 | 18 | === Forma flexiva === 19 | ==== Forma verbal ==== 20 | ;1: {{participio|extenuar}} (debilitar o debilitarse). 
21 | 22 | == Referencias y notas == 23 | 24 | -------------------------------------------------------------------------------- /scripts/all-namespaces.py: -------------------------------------------------------------------------------- 1 | from scripts_utils import get_content 2 | 3 | url = "https://{0}.wiktionary.org/w/api.php?action=query&meta=siteinfo&siprop={1}&format=json" 4 | 5 | # https://en.wiktionary.org/wiki/Wiktionary:Namespace 6 | ids = {6, 14} # File, and Category 7 | 8 | results: dict[str, list[str]] = {} 9 | # XXX_LOCALES 10 | locales = ("ca", "da", "de", "el", "en", "eo", "es", "fr", "it", "ja", "no", "pt", "ro", "ru", "sv", "zh") 11 | 12 | for locale in locales: 13 | result_discard_last: list[str] = [] 14 | for kind in ("namespaces", "namespacealiases"): 15 | json = get_content(url.format(locale, kind), as_json=True) 16 | data = json["query"][kind] 17 | if kind == "namespaces": 18 | result_discard_last.extend(data[str(id_)]["*"] for id_ in ids) 19 | else: 20 | result_discard_last.extend(namespace["*"] for namespace in data if namespace["id"] in ids) 21 | results[locale] = sorted(result_discard_last) 22 | 23 | print("namespaces =", end=" ") 24 | print(results) 25 | -------------------------------------------------------------------------------- /tests/data/fr/-aux.wiki: -------------------------------------------------------------------------------- 1 | {{voir|aux}} 2 | 3 | == {{langue|fr}} == 4 | === {{S|étymologie}} === 5 | : Ayant dans le passé la forme « -als », au cours du XII{{e}} siècle, le « l » précédant une autre consonne se modifia en « u », comme dans « colp – coup, altre – autre ». Étant suivi d'une consonne uniquement au pluriel, la terminaison « -als » pris la forme de « aus ». Le « x » provient des manuscrits, qui étaient extrêmement chers à l'époque, il va de soi qu'on voulut y mettre le plus de texte possible. S'inspirant du latin où « us » s'écrivait « x », on obtint ainsi la forme « -ax ». 
Le « u » vient s'ajouter plus tard pour s'accorder à la prononciation [o]. 6 | 7 | === {{S|suffixe|fr|flexion}} === 8 | '''-aux''' {{pron|o|fr}} 9 | # ''Forme courante du pluriel de'' [[-al]]. 10 | 11 | ==== {{S|variantes}} ==== 12 | * [[-als]], {{term|dans certains cas particuliers}}. 13 | {{clé de tri|aux}} 14 | 15 | == {{langue|eo}} == 16 | {{voir autres systèmes|eo|-aŭ|-au}} 17 | === {{S|suffixe|eo}} === 18 | '''-aux''' {{pron|aw|eo}} 19 | # {{eo-sys-x|-aŭ}}. 20 | -------------------------------------------------------------------------------- /tests/data/da/til.wiki: -------------------------------------------------------------------------------- 1 | {{-da-}} 2 | {{-prep-|da}} 3 | '''til''' 4 | #Ordet betegner en retning hen imod eller et tilhørsforhold 5 | #:'''Til''' [[land]]s, til [[vand]]s [[og]] [[i]] [[luft]]en.'' 6 | 7 | ====Syntax==== 8 | Ordet '''til''' styrer [[genitiv]] (ejefald). 9 | 10 | {{-ant-}} 11 | *[[fra]] 12 | 13 | {{-etym-}} 14 | [[indoeuropæisk sprog|Indoeuropæisk]]: *ad (i betydningen: fastsætte, ordne) -> [[germansk sprog|germansk]] *tila- (i betydningen: mål; jf. tysk: Ziel) -> [[oldnordisk sprog|oldnordisk]] til. Ordet betyder altså egentlig: "''med'' xxx ''som mål''", hvor xxx kan erstattes af et substantiv (navneord). 
15 | 16 | {{-trans-}} 17 | {{(}} 18 | * {{en}}: {{t|en|to}} 19 | * {{O|fr|à}}, {{t|fr|dans}}, {{t|fr|en}}, {{t|fr|pour}}, {{t|fr|chez}}, {{t|fr|de}}, {{t|fr|vers}} 20 | * {{is}}: {{t|is|til}} 21 | * {{nl}}: {{t|nl|te}} 22 | * {{nb}}: {{t|nb|til}} 23 | {{-}} 24 | * {{nn}}: {{t|nn|til}} 25 | * {{pt}}: {{t|pt|para}} 26 | * {{sv}}: {{t|sv|till}} 27 | * {{de}}: {{t|de|zu}}, {{t|de|nach}}, {{t|de|bis}}, {{t|de|an}}, {{t|de|für}} 28 | {{)}} 29 | -------------------------------------------------------------------------------- /tests/data/da/jørme.wiki: -------------------------------------------------------------------------------- 1 | == Dansk == 2 | === Alternativ form === 3 | * {{l|da|vørme}} 4 | === Verbum === 5 | {{da-verb}} 6 | # [[vrimle]], [[myldre]]; [[sværme]] 7 | #* '''1958''', ''Sprog og kultur'' 8 | #*: »Ja: jørme. Eks.: Han bløw saa maj jørmendes gal.« 1714 Fjelsø, Rinds h. - » Det '''jørmede''' op i mig. Tror ikke, det bruges i andre betydninger.« 1715 Simested, Rinds h. - »Dæe wa manno mæ te de markin, di '''jørme''' hiele plassen fuld. 9 | #* '''1841''', Christian Molbech, ''Dansk Dialect-Lexikon, indeholdende Ord, Udtryk og Talemaader af den danske Almues Tungemaal i Rigets forskiellige Landskaber og Egne, forsaavidt som de ere fremmede for Skriftsproget og almindelig Sprogbrug, med Forklaring og Oplysninger'', page 664: 10 | #*: (fra Virnæs S.) vørme, v. m. (J. Ogsaa: '''jørme'''.) vrimle, mylre; f. Er. om en stor Mængde Smaadyr, der bevæge sig imellem hverandre, som Orme, Myrer, o. desl . (Støvring Herr) – Deraf: Vørm, en Vrimmel, en mylrende Mængde. En Vørm Bier ... 
11 | === Referencer === 12 | * http://jyskordbog.dk/jyskordbog/ordbog.html 13 | -------------------------------------------------------------------------------- /tests/data/no/verken.wiki: -------------------------------------------------------------------------------- 1 | ==Norsk== 2 | ===Konjunksjon=== 3 | {{no-kon|tittel=verken — eller — |norm|nb=ja|nn=ja|nrm=nei}} 4 | 5 | # danner sammen med [[eller]] en konjunksjon som binder sammen to nektinger 6 | #:''Han fikk [[verken]] vått '''eller''' tørt.'' 7 | #:''Jeg har [[verken]] tid '''eller''' råd.'' 8 | 9 | ====Faste uttrykk==== 10 | * '''verken''' det ene '''eller''' det andre 11 | * '''verken''' fugl '''eller''' fisk 12 | 13 | ====Uttale==== 14 | {{lyd|no-verken.ogg|Lyd (Dialekt: Oslo)}} 15 | 16 | ====Andre former==== 17 | {{andre former|korkje|nn=ja}} 18 | {{andre former|hverken|nb=ja|nrm=ja}} 19 | 20 | ====Etymologi==== 21 | Fra {{opphav|gammeldansk|språk=no}}: [[hwærki]]/[[hwærkin]] via {{opphav|dansk|språk=no}}: [[hverken]]. Jamfør [[norrønt]]: [[hvárki]]. 22 | 23 | ====Oversettelser==== 24 | {{overs-se|konjunksjon|[[hverken]]}} 25 | 26 | ===Referanser=== 27 | * {{R:Bokmålsordboka-Nynorskordboka}} 28 | 29 | ===Substantiv=== 30 | '''{{PAGENAME}}''' {{norm|nb=ja|nrm=ja|nn=ja}} 31 | 32 | #{{no-sub-bøyningsform|be|verk|nb=ja|nrm=ja|nn=ja}} 33 | -------------------------------------------------------------------------------- /tests/data/fr/corollaires.wiki: -------------------------------------------------------------------------------- 1 | == {{langue|fr}} == 2 | === {{S|étymologie}} === 3 | : {{cf|corollaire}}. 4 | 5 | === {{S|nom|fr|flexion}} === 6 | {{fr-rég|kɔ.ʁɔ.lɛʁ|s=corollaire|mf=oui}} 7 | '''corollaires''' {{pron|kɔ.ʁɔ.lɛʁ|fr}} 8 | # ''Pluriel de'' [[corollaire#fr-nom|corollaire]]. 
9 | #* {{exemple | lang=fr 10 | | L’illustre docteur regrettait de ne pouvoir emporter les seize volumes in-quarto, mais il en possédait tous les développements dans sa tête, ainsi que les notes, les '''corollaires''', les renvois et une foule d’observations inédites et curieuses, résultant de ses nouvelles études. 11 | | source={{w|Erckmann-Chatrian}}, ''{{ws|Livre:Erckmann-Chatrian - Contes et romans populaires, 1867.djvu|L’illustre docteur Mathéus}}'', page 6, J. Hetzel, 1867}} 12 | {{Clr}} 13 | 14 | === {{S|adjectif|fr|flexion}} === 15 | {{fr-rég|kɔ.ʁɔ.lɛʁ|s=corollaire|mf=oui}} 16 | '''corollaires''' {{pron|kɔ.ʁɔ.lɛʁ|fr}} {{mf}} 17 | # ''Pluriel de'' [[corollaire#fr-adj|corollaire]]. 18 | 19 | === {{S|prononciation}} === 20 | * {{écouter|lang=fr|France (Île-de-France)|kɔ.ʁɔ.lɛʁ|audio=Fr-Paris--corolaire.ogg}} 21 | -------------------------------------------------------------------------------- /tests/data/ja/有する.wiki: -------------------------------------------------------------------------------- 1 | {{DEFAULTSORT:ゆうする}} 2 | =={{ja}}== 3 | ==={{verb}}=== 4 | {{ja-verb|ゆうする}} 5 | #{{タグ|ja|他動詞|文章語}}[[もつ|持つ]]。[[もつ|持っ]][[ている]]。 6 | #*1922年、小川未明「小さな草と太陽」青空文庫(2011年11月16日作成、底本:「定本小川未明童話全集 2」講談社、1977(昭和52)年C第3刷)https://www.aozora.gr.jp/cards/001475/files/53676_46244.html 7 | #*:子供のすべてはロマンチシストであった。なんで、人間は、大きくなって、この心を'''有し'''ないのか。そして、旧習慣、常套、俗悪なる形式作法に囚われなければならぬのか。 8 | #*1948年、中谷宇吉郎「心霊現象と科学」青空文庫(2020年6月27日作成、底本:「中谷宇吉郎随筆選集第二巻」朝日新聞社、1966(昭和41)年9月30日第2刷)https://www.aozora.gr.jp/cards/001569/files/59195_71308.html 9 | #*:ところが十九世紀の末頃から、心霊現象の研究が、一種の学問の形をとり出した。各国にいろいろな団体などが出来て、こういう現象に特に興味を'''有する'''人たちの間では、かなり活溌な運動が展開されてきた。 10 | #*2016年、高村典子「淡水域の保全、その政策を支える生物多様性評価の現状と課題」高村 典子, 淡水域の保全、その政策を支える生物多様性評価の現状と課題, 保全生態学研究, 2016, 21 巻, 2 号, p. 
from logging import getLogger
from optparse import Values

from scour.scour import scourString

from . import caches

# SVG cache, loaded once at import time through the project's cache loader.
CACHE = caches.load_cache_file("svg")

# Options handed to scourString() by optimize() below.
SCOUR_OPTIONS = Values(
    defaults={
        "enable_viewboxing": True,
        "group_create": True,
        "newlines": False,
        "quiet": True,
        "remove_descriptions": True,
        "remove_descriptive_elements": True,
        "remove_metadata": True,
        # "shorten_ids": True,  # /!\ When set to True, display will be incorrect
        "strip_comments": True,
        "strip_xml_prolog": True,
    }
)

log = getLogger(__name__)


def get(formula: str) -> str:
    """Return the cached SVG stored under *formula*, or ``""`` when there is none."""
    return CACHE.get(formula, "")


# NOTE: this name shadows the builtin ``set`` for the rest of this module.
def set(formula: str, svg_raw: str) -> None:
    """Store *svg_raw* under *formula* in the cache and log the pair at WARNING level."""
    # NOTE(review): the "(%r, %r)," layout presumably lets the logged line be
    # pasted into the cache source as a tuple entry - confirm against the cache file.
    log.warning("[new SVG] (%r, %r),", formula, svg_raw)
    CACHE[formula] = svg_raw


def optimize(svg_raw: str) -> str:
    """Optimize a given SVG.

    Runs *svg_raw* through ``scourString`` with ``SCOUR_OPTIONS`` and returns
    the result converted to ``str``.
    """
    return str(scourString(svg_raw, options=SCOUR_OPTIONS))
5 | #:''Pas på, at tyven ikke stjæler dine ting, når du går på gaden.'' 6 | {{-decl-}} 7 | {{da-noun|en|tyv|tyven|tyve|tyvene}} 8 | {{-expr-}} 9 | *(når noget bliver gjort uden at nogen får det at vide før det er for sent): Som en '''tyv''' om natten. 10 | {{-rel-}} 11 | *[[tyveri]] - den handling tyven begår ved at stjæle 12 | {{-trans-}} 13 | {{(}} 14 | *{{en}}: {{trad|en|thief}} 15 | *{{fi}}: {{trad|fi|varas}} 16 | *{{fr}}: {{trad|fr|voleur}} 17 | *{{fo}}: {{trad|fo|tjóvur}} 18 | *{{el}}: {{trad|el|κλέφτης}} 19 | *{{is}}: {{trad|is|þjófur}} 20 | *{{it}}: {{trad|it|ladro}} 21 | *{{O|ku|diz}} {{mf}} 22 | *{{nl}}: {{trad|nl|dief}} 23 | {{-}} 24 | *{{no}}: 25 | *:{{nb}}: {{t|no|tyv|m}}, {{t|no|tjuv|m}} 26 | *:{{nn}}: {{t|nn|tjuv|m}} 27 | *{{pt}}: {{trad|pt|ladrão}} 28 | *{{ru}}: {{trad|ru|похититель}} 29 | *{{es}}: {{trad|es|ladrón}} 30 | *{{sv}}: {{t|sv|tjuv|c}} 31 | *{{de}}: {{trad|de|Dieb}} 32 | {{)}} 33 | [[Kategori:Forbrydelser på dansk]] 34 | [[Kategori:Personer på dansk]] 35 | -------------------------------------------------------------------------------- /tests/data/ro/paronim.wiki: -------------------------------------------------------------------------------- 1 | =={{limba|ron}}== 2 | {{-etimologie-}} 3 | Din franceză ''[[paronyme]]'', latină ''[[paronymon]]'', originar format din greacă {{trad|el|παρα}} + '''{{trad|el|ονομα}}''' {{trad|el|-onym}} 4 | {{-pronunție-}} 5 | * {{AFI}}: {{AFI|/pa.ro'nim/}} 6 | {{-substantiv-|ron}} 7 | {{substantiv-ron 8 | |gen={{n}} 9 | |nom-sg=paronim 10 | |nom-pl=paronime 11 | |art-sg=paronimul 12 | |art-pl=paronimele 13 | |dat-sg=paronimului 14 | |dat-pl=paronimelor 15 | |voc-sg=- 16 | |voc-pl=- 17 | }} 18 | # [[cuvânt]] asemănător cu altul din punctul de vedere al formei, dar [[deosebit]] de acesta ca [[sens]] (și ca [[origine]]). 19 | # cuvânt care se aseamănă [[parțial]] cu altul din punctul de vedere al formei, dar se [[deosebi|deosebește]] ca sens de acesta. 
20 | {{-trans-}} 21 | {{(}} 22 | * {{eng}}: {{trad|en|paronym}} 23 | * {{fra}}: {{trad|fr|paronyme}} 24 | {{-}} 25 | * {{ina}}: {{trad|ia|paronyme}} 26 | * {{ido}}: {{trad|io|paronimo}} 27 | {{)}} 28 | ===Vezi și:=== 29 | * [[acronim]] 30 | * [[antonim]] 31 | * [[omonim]] 32 | * [[sinonim]] 33 | * [[hipernim]] 34 | * [[hiponim]] 35 | 36 | [[Categorie:Lingvistică]] 37 | -------------------------------------------------------------------------------- /tests/data/eo/kaskedo.wiki: -------------------------------------------------------------------------------- 1 | =={{Lingvo|eo}}== 2 | {{oficialeco|8}} 3 | ==={{Vortospeco|substantivo|eo}}=== 4 | [[Dosiero:Casquette a helice.jpg|thumb|[1] '''''kaskedo''''']] 5 | {{Deklinacio-eo}} 6 | 7 | {{Vorterseparo}} 8 | :kasked/o. 9 | 10 | ===={{Signifoj}}==== 11 | :[1] Ĉapo kun viziero, civilvesta aŭ uniforma: ''homoj armitaj en nigraj kaskedetoj{{ref-Grabowski}}; la hotela pordisto levis sian kaskedon.'' 12 | 13 | {{Ekzemploj}} 14 | :[1] Volis 15 | :la ĝardenisto pajlan ĉapelon, 16 | :la ŝipisto maristan ĉapon, 17 | :la ĵurnalisto kvadratitan ''kaskedon'', 18 | :la ĉambristino puntan kufon, 19 | :la ĝendarmo militistan kepon, 20 | :la ĉampano [[korko]]n, 21 | :la ĝemelo du supersignojn, 22 | :la ĵonglisto tri aŭ kvar...{{La milito de la ĉapeloj}} 23 | 24 | ===={{Tradukoj}}==== 25 | {{trad-eko}} 26 | *dana: [1] {{t|da|kasket}} {{g|u}} 27 | 28 | *franca: [1] {{t|fr|casquette}} {{g|f}} 29 | {{trad-mezo}} 30 | 31 | {{trad-fino}} 32 | 33 | {{Referencoj}} 34 | * {{vikipedenlin|kaskedo}} 35 | * {{ref-ReVo|kaskedo}} 36 | * {{ref-PIV}} 37 | * {{ref-Tato|eo}} 38 | * {{ref-Simpla Vortaro|kaskedo}} 39 | 40 | {{Fontoj}} 41 | -------------------------------------------------------------------------------- /wikidict/namespaces.py: -------------------------------------------------------------------------------- 1 | """ 2 | List of mediawiki namespaces per language. 3 | Auto-generated with `python -m scripts`. 
4 | """ 5 | 6 | # START 7 | namespaces = { 8 | "ca": ["Categoria", "Fitxer", "Image", "Imatge"], 9 | "da": ["Billede", "Fil", "Image", "Kategori"], 10 | "de": ["Bild", "Datei", "Image", "Kategorie"], 11 | "el": ["Image", "Αρχείο", "Εικόνα", "Κατηγορία"], 12 | "en": ["CAT", "Category", "File", "Image"], 13 | "eo": ["Dosiero", "Image", "Kategorio"], 14 | "es": ["Archivo", "Categoría", "Image", "Imagen"], 15 | "fr": ["Catégorie", "Fichier", "Image"], 16 | "it": ["Categoria", "File", "Image", "Immagine"], 17 | "ja": ["Image", "カテゴリ", "ファイル", "画像"], 18 | "no": ["Bilde", "Fil", "Image", "Kategori"], 19 | "pt": ["Arquivo", "Categoria", "Ficheiro", "Image", "Imagem"], 20 | "ro": ["Categorie", "Fişier", "Fișier", "Image", "Imagine"], 21 | "ru": ["Image", "Изображение", "К", "Категория", "Файл"], 22 | "sv": ["Bild", "Fil", "Image", "KAT", "Kategori"], 23 | "zh": ["CAT", "Category", "File", "Image", "分类", "分類", "图像", "图片", "圖像", "圖片", "文件", "档案", "檔案"], 24 | } 25 | # END 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020-2025 Mickaël Schoentgen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
#!/usr/bin/env python3
"""Static file server that serves pre-compressed ``*.gz`` files transparently."""
import http.server
import socketserver
import mimetypes
import os
from http import HTTPStatus
from urllib.parse import unquote

PORT = 8000


class GzipStaticHandler(http.server.SimpleHTTPRequestHandler):
    """Serve ``*.gz`` files with ``Content-Encoding: gzip`` and the inner MIME type.

    ``foo.js.gz`` is announced with the MIME type of ``foo.js`` so that browsers
    decompress it and treat the result as the original resource.
    """

    def send_response(self, code, message=None):
        """Remember the status code so ``end_headers`` can tell successes from errors."""
        self._response_code = code
        super().send_response(code, message)

    def guess_type(self, path):
        """Return the MIME type of *path*, looking through a trailing ``.gz``.

        Overriding this hook (instead of adding a header in ``end_headers``)
        keeps the response down to a single ``Content-Type`` header.
        """
        base, ext = os.path.splitext(os.fspath(path))
        if ext == ".gz":
            # Try to infer the original MIME type (e.g., .js.gz -> .js)
            mime, _ = mimetypes.guess_type(base)
            if mime:
                return mime
        return super().guess_type(path)

    def _is_gzip_response(self):
        """Return True when a ``.gz`` resource is being served successfully."""
        # Error pages and redirects are plain text/HTML, never gzip payloads.
        if getattr(self, "_response_code", None) != HTTPStatus.OK:
            return False
        # ``path`` may be unset when an error is sent before the request line
        # has been parsed; treat that as "not a gzip request".
        raw_path = getattr(self, "path", "") or ""
        # Drop the query string and fragment before testing the suffix.
        url_path = unquote(raw_path.split("?", 1)[0].split("#", 1)[0])
        return url_path.endswith(".gz")

    def end_headers(self):
        """Add ``Content-Encoding: gzip`` for successful ``.gz`` responses."""
        if self._is_gzip_response():
            self.send_header("Content-Encoding", "gzip")

        super().end_headers()


if __name__ == "__main__":
    socketserver.TCPServer.allow_reuse_address = True
    with socketserver.TCPServer(("", PORT), GzipStaticHandler) as httpd:
        print(f"Serving HTTP on port {PORT} (http://localhost:{PORT}/)")
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nShutting down server.")
            httpd.server_close()
8 | #[[autostart]] 9 | :{{avgränsare}} 10 | :{{besläktade ord|[[automatisk]]}} 11 | 12 | ==Finska== 13 | ===Substantiv=== 14 | {{fi-subst-valo|aut}} 15 | '''auto''' 16 | #{{tagg|kat=fordon|språk=fi}} [[bil]] 17 | #:{{sammansättningar|[[autotehdas]], [[autourheilu]], [[henkilöauto]], [[hybridiauto]], [[jäätelöauto]], [[kirjastoauto]], [[maastoauto]], [[maitoauto]], [[pakettiauto]], [[poliisiauto]], [[sähköauto]]}} 18 | 19 | ==Nederländska== 20 | ===Substantiv=== 21 | {{nl-subst|auto|auto's}} 22 | '''auto''' {{m}} 23 | #{{tagg|kat=fordon|språk=nl}} [[bil]] 24 | #:{{sammansättningar|[[auto-ongeval]], [[auto-ongeluk]]}} 25 | 26 | ==Polska== 27 | ===Substantiv=== 28 | {{subst|pl}} 29 | '''auto''' 30 | #{{tagg|kat=fordon|språk=pl}} [[bil]] 31 | 32 | ==Spanska== 33 | ===Substantiv=== 34 | {{es-subst-m-s}} 35 | '''auto''' {{m}} 36 | #{{tagg|kat=fordon|språk=es}} [[bil]] 37 | #:{{etymologi|Avkortning av ''[[automóvil]]'', av {{härledning|es|grc|αὐτός|själv}}.}} 38 | #:{{synonymer|[[automóvil]], [[carro]] ''(Latinamerika)'', [[coche]]}} 39 | -------------------------------------------------------------------------------- /tests/data/it/modalità Goblin.wiki: -------------------------------------------------------------------------------- 1 | [[modalità|Modalità]] [[goblin|Goblin]] 2 | 3 | == {{-it-}} == 4 | {{-loc nom-|it}} 5 | {{Pn|w}} ''f inv'' 6 | #[[modalità]] [[goblin|Goblin]], oppure [[in]] [[modalità]] [[goblin|Goblin]] è un [[tipo]] di [[comportamento]] [[autoindulgente]], [[pigro]], [[sciatto]] o [[avido]], che [[rifiuta]] le [[norme]] o le [[aspettative]] [[sociali]]. Questo [[comportamento]] si deve [[anche]] all'[[influsso]] del [[COVID-19|covid]] nell'[[ambiente]] [[fisico]] sulla [[mente]] e la [[socialità]] delle [[persone]] 7 | 8 | {{-sill-}} 9 | ; mo | da | li | tà | go | blin | 10 | 11 | {{-pron-}} 12 | {{IPA|/modali'ta 'go blin/}} 13 | 14 | {{-etim-}} 15 | ; [[modalità]] dal lat. 
[[modus|mòdus]], [[modo]] o [[maniera]] e [[goblin|Goblin]] o [[goblin|gobelin]], [[piccolo]] e [[grottesco]] [[spirito]] [[maligno]] del [[folklore]] [[inglese]], [[scozzese]] e [[irlandese]]. 16 | 17 | {{-trad-}} 18 | [https://en.m.wikipedia.org/wiki/Goblin_mode]{{Trad1|}} 19 | :*{{en}}: [[Goblin mode]] 20 | 21 | 22 | {{-ref-}} 23 | :*[https://en.m.wikipedia.org/wiki/Goblin_mode] 24 | :* 25 | [T. F. Hoad, English Etymology, Oxford University Press, p. 196b.] 26 | :*[https://www.garzanti.it/tag/goblin-mode/] 27 | -------------------------------------------------------------------------------- /tests/data/sv/-hörning.wiki: -------------------------------------------------------------------------------- 1 | {{se även|-horning}} 2 | 3 | ==Svenska== 4 | ===Efterled=== 5 | {{efterled|sv}} 6 | '''-hörning''' (''suffix'') 7 | #{{tagg|geometri|vardagligt}} ''suffix för [[månghörning]]ar'' 8 | #:{{etymologi|Av ''[[hörn]]'' + ''[[-ing]]''.}} 9 | #:{{synonymer|[[-gon]]}} 10 | #:{{sammansättningar|[[femhörning]], [[fyrhörning]], [[hundrahörning]], [[månghörning]], [[niohörning]], [[sexhörning]], [[sjuhörning]], [[sjuttonhörning]], [[tiohörning]], [[trehörning]], [[åttahörning]]}} 11 | #:{{användning|Mer formellt talar man om [[polygon]]er i stället för [[månghörningar]], och då används suffixet ''[[-gon]]'', fast då med de likaså mer formella icke-svenska förleden: ''[[pentagon]]'', ''[[hexagon]]'' osv.}} 12 | #''suffix i ord som har med [[djur]]s [[horn]] att göra'' 13 | #:{{etymologi|Av ''[[horn]]'' + ''[[-ing]]'' med omljud.}} 14 | #:{{sammansättningar|[[enhörning]], [[noshörning]]}} 15 | #:{{jämför|[[-horning]]}} 16 | 17 | ====Översättningar==== 18 | {{ö-topp|suffix för månghörningar}} 19 | *finska: {{ö+|fi|-kulmio}} 20 | *nederländska: {{ö|nl|-hoek}} 21 | {{ö-botten}} 22 | 23 | {{ö-topp|suffix i ord som har med djurs horn att göra}} 24 | *isländska: {{ö+|is|hyrningur}} 25 | {{ö-botten}} 26 | -------------------------------------------------------------------------------- 
/tests/data/pt/ũa.wiki: -------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | ==Artigo== 3 | {{flex.pt|ms=ũu|mp=ũus|fs=ũa|fp=ũas}} 4 | {{paroxítona|ũ|a}} ''indefinido'' 5 | # ortografia antiga de [[uma]] 6 | 7 | =={{etimologia|pt}}== 8 | : Do Latim ''una-'': ''una-'' deu '''ũa''' por queda do '''n''' com a nasalação do '''ũ'''. 9 | 10 | ==Ver também== 11 | ===No Wikcionário=== 12 | * [[uã]] 13 | 14 | [[Categoria:Artigo (Português)]] 15 | [[Categoria:Entrada com etimologia (Português)]] 16 | [[Categoria:Entrada de étimo latino (Português)]] 17 | 18 | ={{-roa-gpm-}}= 19 | ==Artigo== 20 | {{flex.roa-gpm|ms=ũu|mp=ũus|fs=ũa|fp=ũas}} 21 | {{paroxítona|ũ|a|id=roa-gpm}}, ''indefinido'' 22 | # [[uma]] 23 | 24 | =={{etimologia|roa-gpm}}== 25 | {{etimo2|la|una|roa-gpm}} 26 | 27 | [[Categoria:Artigo (Galego-Português Medieval)]] 28 | 29 | ={{-mwl-}}= 30 | ==Artigo== 31 | {{paroxítona|ũ|a|id=mwl}} ''indefinido'' 32 | # [[uma]] 33 | # feminino da forma [[un]] ''(um)'' 34 | 35 | =={{etimologia|mwl}}== 36 | : Do Latim ''una-'': ''una-'' deu '''ũa''' por queda do '''n''' com a nasalação do '''ũ'''. 
# ---- tests/test_4_get_word.py ----
from pathlib import Path
from unittest.mock import patch

import pytest
from requests import HTTPError

from wikidict import context, get_word


@pytest.fixture(scope="module", autouse=True)
def setup_lua_ctx() -> None:
    """Reset the French context once per module, with CWD pointing at the repository root."""
    with patch.dict("os.environ", {"CWD": str(Path(context.__file__).parent.parent)}):
        assert context.reset("fr")


@pytest.mark.webtest
def test_simple() -> None:
    # The word exists and contains subsublists.
    assert get_word.main("fr", "base") == 0


@pytest.mark.webtest
def test_get_random_word() -> None:
    assert get_word.main("fr", "") == 0


@pytest.mark.webtest
def test_subdefinitions() -> None:
    assert get_word.main("fr", "mesure") == 0


@pytest.mark.webtest
def test_raw() -> None:
    assert get_word.main("fr", "marron", raw=True) == 0


@pytest.mark.webtest
def test_word_with_variants() -> None:
    assert get_word.main("fr", "suis") == 0


@pytest.mark.webtest
def test_word_not_found() -> None:
    with pytest.raises(HTTPError):
        get_word.main("fr", "mutinerssssssss")


# ---- tests/update-data.py ----
import sys
from pathlib import Path
from urllib.parse import quote

HERE = Path(__file__).parent
sys.path.insert(0, str(HERE.parent))
from wikidict.constants import SESSION  # noqa: E402


def fetch_and_store_if_updated(file: Path, url: str) -> None:
    """Download *url* and overwrite *file* when the remote content differs.

    Both sides are compared with surrounding whitespace stripped. Nothing is
    written when the request fails (``req.ok`` is false) or the content is
    unchanged.
    """
    # The fixtures hold non-ASCII text: pin UTF-8 instead of relying on the
    # locale's default encoding, which differs between platforms.
    current_content = file.read_text(encoding="utf-8").strip()
    with SESSION.get(url) as req:
        if not req.ok:
            return
        new_content = req.text.strip()
        if current_content != new_content:
            file.write_text(req.text + "\n", encoding="utf-8")
            print(f"Updated {file}", flush=True)


def main() -> int:
    """Refresh every ``*.wiki`` fixture (and its ``.html`` sibling, if any) under ``data/``.

    The locale is the name of each entry in ``data/`` and the page title is
    the file stem. Always returns 0.
    """
    url_fmt = "https://{}.wiktionary.org/w/index.php?title={}&action=raw"
    folder = HERE / "data"
    for locale in sorted(folder.iterdir()):
        for file in sorted(locale.glob("*.wiki")):
            # Escape the title so characters such as "&", "#", "?" or "+" in a
            # page name cannot alter the query string.
            url = url_fmt.format(locale.name, quote(file.stem, safe=""))
            fetch_and_store_if_updated(file, url)

            html_file = file.with_suffix(".html")
            if html_file.is_file():
                url = url.replace("&action=raw", "")
                fetch_and_store_if_updated(html_file, url)

    return 0


if __name__ == "__main__":
    sys.exit(main())
22 | # {{cognom|ca|d’habitatge}} 23 | 24 | {{-der-}} 25 | * [[Castells]] 26 | * {{q|gentilicis}} [[castellenc]], [[castellarenc]], [[casteller]] 27 | 28 | {{-rel-}} 29 | * [[Castelló]] 30 | 31 | === Miscel·lània === 32 | * {{ca-sil}} 33 | * {{anagrames|ca|llescat}} 34 | 35 | === Vegeu també === 36 | * {{Viquipèdia|Castell (desambiguació)|Castell}} 37 | * {{ca-dicc|dcvb}} 38 | * [http://www.idescat.cat/cognoms/?q=Castell Cognoms de la població], Idescat 39 | -------------------------------------------------------------------------------- /tests/data/es/también.wiki: -------------------------------------------------------------------------------- 1 | == {{lengua|es}} == 2 | {{pron-graf}} 3 | 4 | === Etimología === 5 | {{etimología|compuesto|tan|bien}} 6 | 7 | === {{adverbio de afirmación|es}} === 8 | ;1: {{impropia|Utilizado para especificar que una o varias cosas son similares, o que comparten atributos con otra previamente nombrada}}. 9 | {{sinónimos|igualmente|asimismo|de igual modo|incluso|al igual|paralelamente|encima}}. 10 | 11 | ;2: {{impropia|Usado para [[añadir]] algo a lo anteriormente mencionado}}. 12 | {{sinónimos|además|en añadidura}}. 
13 | 14 | === Traducciones === 15 | {{trad-arriba}} 16 | {{t|de|t1=auch}} 17 | {{t|ast|t1=tamién}} 18 | {{t|bg|t1=също|t2=също така|t3=също и}} 19 | {{t|da|t1=også}} 20 | {{t|sl|a1=1|t1=tudi}} 21 | {{t|eo|t1=ankaŭ}} 22 | {{t|fi|t1=myös|t2=-kin}} 23 | {{t|fr|a1=1-2|t1=aussi}} 24 | {{t|fy|t1=ek}} 25 | {{t|hu|t1=is}} 26 | {{t|el|t1=και}} 27 | {{t|gn|t1=avei}} 28 | {{t|id|t1=juga|t2=pula}} 29 | {{t|en|a1=1|t1=too|a2=1-2|t2=also}} 30 | {{t|it|a1=1|t1=anche}} 31 | {{t|yua|t1=la’tene}} 32 | {{t|nl|t1=ook}} 33 | {{t|no|t1=også}} 34 | {{t|pap|t1=tambe}} 35 | {{t|pl|t1=też}} 36 | {{t|pt|t1=também}} 37 | {{t|sa|t1=अपि}} 38 | {{t|sv|t1=också}} 39 | {{t|su|t1=ogé}} 40 | {{t|wa|t1=eto}} 41 | {{t|eu|t1=ere}} 42 | {{trad-abajo}} 43 | 44 | == Referencias y notas == 45 | 46 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/template-dump.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Wiktionary dump 3 | about: Issue to list changes in a Wiktionary dump. 4 | title: '🥮 Issues found in the `2025-MM-DD` dump' 5 | labels: 6 | - dump 7 | - 'locale:Chinese' 8 | - 'locale:Russian' 9 | assignees: '' 10 | 11 | --- 12 | 13 | > [!NOTE] 14 | > 🧙♂️ All-in-one ticket listing new missing templates, errors, and variants problems, across all locales, following the latest Wikimedia dump. 15 | > Feel free to comment if you are interested in tackling one ticket, so that we do not both work on the same thing. 
16 | 17 | 18 | 19 | ## CA 20 | 21 | - [ ] 22 | 23 | ## DA 24 | 25 | - [ ] 26 | 27 | ## DE 28 | 29 | - [ ] 30 | 31 | ## EL 32 | 33 | - [ ] 34 | 35 | ## EN 36 | 37 | - [ ] 38 | 39 | ## EO 40 | 41 | - [ ] 42 | 43 | ## ES 44 | 45 | - [ ] 46 | 47 | ## FR 48 | 49 | - [ ] 50 | 51 | ## IT 52 | 53 | - [ ] 54 | 55 | ## NO 56 | 57 | - [ ] 58 | 59 | ## PT 60 | 61 | - [ ] 62 | 63 | ## RO 64 | 65 | - [ ] 66 | 67 | ## RU 68 | 69 | - Unhandled templates count: N (📈 +N since `2025-MM-DD` dump | 📉 -N since `2025-MM-DD` dump) 70 | 71 | ## SV 72 | 73 | - [ ] 74 | 75 | ## ZH 76 | 77 | - Unhandled templates count: N (📈 +N since `2025-MM-DD` dump | 📉 -N since `2025-MM-DD` dump) 78 | -------------------------------------------------------------------------------- /tests/data/no/konsentrasjon.wiki: -------------------------------------------------------------------------------- 1 | ==Norsk== 2 | ===Substantiv=== 3 | {{no-sub|m}} 4 | 5 | #Det å [[konsentrere]] seg; ha stort fokus på noe. 6 | #:'''''Konsentrasjon''' er viktig for å ikke bli avledet.'' 7 | #{{kjemi|no}} Andelen stoff i noe; mengde stoff løst pr. enhet. 
# ---- wikidict/gen_dict.py ----
"""DEBUG: generate the dictionary for specific words."""

import os
from datetime import UTC, datetime
from pathlib import Path

from .convert import convert, get_formatters, make_variants
from .get_word import get_word
from .stubs import Variants


def main(locale: str, words: str, output: Path | str, *, format: str = "kobo") -> int:
    """Entry point.

    Args:
        locale: Locale passed to ``get_word`` and ``convert``.
        words: Comma-separated list of words; blank items are ignored.
        output: Output folder. A ``str`` is resolved against the ``CWD``
            environment variable and created if missing; a ``Path`` is used
            as given (it is not created here).
        format: Name passed to ``get_formatters``.

    Returns:
        Always 0.
    """

    if isinstance(output, str):
        output_dir = Path(os.getenv("CWD", "")) / output
        output_dir.mkdir(parents=True, exist_ok=True)
    else:
        output_dir = output

    # Split on commas, trim each item and drop the empty ones.
    words_stripped = [word_stripped for word in words.split(",") if (word_stripped := word.strip())]
    all_words = {word: get_word(word, locale) for word in words_stripped}
    variants: Variants = make_variants(all_words)
    # The snapshot is today's UTC date, formatted as YYYYMMDD.
    snapshot = datetime.now(tz=UTC).strftime("%Y%m%d")
    primary_formatters, secondary_formatters, mobi_run = get_formatters(format)
    convert(
        primary_formatters,
        secondary_formatters,
        mobi_run,
        output_dir,
        snapshot,
        locale,
        all_words,
        variants,
        with_etym_only=True,
    )

    return 0


# ---- wikidict/lang/de/variant_handlers.py ----
from collections import defaultdict


def render_variant(tpl: str, parts: list[str], data: defaultdict[str, str], word: str) -> str:
    """Return the base form named by a ``flexion`` template.

    The first non-empty value among ``data["1"]``, ``data["Verb"]`` and
    ``parts[0]`` is used, and anything from the first ``#`` onwards is dropped.
    Raises ``IndexError`` when both keys are empty and *parts* is empty.

    >>> render_variant("flexion", ["dass", "Reform 1996"], defaultdict(str), "daß")
    'dass'
    >>> render_variant("flexion", ["profilierend"], defaultdict(str), "profilierende")
    'profilierend'
    >>> render_variant("flexion", [], defaultdict(str, {"1": "rauspumpen"}), "pumpt raus")
    'rauspumpen'
    >>> render_variant("flexion", ["rauspumpen#rauspumpen_(Deutsch)"], defaultdict(str), "pumpt raus")
    'rauspumpen'
    >>> render_variant("flexion", [], defaultdict(str, {"Verb": "ansprechen", "Partizip": "angesprochen"}), "angesprochenen")
    'ansprechen'
    """
    # NOTE(review): if *data* really is a defaultdict, these lookups insert the
    # missing keys as a side effect - confirm callers do not iterate it afterwards.
    variant = data["1"] or data["Verb"] or parts[0]
    return variant.split("#", 1)[0]


def render_reverse_variant(tpl: str, parts: list[str], data: defaultdict[str, str], word: str) -> str:
    """Return the first positional part, stripped of surrounding whitespace.

    Raises ``IndexError`` when *parts* is empty.

    >>> render_reverse_variant("rev-flexion", ["kartlet"], defaultdict(str), "kartelt")
    'kartlet'
    """
    return parts[0].strip()


# Template name -> renderer.
handlers = {
    "flexion": render_variant,
    "rev-flexion": render_reverse_variant,
}
-------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | ==Adjetivo== 3 | {{flex.pt|s=cabrum|p=cabruns}} 4 | {{oxítona|ca|brum}}, {{mf}} 5 | # {{escopo|pt|Pecuária}} de [[cabra]]s: 6 | #* ''o gado '''cabrum''' adapta-se facilmente às regiões montanhosas...'' 7 | # {{escopo|pt|Brasil}} [[marido]] de [[mulher]] [[adúltero|adúltera]] 8 | 9 | ===Sinônimos=== 10 | De '''1''' (de cabras): 11 | {{verTambém.Ini}} 12 | * [[caprídeo]] 13 | {{verTambém.NovaColuna}} 14 | * [[caprino]] 15 | {{verTambém.Fim}} 16 | 17 | ===Tradução=== 18 | {{tradini|De 1 (de cabra)}} 19 | * {{trad|es|caprino}} 20 | * {{trad|eo|kapra}} 21 | * {{trad|fr|chevrier}} 22 | {{tradmeio}} 23 | * {{trad|en|goatish|goatlike}} 24 | * {{trad|it|caprino}} 25 | * {{trad|pl|kozi}} 26 | {{tradfim}} 27 | 28 | ==Interjeição== 29 | {{oxítona|ca|brum}} 30 | # indica [[estrondo]] 31 | 32 | =={{etimologia|pt}}== 33 | {{etimo2|la|caprunu|pt||cabra}} 34 | 35 | ==Ver também== 36 | ===No Wikcionário=== 37 | {{verTambém.Ini}} 38 | * {{verTambém.Ligações|cabr-}} 39 | * {{verTambém.Ligações|capr(i)-}} 40 | * {{verTambém.Ligações|-capr(i)-}} 41 | * [[cabra]] 42 | {{verTambém.Fim}} 43 | 44 | [[Categoria:Adjetivo (Português)]] 45 | [[Categoria:Interjeição (Português)]] 46 | [[Categoria:Zoologia (Português)]] 47 | [[Categoria:Pecuária (Português)]] 48 | [[Categoria:Regionalismo (Português)]] 49 | -------------------------------------------------------------------------------- /tests/data/pt/objetiva.wiki: -------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | ==Adjetivo== 3 | *{{f}} de [[objetivo]] 4 | ===Variante ortográfica=== 5 | * {{escopo2|Anterior ao AO 1990}} [[objectiva]] 6 | ===Tradução=== 7 | {{tradini}} 8 | * {{trad|es|objetiva}} 9 | {{tradmeio}} 10 | {{tradfim}} 11 | ==Forma verbal== 12 | {{paroxítona|ob|je|ti|va}} 13 | #[[terceira]] [[pessoa]] do [[singular]] do [[presente]] [[indicativo]] do [[verbo]] '''[[objetivar]]''' 14 | 
===Variante ortográfica=== 15 | * {{escopo2|Anterior ao AO 1990}} [[objectiva]] 16 | 17 | ==Substantivo== 18 | {{paroxítona|ob|je|ti|va}} {{f}} 19 | #[[lente]] ou [[sistema]] de lentes de uma [[máquina]] [[fotográfica]] 20 | #lente que está voltada para o objeto que se quer ver ou examinar 21 | ===Variante ortográfica=== 22 | * {{escopo2|Anterior ao AO 1990}} [[objectiva]] 23 | 24 | ===Tradução=== 25 | {{tradini}} 26 | * {{trad|en|lens}} 27 | {{tradmeio}} 28 | * {{trad|mwl|oubjetiba|oujetiba}} 29 | {{tradfim}} 30 | 31 | ===Termos derivados=== 32 | {{verTambém.Ini}} 33 | * {{link idioma|objetivação|pt}} 34 | {{verTambém.NovaColuna}} 35 | * {{link idioma|objetivamente|pt}} 36 | {{verTambém.NovaColuna}} 37 | * {{link idioma|objetivo|pt}} 38 | {{verTambém.Fim}} 39 | 40 | [[Categoria:Adjetivo (Português)]] 41 | [[Categoria:Forma verbal (Português)]] 42 | [[Categoria:Substantivo (Português)]] 43 | -------------------------------------------------------------------------------- /tests/data/en/Acanthis.wiki: -------------------------------------------------------------------------------- 1 | {{also|acanthis}} 2 | ==Translingual== 3 | [[File:Carduelis flammea Oulu 20120409b.JPG|thumb|{{taxfmt|Acanthis flammea|species}}([[common redpoll]])]] 4 | 5 | ===Etymology=== 6 | {{root|mul|ine-pro|*h₂eḱ-|*h₂endʰ-}} 7 | See {{pedia|Acanthis (mythology)}} 8 | 9 | ===Proper noun=== 10 | {{taxoninfl|i=1|g=f}} 11 | 12 | # {{taxon|genus|family|Fringillidae|[[redpoll]]s, of northern woodlands, formerly included in ''[[Carduelis]]''}} 13 | 14 | ====Hypernyms==== 15 | * {{sense|genus}} {{Fringillidae Hypernyms}}; {{taxfmt|Carduelinae|subfamily}} - subfamily; {{taxlink|Carduelini|tribe}} - tribe 16 | 17 | ====Hyponyms==== 18 | * {{sense|genus}} {{taxfmt|Acanthis flammea|species}} ([[common redpoll]]) – type species; {{taxlink|Acanthis cabaret|species}} ([[lesser redpoll]]), {{taxfmt|Acanthis hornemanni|species}} ({{vern|Arctic redpoll}}) – other species 19 | 20 | ====Coordinate terms==== 21 | * 
{{sense|genus}} {{taxfmt|Loxia|genus}} ([[crossbill]]s) 22 | 23 | ====Derived terms==== 24 | * {{taxfmt|Acanthidops|genus}} 25 | 26 | ===References=== 27 | * {{pedia|Redpoll}} 28 | * {{specieslite|i=1}} 29 | * {{comcatlite|i=1}} 30 | * {{R:WoRMS|}} 31 | * {{R:ITIS|179237}} 32 | * {{R:EOL|45510340}} 33 | * {{R:NCBI|37604}} 34 | * {{R:Boyd|29|Fringillidae}} 35 | * {{R:Fossilworks|369430}} 36 | -------------------------------------------------------------------------------- /tests/data/el/-αίικο.wiki: -------------------------------------------------------------------------------- 1 | =={{-el-}}== 2 | {{el-κλίση-'σίδερο'}} 3 | ==={{ετυμολογία}}=== 4 | : '''{{PAGENAME}}''' < {{ουσεπ ο|-αίικος}} [[επίθημα]] σε επίθετα ή οικογενειακά [[επώνυμο|επώνυμα]] [[-αίοι|-αί(οι)]] + {{π|-ικος}} {{Π:ΛΚΝ|*-αίικο*|-αίικο}}{{Π:Μπαμπινιώτης 2010}} 5 | {{χρειάζεται προσοχή}} 6 | ==={{προφορά}}=== 7 | {{ΔΦΑ|el|ˈe.i.ko}} 8 | : {{συλλ|-αί|ι|κο}} 9 | 10 | ==={{επίθημα|el}}=== 11 | '''{{PAGENAME}}''' {{ο}} 12 | * {{ετ|λαϊκ}} [[επίθημα]] με πρώτο [[συνθετικό]] 13 | *# οικογενειακό [[επώνυμο]] που δηλώνει 14 | *## την [[οικογένεια]] ή το [[σπίτι]] 15 | *##: [[Κωλοκοτρόνης]], [[Κωλοκοτροναίοι]] > [[Κωλοκοτροναίικο|Κωλοκοτρον'''αίικο''']] / [[Κολοκοτρωναίικο]] 16 | *##: [[Παπαχρίστου]], [[Παπαχρισταίοι]] > [[Παπαχρισταίικο|Παπαχριστ'''αίικο''']] 17 | *## τη [[συνοικία]] ή τον τόπο όπου κατοικεί η οικογένεια 18 | *##: ''σημείωση: το τοπωνύμια και στον πληθυντικό:'' [[-αίικα]] 19 | *# {{ετ|περιληπτικό}} [[πατριδωνυμικό]] ή [[εθνικό]] όνομα 20 | *#:: [[Ρωμιός]] ([[Ῥωμαῖος]]) > [[ρωμαίικο|ρωμ'''αίικο''']] 21 | *#:: {{Π:ΑΛΝΕ|αιικο}} 22 | 23 | ===={{συγγενικά}}==== 24 | * [[-αίικα]] 25 | * [[-αίος]] 26 | 27 | ===={{βλέπε}}==== 28 | * {{π-κατ}} 29 | 30 | ==={{αναφορές}}=== 31 | 32 | 33 | ==={{πηγές}}=== 34 | * {{Π:Χρηστικό}} 35 | 36 | {{κλείδα-ελλ}} 37 | -------------------------------------------------------------------------------- /wikidict/lang/eo/variant_handlers.py: 
-------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | 4 | def render_variant(tpl: str, parts: list[str], data: defaultdict[str, str], word: str) -> str: 5 | """ 6 | Source: https://eo.wiktionary.org/w/index.php?title=Modulo:meoformo&oldid=1027456 7 | Date : 2021-12-19 22:43 8 | 9 | >>> render_variant("form-eo", [], defaultdict(str), "ekamus") 10 | 'ekami' 11 | >>> render_variant("form-eo", [], defaultdict(str), "hispanan") 12 | 'hispana' 13 | >>> render_variant("form-eo", [], defaultdict(str), "surdaj") 14 | 'surda' 15 | >>> render_variant("form-eo", [], defaultdict(str), "inexistant") 16 | 'inexistant' 17 | """ 18 | return next( 19 | ( 20 | f"{word.removesuffix(suffix)}{last_char}" 21 | for suffix, last_char in [ 22 | ("on", "o"), 23 | ("oj", "o"), 24 | ("ojn", "o"), 25 | ("an", "a"), 26 | ("aj", "a"), 27 | ("ajn", "a"), 28 | ("as", "i"), 29 | ("is", "i"), 30 | ("os", "i"), 31 | ("us", "i"), 32 | ("u", "i"), 33 | ] 34 | if word.endswith(suffix) 35 | ), 36 | word, 37 | ) 38 | 39 | 40 | handlers = { 41 | "form-eo": render_variant, 42 | } 43 | -------------------------------------------------------------------------------- /scripts/scripts_utils.py: -------------------------------------------------------------------------------- 1 | from time import sleep 2 | from typing import Any 3 | 4 | import requests 5 | from requests.exceptions import HTTPError, RequestException 6 | 7 | SESSION = requests.Session() 8 | SESSION.headers["User-Agent"] = "https://www.reader-dict.com" 9 | 10 | 11 | def get_content(url: str, *, max_retries: int = 5, sleep_time: int = 5, as_json: bool = False) -> str | dict[str, Any]: 12 | """Fetch given *url* content with a retry mechanism.""" 13 | retry = 0 14 | while retry < max_retries: 15 | try: 16 | with SESSION.get(url, timeout=10) as req: 17 | req.raise_for_status() 18 | return req.json() if as_json else req.text 19 | except TimeoutError: 20 | sleep(sleep_time) 21 | 
retry += 1 22 | except HTTPError as err: 23 | resp = err.response 24 | if resp is not None and resp.status_code == 404: 25 | return "" 26 | wait_time = 1 27 | if resp is not None and resp.status_code == 429: 28 | wait_time = int(resp.headers.get("retry-after") or "1") 29 | sleep(wait_time * sleep_time) 30 | retry += 1 31 | except RequestException: 32 | sleep(sleep_time) 33 | retry += 1 34 | raise RuntimeError(f"Sorry, too many tries [{retry}] for {url!r}") 35 | -------------------------------------------------------------------------------- /tests/data/pt/-a.wiki: -------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | {{ver também/a}} 3 | ==Pospositivo== 4 | {{flex.pt|0s=a|0p=as}} 5 | '''-a''', 6 | # desinência nominal feminina 7 | #* ''A ros'''a''', a alun'''a''', a vid'''a''', lam'''a'''.'' 8 | # desinência nominal masculina 9 | #* ''O di'''a''', o naut'''a'''.'' 10 | # desinência nominal comuns-de-dois 11 | #* ''artist'''a''', contist'''a'''.'' 12 | # vogal temática da primeira conjugação portuguesa 13 | {{flex.pt|0s=-|0p=a}} 14 | '''-a''', 15 | # {{cont|5}} desinência plural masculina em português de latinismos como ultimatum (os ultimata), o corpus (os corpora), o genus (os genera) etc. 16 | 17 | =={{etimologia|pt}}== 18 | * De '''1''': desinência nominal feminina acrescentada no português moderno a palavras anteriormente comuns-de-dois, como portuguesa (e praticamente o padrão ''-ês'' [masculino]:-esa [feminino]), espanhol(a), senhor(a). 19 | * De '''4''': da vogal temática da 1ª conjugação latina. 20 | * De '''5''': da desinência do plural neutro latino 21 | 22 | [[Categoria:Pospositivo (Português)]] 23 | [[Categoria:Entrada de étimo latino (Português)]] 24 | 25 | 26 | ={{-vo-}}= 27 | ==Sufixo== 28 | '''-a''' 29 | # designativo do [[caso]] [[genitivo]], coloca-se junto e imediatamente após a palavra no caso [[nominativo]]. 30 | #: Ex.: '''[[buk]]''' → '''buka''' ''(livro → de/do livro)''. 
31 | 32 | [[Categoria:Sufixo (Volapuque)|a]] 33 | -------------------------------------------------------------------------------- /tests/data/pt/alguém.wiki: -------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | ==Pronome== 3 | {{flex.pt|0s=alguém|0p=alguéns}} 4 | {{oxítona|al|guém}}, {{gramática|s|ind}} 5 | # [[pessoa]] não [[identificada]] 6 | #* '''''Alguém''' levou o meu carro!'' 7 | #* ''Toda vez que levei alguém lá foi uma decepção para os '''alguéns'''.'' (jornal O Globo de 12 de novembro de 2017 [https://web.archive.org/web/20230822015825/https://www.academia.org.br/artigos/conversacoes] [https://web.archive.org/web/20230822020336/https://oglobo.globo.com/ela/gastronomia/como-lidar-com-paris-22055807]) 8 | 9 | ==={{PEPB|Antónimo|Antônimo|inline=1}}=== 10 | * De '''1''': [[ninguém]] 11 | 12 | ===Tradução=== 13 | {{tradini}} 14 | * {{trad|de|jemand}} 15 | * {{trad|kea|algem}} 16 | * {{trad|es|alguien}} 17 | * {{trad|fi|joku|kukaan}} 18 | * {{trad|fr|quelqu’un}} 19 | * {{trad|gl|alguén}} 20 | * {{trad|nl|iemand}} 21 | * {{trad|hu|valaki}} 22 | {{tradmeio}} 23 | * {{trad|en|someone}} 24 | * {{trad|is|einhver}} 25 | * {{trad|it|qualcuno}} 26 | * {{trad|mwl|alguien}} 27 | * {{trad|nb|noen}} 28 | * {{trad|pl|ktoś}} 29 | * {{trad|sv|någon}} 30 | {{tradfim}} 31 | 32 | =={{Etimologia|pt}}== 33 | {{etimo2|la|alĭquem|pt}} 34 | 35 | =={{Pronúncia|pt}}== 36 | ===Brasil=== 37 | ====Paulistana e Caipira==== 38 | * [[AFI]]: {{AFI|[aw.ˈgẽj]}} 39 | * [[X-SAMPA]]: /aw."ge~j/ 40 | ===Portugal=== 41 | * AFI: {{AFI|/aɫ.ˈɡɐ̃j̃/}} 42 | 43 | [[Categoria:Pronome (Português)]] 44 | -------------------------------------------------------------------------------- /wikidict/show_pos.py: -------------------------------------------------------------------------------- 1 | """Render templates from raw data.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | import os 7 | 8 | from . 
import convert, render, utils 9 | from .stubs import Words 10 | 11 | log = logging.getLogger(__name__) 12 | 13 | 14 | def show_pos(words: Words) -> None: 15 | debug = os.getenv("DEBUG_POS", "") 16 | text = "\nPart Of Speech:" 17 | all_pos: list[str] = [] 18 | 19 | for word, details in words.items(): 20 | all_pos.extend(new_pos := details.definitions.keys()) 21 | if debug and any(debug in pos for pos in new_pos): 22 | print(f"{word!r}: {', '.join(new_pos)}") 23 | 24 | for count, pos in enumerate(sorted(set(all_pos)), 1): 25 | text += f"\n {str(count).rjust(2)}. {pos!r}" 26 | 27 | log.info(text) 28 | 29 | 30 | def main(locale: str) -> int: 31 | """Entry point.""" 32 | 33 | lang_src, lang_dst = utils.guess_locales(locale) 34 | utils.setup_logging(lang_src, lang_dst) 35 | 36 | source_dir = render.get_source_dir(lang_src, lang_dst) 37 | if not (input_file := convert.get_latest_json_file(source_dir)): 38 | log.error("No dump found. Run with --parse first ... ") 39 | return 1 40 | 41 | output = render.get_output_file(source_dir, input_file.stem.split("-")[-1]) 42 | words = convert.load(output) 43 | show_pos(words) 44 | return 0 45 | -------------------------------------------------------------------------------- /tests/data/pt/baiano.wiki: -------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | =={{Adjetivo|pt}}== 3 | {{flex.pt|ms=baiano|mp=baianos|fs=baiana|fp=baianas}} 4 | {{paroxítona|bai|a|no}} 5 | # do Estado da [[Bahia]], [[Brasil]] 6 | #: bala baiana 7 | 8 | ===Expressões=== 9 | * '''[[alqueire baiano]]''': 10 | 11 | =={{Substantivo|pt}}== 12 | {{paroxítona|bai|a|no}} 13 | # [[natural]] ou [[habitante]] do Estado da [[Bahia]], [[Brasil]] 14 | # {{escopo|pt|São Paulo|Brasil|popular|pejorativo|racismo}} [[pessoa]] que se [[vestir|veste]] de maneira incomum ou [[brega]]; fora da moda 15 | 16 | ===Expressões=== 17 | * '''[[rodar a baiana]]''': 18 | 19 | =={{etimologia|pt}}== 20 | : Derivado de [[Bahia]], mais o sufixo ano, 
com perda do H. 21 | 22 | =={{pronúncia|pt}}== 23 | ===Brasil=== 24 | ====Paulistana, Caipira e Carioca==== 25 | * [[AFI]]: /baj.ˈjã.nu/ 26 | * [[X-SAMPA]]: /baj."6.nu/ 27 | 28 | ==Ver também== 29 | ===No Wikcionário=== 30 | {{verTambém.Ini}} 31 | * [[abaianado]] 32 | * [[abaianar]] 33 | * [[baianada]] 34 | {{verTambém.NovaColuna}} 35 | * [[bahiano]] {{escopoGrafiaUsadaAte1911}} 36 | * [[gelo-baiano]] 37 | {{verTambém.Fim}} 38 | 39 | ===Na Wikipédia=== 40 | * [[:w:Bahia|Bahia]] 41 | 42 | ==Ligações externas== 43 | * {{Aulete}} 44 | * {{Michaelis}} 45 | * {{Dicio}} 46 | * {{DicioAberto}} 47 | * {{DLPO}} 48 | * {{Infopédia}} 49 | * {{PLP}} 50 | * {{VOLPABL}} 51 | * {{Forvo|baiano|pt}} 52 | 53 | [[Categoria:Gentílico (Português)]] 54 | -------------------------------------------------------------------------------- /wikidict/lang/defaults.py: -------------------------------------------------------------------------------- 1 | """Default values for locales without specific needs. 2 | See `wikidict.lang.__init__` for details. 3 | """ 4 | 5 | from collections import defaultdict 6 | from collections.abc import Callable 7 | 8 | module_trans = "Module" 9 | template_trans = "Template" 10 | appendix_trans = "Appendix" 11 | template_adapters: dict[str, Callable[[str], str]] = {} 12 | template_overrides: dict[str, Callable[[tuple[str, ...]], str]] = {} 13 | 14 | float_separator = "" 15 | thousands_separator = "" 16 | 17 | section_patterns = ("#",) 18 | sublist_patterns = ("#",) 19 | section_level = 2 20 | section_sublevels = (3,) 21 | head_sections = ("",) 22 | etyl_section = ("",) 23 | 24 | variant_titles: tuple[str, ...] = () 25 | variant_templates: tuple[str, ...] = () 26 | reverse_variant_titles: tuple[str, ...] = () 27 | reverse_variant_templates: tuple[str, ...] = () 28 | variant_handlers: dict[str, Callable[[str, list[str], defaultdict[str, str], str], str]] = {} 29 | 30 | definitions_to_ignore: tuple[str, ...] = () 31 | templates_ignored: tuple[str, ...] 
= () 32 | 33 | 34 | def find_genders(code: str, locale: str) -> list[str]: 35 | return [] 36 | 37 | 38 | def find_pronunciations(code: str, locale: str) -> list[str]: 39 | return [] 40 | 41 | 42 | def adjust_wikicode( 43 | code: str, 44 | locale: str, 45 | *, 46 | templates_status: list[tuple[str, str]] | None = None, 47 | word: str = "", 48 | ) -> str: 49 | return code 50 | -------------------------------------------------------------------------------- /wikidict/lang/fr/template_adapters.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | adapters = { 4 | "Modèle:date": lambda _: "{{#ifeq:{{{1|}}}|?||{{#if:{{{1|}}}|({{UCFIRST: {{{1}}} }})|}}}}", 5 | **dict.fromkeys( 6 | { 7 | "Modèle:emploi", 8 | "Modèle:lexique", 9 | "Modèle:term", 10 | }, 11 | lambda body: re.sub(r"\[\[Catégorie:[^\]]+\]\]", "", body), 12 | ), 13 | "Modèle:nom w pc": lambda body: body.removesuffix( 14 | """Le modèle ''nom w pc'' est désuet. Supprimez-le de cette ligne, ou remplacez-le par le modèle w si un lien vers Wikipédia est nécessaire.""" 15 | ), 16 | "Modèle:radical de Kangxi": lambda _: "Radical de Kangxi {{numéro|{{#expr: {{point de code|{{PAGENAME}}|format=%d}} - 12032 + 1}}}} [[{{str left|{{radical trait|{{PAGENAME}}}}|1}}]]. Unicode : U+{{point de code|{{PAGENAME}}}}.", 17 | "Modèle:référence nécessaire": lambda _: "{{#if:{{{1|}}}|{{#ifeq:{{{1|}}}|nocat||{{{1}}}}}}}", 18 | "Modèle:siècle": lambda _: """{{#ifeq:{{{1|}}}|?||{{#if:{{{1|}}}|''({{#invoke:date et heure|formate_un_siecle|{{{1|}}}|lang={{{lang|{{{langue|}}}}}}}}{{#if:{{{2|}}}| – {{#invoke:date et heure|formate_un_siecle|{{{2}}}}}}})''|}}}}""", 19 | "Modèle:variante du radical de Kangxi": lambda _: "Variante {{{1|}}} du radical de Kangxi [[{{str left|{{radical trait|{{PAGENAME}}}}|1}}]]. 
Unicode : U+{{point de code|{{PAGENAME}}}}.", 20 | } 21 | -------------------------------------------------------------------------------- /tests/data/no/liksom.wiki: -------------------------------------------------------------------------------- 1 | ==Norsk== 2 | ===Subjunksjon=== 3 | {{infl|no|subj}} {{norm|nb=ja|nrm=ja|nn=ja}} 4 | 5 | #Antyder at noe er på lek, at man [[late]]r som noe. 6 | #:''Vi fant på masse liksom-ord, og lagde vårt eget språk.'' 7 | #Antyder en sammenligning, brukes ofte som et slags fyllord, særlig i muntlig språk. 8 | #:''Det var liksom veldig ordentlig.'' 9 | #:''Bark er liksom huden til trærne.'' 10 | 11 | ====Andre former==== 12 | *[[likesom]] {{norm|nb=ja|nrm=ja}} 13 | *[[lissom]] {{norm|nb=ja|nrm=ja}} 14 | 15 | ====Oversettelser==== 16 | {{overs-topp|På lek, at man later som noe}} 17 | * {{overs-mangler|da}} 18 | * {{overs|en|make believe}} 19 | * {{overs-mangler|fi}} 20 | * {{overs-mangler|fr}} 21 | * {{overs-mangler|it}} 22 | {{overs-midt}} 23 | * {{overs-mangler|nl}} 24 | * {{overs-mangler|ru}} 25 | * {{overs-mangler|es}} 26 | * {{overs|sv|låtsas}} 27 | * {{overs-mangler|de}} 28 | {{overs-bunn}} 29 | 30 | {{overs-topp|Antyder en sammenligning, at noe ligner på noe}} 31 | * {{overs-mangler|da}} 32 | * {{overs-mangler|en}} 33 | * {{overs-mangler|fi}} 34 | * {{overs-mangler|fr}} 35 | * {{overs-mangler|it}} 36 | {{overs-midt}} 37 | * {{overs-mangler|nl}} 38 | * {{overs-mangler|ru}} 39 | * {{overs-mangler|es}} 40 | *{{overs|sv|liksom}} 41 | * {{overs-mangler|de}} 42 | {{overs-bunn}} 43 | 44 | ===Referanser=== 45 | * {{R:Bokmålsordboka-Nynorskordboka}} 46 | * {{R:NAOB}} 47 | 48 | 49 | ---- 50 | 51 | ==Svensk== 52 | ===Konjunksjon=== 53 | {{infl|sv|konj}} 54 | 55 | #[[#Norsk|liksom]] 56 | -------------------------------------------------------------------------------- /tests/data/fr/corps portant.wiki: -------------------------------------------------------------------------------- 1 | == {{langue|fr}} == 2 | === {{S|étymologie}} === 3 
| : Locution {{composé de|lang=fr|corps|portant|f=1}}. 4 | 5 | === {{S|nom|fr}} === 6 | {{fr-rég|kɔʁ pɔʁ.tɑ̃}} 7 | '''corps portant''' {{pron|kɔʁ pɔʁ.tɑ̃|fr}} {{m}} 8 | [[Image:X24.jpg|thumb|upright=1.2|Martin Marietta X-24A construit pour un projet militaire américain expérimental de 1963 à 1975.]] 9 | # {{lexique|astronautique|fr}} [[aéronef|Aéronef]] à fuselage porteur, sur lequel la [[portance]] est produite par le [[fuselage]], destiné aux usages [[spatiaux]] ou [[hypersonique]]s, afin de limiter l'[[effet de traînée]] ou la [[surface de friction]]. 10 | # {{lexique|astronautique|fr}} {{term|aérodynamique}} Engin aérospatial possédant, à vitesse hypersonique, une portance qui lui assure une bonne manœuvrabilité lors de la rentrée atmosphérique. 11 | 12 | ==== {{S|vocabulaire}} ==== 13 | * [[corps émoussé]] 14 | * [[rentrée atmosphérique]] 15 | 16 | ==== {{S|traductions}} ==== 17 | {{trad-début}} 18 | * {{T|en}} : {{trad+|en|lifting body}} 19 | * {{T|hr}} : {{trad-|hr|ojačan uzdužni nosač trupa|tr=(1)}}, {{trad-|hr|spreg sila na trup|tr=(2)}}, {{trad-|hr|uzgonsko aerotijelo|tr=(2)}} 20 | {{trad-fin}} 21 | 22 | === {{S|prononciation}} === 23 | * {{écouter|Somain (France)||lang=fr|audio=LL-Q150 (fra)-Jérémy-Günther-Heinz Jähnick-corps portant.wav}} 24 | 25 | === {{S|voir aussi}} === 26 | * {{WP}} 27 | 28 | === {{S|références}} === 29 | * {{R:FranceTerme}} 30 | 31 | [[Catégorie:Néologismes recommandés en français]] 32 | -------------------------------------------------------------------------------- /tests/data/de/@.wiki: -------------------------------------------------------------------------------- 1 | {{Abschnitte fehlen|Deutsch}} 2 | == @ ({{Sprache|International}}) == 3 | === {{Wortart|Symbol|International}} === 4 | 5 | {{Symbol Übersicht 6 | |Navi-Titel=ASCII punctuation 7 | |Navi-1=? 
8 | |Navi-2=A 9 | |Bezeichnung=commercial at 10 | |Block=Basis-Lateinisch 11 | |Nummer=0040 12 | |TeX1= \@ 13 | |TeX-Ref=ja 14 | |HTML-dez=@ 15 | |HTML-hex=@ 16 | |Morse= · − − · − · 17 | |Bild=At sign.svg|mini|1|das At-Zeichen 18 | }} 19 | 20 | {{Bedeutungen}} 21 | :[1] ''[[Informatik]] (seit 1972):'' das [[At]]; [[notwendig]]er [[Bestandteil]] und [[Trennzeichen]] zwischen [[Benutzername]] und [[Domain]]name bei [[E-Mail-Adresse]]n 22 | :[2] ''Informatik:'' das [[At]]; [[Syntax]]-Bestandteil einiger [[Programmiersprache]]n (beispielsweise als Präfix vor [[Array]]-Variablen in der Programmiersprache [[Perl]]) 23 | 24 | {{QS Herkunft|fehlt}} 25 | 26 | {{Synonyme}} 27 | :[1] [[At]], [[At-Symbol]], [[At-Zeichen]], [[at sign]], [[Ad-Zeichen]], [[Ad]], [[Affenschwanz]], [[Affenohr]], [[Affenschaukel]], [[Alef]], [[Astat]], [[Klammeraffe]] 28 | 29 | {{Beispiele}} 30 | :[1] Die E-Mail-Adresse lautet abcde''@''xyz.de. 31 | :[2] my ''@''teile = split m[/], $unixpfad; 32 | 33 | {{Referenzen}} 34 | :[1, 2] {{Wikipedia|At-Zeichen}}, Weiterleitung von [[w:@|@]] 35 | :[1] {{Lit-Duden: Rechtschreibung|V=|A=24}}, Seite 151 36 | :[*] SYMBL: „[https://symbl.cc/de/0040/ @]“ 37 | 38 | {{Quellen}} 39 | {{Absatz}} 40 | {{Ähnlichkeiten}} 41 | :[[§]], [[&]], [[a]], [[e]], [[€]] 42 | -------------------------------------------------------------------------------- /tests/data/ca/ch.wiki: -------------------------------------------------------------------------------- 1 | {{vegeu|CH}} 2 | 3 | == {{-mul-}} == 4 | 5 | === Símbol === 6 | {{entrada|mul|símbol}} 7 | 8 | # Codi de llengua [[w:ISO 639-1|ISO 639-1]] del [[chamorro]]. 9 | 10 | [[Categoria:Codis de llengua]] 11 | 12 | == {{-ca-}} == 13 | 14 | === Lletra === 15 | {{entrada|ca|lletra}} 16 | 17 | # {{marca|ca|arcaic}} Especialment a final de mot, dígraf amb una consonant muda per remarcar la grafia d’una oclusiva velar sorda {{IPAchar|[k]}} i no pas una de sonora {{IPAchar|[ɡ]}}. 
18 | 19 | ==== Notes ==== 20 | * Per exemple, ''antic'' se sonoritza en femení ''antiga'', però ''antic/antich abat'' no se sonoritza. 21 | * En la [[:w:Normes ortogràfiques|reforma ortogràfica del 1913]] es va suprimir aquesta ''h'' no etimològica. Prèviament el seu ús ja era discutit i variable segons els autors. 22 | * Es manté fossilitzat en diversos cognoms i durant un temps es va mantenir fossilitzat en castellà per alguns topònims catalans. 23 | * Possiblement originat en la [[w:Gòtica (tipografia)|cal·ligrafia gòtica]] on la lletra ''c'' es podia confondre amb d’altres i en posició final s’hi afegia un ornament distintiu. 24 | 25 | === Vegeu també === 26 | * {{Viquipèdia}} 27 | 28 | == {{-es-}} == 29 | 30 | === Lletra === 31 | {{entrada|es|lletra}} 32 | 33 | # Dígraf ''{{e|es|c}}''+''{{e|es|h}}''. 34 | 35 | ==== Notes ==== 36 | * Abans de l’any 2010 formava part de l’alfabet castellà ordenat separadament després de la ce. 37 | * Representa el so africat postalveolar sord {{IPAchar|[tʃ]}} equivalent al dígraf català ''tx''. 
38 | 39 | {{-rel-}} 40 | {{alfabet/es}} 41 | -------------------------------------------------------------------------------- /tests/data/ja/駐.wiki: -------------------------------------------------------------------------------- 1 | {{kanji header|部画=馬:10+5|IVS=00|包摂注記=y}} 2 | {{kanji header|IVS=01|[[旧字体]]|section=no}} 3 | {{kanji variants|驻=[[簡体字]]}} 4 | {{筆順}} 5 | ===字源=== 6 | * [[形声文字|形声]]。「[[馬]]」+音符「[[主]] {{phoneme|*TO}}」。[[漢語]]{駐 {{phoneme|*tros}}}を表す字。 7 | ===意義=== 8 | #(馬や車を)長時間、停める。 9 | #*[[駐車]] 10 | #(別に[[本拠]]とするところがあるが)じっと一箇所に[[いる]]。 11 | #*[[駐在]]、[[駐屯]] 12 | =={{ja}}== 13 | [[Category:{{ja}}|ちゆう ちゅう]] 14 | {{ja-kanji|常用=チュウ|呉音=チュウ|漢音=チュウ|訓=とど-める}} 15 | ====造語成分==== 16 | #[[国外]]に[[派遣]]されて、長期間[[滞在]]しているという意味の語を作る。 17 | #*[[駐米]]、[[駐英]] 18 | ==={{prov}}=== 19 | *[[駐軍]] 20 | *[[駐在]] 21 | *[[駐箚]] 22 | *[[駐車]] 23 | *[[駐屯]] 24 | *[[駐歩]] 25 | *[[駐留]] 26 | *[[駐輦]] 27 | *[[移駐]] 28 | *[[常駐]] 29 | *[[進駐]] 30 | =={{zh}}== 31 | [[Category:{{zh}}|zhu4]] 32 | {{trans_link|zh|{{PAGENAME}}}} 33 | * '''ローマ字表記''' 34 | ** '''[[普通話]]''' 35 | *** '''[[ピンイン]]''': [[zhù]] ([[zhu4]]) 36 | *** '''[[ウェード式]]''': chu4 37 | ** '''[[広東語]]''' 38 | *** '''[[イェール式]]''': jyu3 39 | ==={{prov}}=== 40 | 41 | =={{ko}}== 42 | [[Category:{{ko}}|ㅈㅜ]] 43 | {{ko-hanja|hangeul=[[주]]|eumhun=[[말]] [[머물다|머물]] 주|rv=ju|mr=chu|y=cwu}} 44 | ==={{prov}}=== 45 | 46 | =={{vi}}== 47 | [[Category:{{vi}}|trú]] 48 | {{trans_link|vi|{{PAGENAME}}}} 49 | * '''ローマ字表記''' 50 | ** [[Quốc ngữ]]: [[trú]] 51 | 52 | ==文字情報== 53 | {{character info}} 54 | {{文字コード|jis=1-35-83|mj={{mjmoji|028659|028660}}|gb=F176|cns=1-6F77|big5=BE6E|ksx=1001-7152}} 55 | {{検字|倉頡=SFYG|四角=7031.4}} 56 | {{点字|tenji6=6,1345,1356|tenji8=13568,234578}} 57 | {{字典|dj=1960.270|hdz=74549.090|大字源=11496/11497|大漢語林=13082/13083|康煕=1436.020|新大字典=19446|新潮漢字=14425/14426|諸橋=44660}} 58 | -------------------------------------------------------------------------------- /tests/data/no/én svale gjør ingen sommer.wiki: 
-------------------------------------------------------------------------------- 1 | ==Norsk== 2 | ===Ordtak=== 3 | {{no-ordtak}} 4 | [[Kategori:Ordtak med dyr]] 5 | 6 | # Det at noen har vært observert én gang betyr ikke at det er en regel eller et sikkert tegn 7 | 8 | ====Andre former==== 9 | {{andre former|ei svale gjer ingen sumar|nn=ja}} 10 | {{andre former|ei svale gjer ingen sommar|nn=ja}} 11 | 12 | ====Etymologi==== 13 | {{etymologi mangler|språk=no}} 14 | 15 | ====Uttale==== 16 | {{uttale mangler|språk=no}} 17 | {{lydfil mangler|språk=no}} 18 | 19 | ====Oversettelser==== 20 | {{overs-topp|at noe forekommer en enkelt gang er ikke nok til å skape en forandring}} 21 | * {{overs-mangler|da}} 22 | * {{overs|en|one swallow does not a summer make}}, {{o|en|one swallow does not make a summer}} 23 | * {{overs|fi|ei yksi pääsky kesää tee}} 24 | * {{overs|fr|une hirondelle ne fait pas le printemps}} 25 | * {{overs|it|una rondine non fa primavera}} 26 | * {{overs|la|una hirundo non facit ver}} 27 | {{overs-midt}} 28 | * {{overs|nl|één zwaluw maakt nog geen zomer}}, {{o|nl|één zwaluw maakt de lente niet}} 29 | * {{overs|pt|uma andorinha só não faz verão}}, {{o|pt|uma andorinha não faz verão}} 30 | * {{overs|ru|пе́рвая ла́сточка весны́ не де́лает|sc=Cyrl|tr=pérvaja lástočka vesný ne délajet}} 31 | * {{overs|es|una golondrina no hace verano}} 32 | * {{overs|sv|en svala gör ingen sommar}} 33 | * {{overs|de|eine Schwalbe macht noch keinen Sommer}}, {{o|de|einmal ist keinmal}} 34 | {{overs-bunn}} 35 | 36 | {{DEFAULTSORT:en svale gjør ingen sommer}} 37 | 38 | ===Referanser=== 39 | * {{R:Bokmålsordboka-Nynorskordboka}} 40 | * {{R:NAOB}} 41 | -------------------------------------------------------------------------------- /tests/data/no/seg.wiki: -------------------------------------------------------------------------------- 1 | ==Norsk== 2 | ===Pronomen=== 3 | {{no-pron}} 4 | # [[refleksivt]] pronomen, tredje person entall og flertall 5 | #:{{eksempel|no|Han skyndte 
'''seg''' til bussen.}} 6 | #:{{eksempel|no|De bestemte '''seg''' for å vente.}} 7 | 8 | ====Etymologi==== 9 | Av {{etyl|non|no}} {{term|sik|||språk=non}}.{{R:Bokmålsordboka-Nynorskordboka}} 10 | 11 | [[Kategori:Refleksive pronomen i norsk]] 12 | 13 | ====Uttale==== 14 | * {{lyd|No-seg.ogg|språk=no}} 15 | 16 | ====Grammatikk==== 17 | Det refleksive pronomenet blir brukt som [[objekt]] istedenfor [[personlig|personlige]] pronomen i tredje person når objektet viser til det samme som [[subjekt|subjektet]] i setningen. 18 | {{Pron-alle-generisk|refleksivt||seg|sin|si|sitt|sine}} 19 | 20 | =====Eksempler===== 21 | * Wergeland ønsket [[seg]] et embete. (Objektet, [[seg]], viser til Wergeland, subjektet.) 22 | * Kongen gav [[ham]] et embete. (Objektet, [[ham]], viser ikke til subjektet som er kongen.) 23 | * [[ho|Ho]] lauga [[seg]]. (Ho lauga seg sjølv.) 24 | * Ho lauga [[henne]]. (Ho lauga ei anna.) 25 | 26 | ====Oversettelser==== 27 | {{overs-topp|Refleksivt pronomen}} 28 | * {{overs|da|sig}} 29 | * {{overs|fr|se}} 30 | * {{overs|pt|si|m|f|p}}, {{o|pt|si|m|f}}, {{o|pt|ele|m}}, {{o|pt|ela|f}}, {{o|pt|eles|m|p}}, {{o|pt|elas|f|p}} 31 | {{overs-midt}} 32 | * {{overs|es|se}} 33 | * {{overs|sv|sig}}, {{o|sv|sej}} 34 | * {{overs|de|sich}} 35 | {{overs-bunn}} 36 | 37 | ===Referanser=== 38 | {{reflist}} 39 | 40 | [[Kategori:100 vanligste ord i norsk]] 41 | 42 | ---- 43 | ==Svensk== 44 | ===Adjektiv=== 45 | {{sv-adj|segare|segast}} 46 | # [[seig]] 47 | -------------------------------------------------------------------------------- /wikidict/lang/ja/variant_handlers.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | import wikitextparser as wtp 4 | 5 | from ... 
import context, utils 6 | 7 | 8 | def cleanup(form: str) -> str: 9 | return form.removesuffix("(古)") 10 | 11 | 12 | def table_to_forms(word: str, wikitext: str) -> list[str]: 13 | forms: set[str] = set() 14 | tables = wtp.parse(wikitext).get_tables(recursive=True) 15 | 16 | for table in tables[1:]: # skip the information table 17 | data = table.data(span=False) 18 | for line in data[1:]: # skip headers 19 | form = str(line[1]) 20 | 21 | if "")) 23 | else: 24 | forms.add(cleanup(form)) 25 | 26 | forms.discard(word) 27 | return sorted(forms) 28 | 29 | 30 | def render_reverse_variant(tpl: str, parts: list[str], data: defaultdict[str, str], word: str) -> str: 31 | """ 32 | >>> render_reverse_variant("rev-flexion", ["顧眄せず"], defaultdict(str), "顧眄") 33 | '顧眄せず' 34 | """ 35 | if tpl == "rev-flexion": 36 | return parts[0] 37 | 38 | table = context.expand(utils.reconstruct_tpl(tpl, parts, data), "ja") 39 | return "|".join(table_to_forms(word, table)) 40 | 41 | 42 | handlers = { 43 | "rev-flexion": render_reverse_variant, 44 | } 45 | 46 | 47 | def append_to_reverse_variants(tpl: str) -> None: 48 | """Dynamically append a template to reverse variants templates.""" 49 | if tpl in handlers: 50 | return 51 | handlers[tpl] = render_reverse_variant 52 | -------------------------------------------------------------------------------- /tests/data/da/her.wiki: -------------------------------------------------------------------------------- 1 | {{=da=}} 2 | {{-pronun-}} 3 | *{{IPA|/hɛːˀɒ̯/|lang=da}} 4 | {{-adv-|da}} 5 | {{pn}} 6 | # Stedet hvor vi er nu. Vores placering. 
7 | # (''[[radiokommunikation]], [[radiotelefoni]]'') Dette opkalder stammer fra denne opkalder 8 | {{-syn-}} 9 | *her er 10 | {{-trans-}} 11 | {{trans-top|Stedet hvor vi er nu}} 12 | * {{en}}: {{t|en|here}} 13 | * {{fr}}: {{t|fr|ici}} 14 | * {{nl}}: {{t|nl|hier}} 15 | * {{zh}}: {{t|zh|这里}} (zhèlǐ), {{t|zh|这兒}} (zhè+ér=zhèr) 16 | {{trans-mid}} 17 | * {{O|pt|aqui}} 18 | * {{O|es|aquí}} 19 | * {{de}}: {{t|de|hier}} 20 | {{trans-bottom}} 21 | ===Formelt subjekt=== 22 | {{pn}} 23 | # [[bruge]]s som [[upersonlig]]t [[subjekt]], [[referere]]r [[ofte]] [[fremad]] eller [[tilbage]] til et andet [[led]] i [[sætning]]en. 24 | [[Kategori:Formelle subjekter på dansk]] 25 | {{-ref-}} 26 | *{{DDO}} 27 | {{=en=}} 28 | {{Personlige pronominer på engelsk}} 29 | {{-pronun-}} 30 | *{{IPA|/hɜ:ʳ/|lang=en}} 31 | {{-pers-pronom-|en}} 32 | {{pn}} 33 | #[[hende]] 34 | #:''Give '''her''' a gift card.'' 35 | #::''Giv '''hende''' et gavekort.'' 36 | ===Possessivt pronomen (Ejestedord)=== 37 | {{pn}} 38 | #Tredje person feminin ental; [[hendes]]. 39 | #:''I like '''her''' styling.'' 40 | #::''Jeg kan lide '''hendes''' styling.'' 41 | {{-ref-}} 42 | *{{Oxford Dictionaries}} 43 | [[Kategori:Possessive pronominer på engelsk]] 44 | 45 | {{=de=}} 46 | {{-etym-}} 47 | Fra {{etyl|goh|de}} {{term|hera|lang=goh}}.{{DWDS}} 48 | {{-pronun-}} 49 | *{{IPA|/he:ɐ̯/|lang=de}} 50 | {{-adv-|de}} 51 | {{pn}} 52 | #{{stedsbiord|tysk}} derfra til her 53 | #:''Wo kommst du '''her'''?'' 54 | #::''Hvor kommer du fra?'' 55 | {{-ant-}} 56 | *[[hin]] 57 | {{-ref-}} 58 | -------------------------------------------------------------------------------- /tests/data/es/-acho.wiki: -------------------------------------------------------------------------------- 1 | {{desambiguación|Acho|acho|achó}} 2 | 3 | == {{lengua|es}} == 4 | {{pron-graf}} 5 | 6 | === Etimología 1 === 7 | {{etimología|la|-aceus|alt=-acĕus}}.{{DRAE2001}} De allí también ''[[-áceo]]''. 
8 | 9 | ==== {{sufijo|es}} ==== 10 | {{es.adj}} 11 | ;1: {{impropia|Forma [[aumentativo]]s, a veces [[despectivo]]s, a partir de [[adjetivo]]s y [[sustantivo]]s}}. 12 | {{uso|a veces se combina con -ar formando ''-aracho'': "vivaracho"|"dicharacho"}} 13 | :::También se combina con ''[[-uelo]]'', formando ''[[-achuelo]]'': [[riachuelo]]. 14 | :*'''Relacionados''': {{l|es|-aco|num=2}}, [[-ajo]], [[-arro]], [[-ejo]], [[-ete]], [[-ico]], [[-ijo]], [[-illo]], [[-ito]], [[-ín]], [[-ino]], [[-iño]], [[-izno]], [[-ón]], [[-ote]], [[-rro]], [[-uco]], [[-ucho]], [[-uelo]], [[-ujo]], [[-ulo]] ''(diminutivos, aumentativos y despectivos)'' 15 | {{ejemplo|''(despectivos)'': [[amigo]] → [[amigacho]] ([[amigote]], [[amiguete]]).}} 16 | {{ejemplo|''(sin despectivo ni aumentativo)'': [[verde]] → [[verdacho]].}} 17 | {{ejemplo|''(meramente aumentativo)'': [[bomba]] → [[bombacho]].}} 18 | {{derivad|aguacha|amigacho|barbicacho|bocacha|cocacho|dicharacho|hilacha|hornacho|libracho|mamarracho|mandracho|picacho|poblacho|riacho|ricacho|tablacho|terminacho|vivaracho|velacho|vulgacho}} 19 | 20 | ==== Véase también ==== 21 | * [[:Categoría:ES:Palabras con el sufijo -acho|Palabras con el sufijo -acho en ''Wikcionario'']]. 22 | * {{Wikipedia|Gram%C3%A1tica_del_espa%C3%B1ol#Sufijos|'''Sufijos del español'''}} 23 | 24 | ==== Traducciones ==== 25 | {{nota traducción afijo|su}} 26 | {{trad-arriba}} 27 | {{trad-abajo}} 28 | 29 | == Referencias y notas == 30 | 31 | -------------------------------------------------------------------------------- /tests/data/no/krokodille.wiki: -------------------------------------------------------------------------------- 1 | {{wikipediaartikkel}} 2 | ==Norsk== 3 | [[Fil:NileCrocodile.jpg|thumb|En Nil-'''krokodille''']] 4 | ===Substantiv=== 5 | {{no-sub|m}} 6 | 7 | # stort reptil, lever i og nær vann. 
''([[w:Latin|lat.]] Crocodylia)'' 8 | [[kategori:no:Krypdyr]] 9 | 10 | ====Etymologi==== 11 | :Fra {{etyl|la-med|no}} {{term|cocodrillus||krokodille|språk=la}}, fra {{etyl|grc|no}} {{term|κροκόδειλος||sc=polytonic|tr=krokodeilos|språk=grc}} 12 | 13 | ====Se også==== 14 | * [[alligator]] 15 | * [[kaiman]] 16 | 17 | ====Avledede termer==== 18 | * [[krokodilletåre]]r 19 | 20 | ====Uttale==== 21 | {{uttale mangler|språk=no}} 22 | {{lydfil mangler|språk=no}} 23 | 24 | ====Grammatikk==== 25 | {{no-sub-m1e|krokodill}} 26 | 27 | ====Oversettelser==== 28 | {{overs-topp|reptil}} 29 | * {{overs|ar|تمساح|m|tr=timsaaH}} 30 | * {{overs|da|krokodille|c}} 31 | * {{overs|en|crocodile|c}} 32 | * {{overs|fi|krokotiili}} 33 | * {{overs|fr|crocodile|m}} 34 | * {{overs|ja|鰐|tr=わに, wani}}, {{o|ja|ワニ|tr=wani}} 35 | * {{overs|la|crocodilus|m}} 36 | * {{overs|cmn|鱷魚|sc=Hani}}, {{o|cmn|鳄鱼|tr=èyú|sc=Hani}} 37 | {{overs-midt}} 38 | * {{overs|nl|krokodil}} 39 | * {{overs-mangler|pt}} 40 | * {{overs|ru|крокодил|m|tr=krokodíl}} 41 | * {{overs|es|cocodrilo|m}} 42 | * {{overs|sv|krokodil|c}} 43 | *{{overs|tr|timsah}} 44 | * {{overs|de|Krokodil|n}} 45 | * {{overs|vi|cá sấu|xs=Vietnamese}} 46 | * {{overs|wo|jasig|alt=jasig ji}} 47 | {{overs-bunn}} 48 | 49 | ===Referanser=== 50 | * {{R:Bokmålsordboka-Nynorskordboka}} 51 | * {{R:NAOB}} 52 | 53 | 54 | ---- 55 | 56 | ==Dansk== 57 | ===Substantiv=== 58 | {{wikipedia|språk=da}} 59 | {{da-sub|c}} 60 | 61 | # [[#Norsk|krokodille]] 62 | [[kategori:da:Krypdyr]] 63 | -------------------------------------------------------------------------------- /tests/data/fr/π.wiki: -------------------------------------------------------------------------------- 1 | {{voir autres scripts/π}} 2 | {{voir/π}} 3 | 4 | == {{caractère}} == 5 | {{casse}} 6 | '''π''' 7 | # Lettre {{lien|minuscule|fr}} grecque {{lien|pi|fr}}. Seizième lettre et onzième consonne de l’{{lien|alphabet grec|fr}}. {{lien|Unicode|fr}} : U+03C0. 
8 | 9 | === {{S|voir aussi}} === 10 | * {{WP}} 11 | * {{WV|Grammaire/Grec}} 12 | {{alphabet grec|π|}} 13 | 14 | === {{S|références}} === 15 | * {{R:Bloc Unicode}} 16 | 17 | == {{langue|conv}} == 18 | === {{S|symbole|conv}} === 19 | '''π''' 20 | # {{lexique|mathématiques|conv}} Symbole représentant le rapport constant entre la [[circonférence]] d’un [[cercle]] et son [[diamètre]], aussi appelé en français la ''[[constante d’Archimède]]''. 21 | #* {{exemple |lang=conv |pas-trad=1 |'''π''' = 3,1415926… }} 22 | # {{lexique|bases de données|conv}} Symbole de la [[projection]]. 23 | 24 | === {{S|voir aussi}} === 25 | * {{WP|Pi}} 26 | 27 | == {{langue|gaulois}} == 28 | === {{S|lettre|gaulois}} === 29 | '''π''' {{pron-recons|p|gaulois}} 30 | # [[lettre|Lettre]] utilisée dans l'alphabet grec du gaulois. 31 | 32 | === {{S|références}} === 33 | * Les références et attestations sont présentes : 34 | ** dans l'[[Annexe:Grammaire gauloise|annexe sur la grammaire gauloise]] ; 35 | ** dans l'[[Annexe:Ouvrages de référence pour le gaulois|annexe listant les ouvrages de référence en gaulois]]. 36 | [[Catégorie:Lettres en gaulois|p]] 37 | 38 | == {{langue|el}} == 39 | === {{S|lettre|el}} === 40 | '''π''' {{pron|p|el}} 41 | # Seizième [[lettre]] et douzième [[consonne]] de l’[[alphabet grec]] (minuscule). 42 | 43 | == {{langue|grc}} == 44 | === {{S|lettre|grc}} === 45 | '''π''' {{pron-recons|p|grc}} 46 | # Lettre de l’[[alphabet grec ancien]]. 
47 | -------------------------------------------------------------------------------- /tests/data/eo/komputilo.wiki: -------------------------------------------------------------------------------- 1 | {{vikipedio}} 2 | 3 | == Esperanto == 4 | === Substantivo === 5 | {{livs|eo|SB|fra=[L:komput(i)]+[I:il]+[U:o]}} 6 | {{Deklinacio-eo}} 7 | {{bildodek|ThinkCentre_S50.jpg}} 8 | {{bildodek|ENIAC-changing_a_tube.jpg|tre granda kaj malnova '''komputilo'''|Raspberry_Pi_4_Model_B_-_Side.jpg|malgranda nova '''komputilo'''}} 9 | 10 | ===={{Signifoj}}==== 11 | # {{k|eo|F: komputado}} [[maŝino]] aŭ [[elektronikaĵo]] kiu kapablas [[kalkuli]], precipe sen intervenoj de homoj, aŭ rapide trakti, stori, kaj preni larĝajn kvantojn de [[datumo]] 12 | 13 | {{Sinonimoj}} 14 | ''(arkaikaj kaj evitendaj)'' [[komputero]], [[komputoro]], [[komputatoro]] 15 | 16 | ===={{Tradukoj}}==== 17 | {{trad-eko}} 18 | * angla: {{t|en|computer}} 19 | * finna: {{t|fi|tietokone}} 20 | * franca: '''1., 2.''' {{t|fr|calculatrice}} {{g|f}}, {{t|fr|ordinateur}} {{g|m}} 21 | * germana: '''1., 2.''' {{t|de|Computer}} {{g|m}}, {{t|de|Rechner}} {{g|m}} 22 | * hispana: '''1., 2.''' {{t|es|ordenador}} {{g|m}}, {{t|es|computadora}} {{g|f}}, {{t|es|computador|m}} 23 | * indonezia: {{t|id|komputer}} 24 | * itala: {{t|it|computer}}, {{t|it|computatore|not=rara}}, {{t|it|computiere|not=rara}}, {{t|it|calcolatore|not=rara}}, {{t|it|elaboratore|not=rara}}, {{t|it|ordinatore|not=Svislando, rara}} 25 | * kroata: {{t|hr|računalo}}, {{t|hr|kompjuter}} 26 | {{trad-mezo}} 27 | * nederlanda: {{t|nl|computer|m}} 28 | * pola: '''1., 2.''' {{t|pl|komputer|m}} 29 | * portugala: '''1., 2.''' {{t|pt|computador|m}} 30 | * rusa: {{t|ru|компьютер|m}}, {{t|ru|ЭВМ|f}} 31 | * sveda: {{t|sv|dator|u}} 32 | {{trad-fino}} 33 | 34 | {{Referencoj}} 35 | * {{ref-PIV}} 36 | * {{ref-Simpla Vortaro|{{PAGENAME}}}} 37 | * {{ref-Majstro|{{PAGENAME}}}} 38 | * {{ref-Tato|eo}} 39 | 
-------------------------------------------------------------------------------- /tests/data/fr/encyclopædie.wiki: -------------------------------------------------------------------------------- 1 | == {{langue|fr}} == 2 | === {{S|étymologie}} === 3 | : {{cf|lang=fr|encyclopédie}} 4 | 5 | === {{S|nom|fr}} === 6 | {{fr-rég|ɑ̃.si.klɔ.pe.di}} 7 | '''encyclopædie''' {{pron|ɑ̃.si.klɔ.pe.di|fr}} {{f}} 8 | 9 | # {{archaïsme|fr}} {{variante ortho de|encyclopédie}}. 10 | #* {{exemple | lang=fr 11 | | Oint qu’on ne peut rien ſçavoir ſolidement ſans ſçavoir vn peu de tout , qui eſt cette '''encyclopædie''' : ne plus ni moins qu’on ne peut ſçavoir vne charte particuliere ſans avoir connoiſſance de la generale , & meſmes les païs voiſins. 12 | | source=auteur incertain, ''Premiere Centvrie des Qvestions Traitees ez Conferences'', 1638}} 13 | #* {{exemple | lang=fr 14 | | Ainſi , la Logique leur rend le reciproque par vne correſpondance mutuelle, comme l’on pourra encore mieux remarquer dans l’obſeruation generale de noſtre '''Encyclopædie'''. 15 | | source=auteur incertain, ''La Science vniverselle de Sorel'', 1647}} 16 | #* {{exemple | lang=fr 17 | | Nous ne doutons point qu’il n’y en ayt eu aſſez qui ont ſçeu qu’il faloit tenir vn compte exact de toutes les Diſciplines , afin que les Hommes viſſe nt en peu de temps quelles pouuoient eſtre les richeſſes de leur Eſpirit, & qui pour y donner plus de facilité , ont taſché de reduire tant les Sciences que les Arts dans leurs dependances & leurs limites, mais ils n’ont pas tous reuſſi à trouuer leurs correſpondances & leurs iuſteſſes : Voyons quels ſont ceux qui ayans donné vne eſtenduẽ generalle à leur ouurage , ont trouué la vraye forme d’vne '''Encyclopædie'''. 
18 | | source=[http://fr.wikipedia.org/wiki/Charles_Sorel Charles Sorel], ''De la Perfection de l’Homme'', 1655}} 19 | 20 | [[Catégorie:æ en français|encyclopaedie]] 21 | 22 | {{clé de tri|encyclopaedie}} 23 | -------------------------------------------------------------------------------- /tests/data/ja/併.wiki: -------------------------------------------------------------------------------- 1 | {{kanji header|部画=人:2+6}} 2 | {{kanji variants|倂=[[康煕字典体]]/[[旧字体]]|并=[[簡体字]]}} 3 | ===字源=== 4 | * [[形声文字|形声]]。「[[人]]」+音符「[[幷]] {{phoneme|*PENG}}」。「[[ならぶ]]」「[[あわさる]]」を意味する漢語{[[併]] {{phoneme|*peng}}}を表す字。もと「幷」が{併}を表す字であったが、人偏を加えた。 5 | {{字源}} 6 | ===意義=== 7 | #(『[[説文解字]]』では「[[幷]]・[[并]]」)[[あわす]]。[[あわさる]]。[[あわせる]]。 8 | #*[[合併]]、[[併合]] 9 | #(『[[説文解字]]』では「[[倂]]・併」)[[ならぶ]]。[[ならべる]]。「[[並]]」とも書く。 10 | =={{L|ja}}== 11 | [[Category:{{ja}}|へい]] 12 | {{ja-kanji|常用=ヘイ,あわ-せる|呉音=ヒョウ<ヒャゥ|漢音=ヘイ<ヘィ|訓=あわ-せる}} 13 | ===={{prov}}==== 14 | *[[併起]] 15 | *[[併行]] 16 | *[[併合]] 17 | *[[併設]] 18 | *[[併吞]] 19 | *[[併発]] 20 | *[[併用]] 21 | *[[合併]] 22 | =={{L|zh}}== 23 | {{zh-cat|bing4|動詞}} 24 | {{trans_link|zh|{{PAGENAME}}}} 簡体字:「并」 25 | * '''ローマ字表記''' 26 | ** '''[[普通話]]''' 27 | *** '''[[ピンイン]]''': [[bìng]] ([[bing4]]) 28 | *** '''[[ウェード式]]''': ping4 29 | *** '''[[注音符号]]''': ㄅㄧㄥˋ 30 | * '''ローマ字表記''' 31 | ** '''[[広東語]]''' 32 | *** '''[[イェール式]]''': bing3 33 | ** '''[[閩南語]]''' 34 | *** '''[[POJ]]''': pèng 35 | ** '''[[呉語]]''' 36 | *** '''[[ピンイン]]''': pin3 37 | ** '''[[中古音]]''': pjiengX, bengX, pjiengH 38 | ** '''[[上古音]]''': 39 | *** '''鄭張''': *breːŋʔ, *breːŋs, *peŋʔ, *peŋs, *beːŋʔ 40 | ==={{verb}}=== 41 | #[[合併]]する。 42 | ===={{prov}}==== 43 | 44 | =={{L|ko}}== 45 | [[Category:{{ko}}|병]] 46 | {{ko-hanja|hangeul=[[병]]|rv=byeong|mr=pyŏng}} 47 | ===={{prov}}==== 48 | 49 | =={{L|vi}}== 50 | {{vi-han|tính|tinh|pos=verb}} 51 | ==={{verb}}=== 52 | tính 53 | #[[計算]]する。 54 | #[[熟考]]する、[[考慮]]する。 55 | #〜する[[つもり]]である。 56 | 57 | ==文字情報== 58 | {{character info}} 59 | 
{{文字コード|jis=1-42-27|mj={{mjmoji|006672}}|gb=81E3|cns=1-4B77|big5=A8D6|ksx=1027-1-2227}} 60 | {{検字|倉頡=OTT|四角=2824.1}} 61 | {{点字|tenji6=46,12346,1|tenji8=127,34567}} 62 | {{字典|dj=0212.170|hdz=10153.030|大字源=258|大漢語林=335|康煕=0100.220|新大字典=437|新潮漢字=347|諸橋=561}} 63 | -------------------------------------------------------------------------------- /tests/data/eo/luko.wiki: -------------------------------------------------------------------------------- 1 | =={{Lingvo|eo}}== 2 | {{oficialeco|8}} 3 | ==={{Vortospeco|substantivo|eo}}=== 4 | 5 | {{Deklinacio-eo}} 6 | 7 | {{Deveno}} 8 | :el la germana ''[[Luke]]'' 9 | 10 | {{Vorterseparo}} 11 | :luk/o. 12 | 13 | ===={{Signifoj}}==== 14 | :Aperturo: 15 | :[1] ordinare vitrita aŭ kradita, en tegmento, plafono aŭ kelo, por enlasi lumon: ''mansarda luko''. 16 | :[2] fermebla per pordo aŭ tabuloj, en la ferdeko de ŝipo, por ebligi penetron en la holdon (pli precize: holdluko). 17 | :[3] fermita per kovrilo el giso, kiu en la strato, sur trotuaro ks ebligas al metiisto malsupreniri en kloakon, aŭ subteran galerion. 18 | 19 | {{Sinonimoj}} 20 | :[1] lumluko, bovokulo, vazistaso. 21 | 22 | {{Derivaĵoj}} 23 | :[1] 24 | 25 | {{Derivaĵoj}} 26 | :[[luketo]]. Malgranda luko en pordo tra kiu oni povas ekvidi la personon, kiu sonoris. 27 | :[[lumluko]]. luko 1. 
28 | 29 | ===={{Tradukoj}}==== 30 | {{trad-eko}} 31 | *angla: [1] {{t|en|bull's eye}}, {{t|en|porthole}}, port hole 32 | *franca: [1] {{t|fr|lucarne}} {{g|f}}, {{t|fr|sabord}} {{g|m}} 33 | *germana: [1] {{t|de|Bullauge}} {{g|n}}, {{t|de|Dachfenster}}, {{t|de|Deckenfenster}}, {{t|de|Mansardenfenster}}, {{t|de|Luke}}, {{t|de|Dachluke}} 34 | *malaltgermana: [1] {{t|nds|Bulloog}} 35 | *pola: [1] {{t|pl|bulaj}} {{g|m}}, {{t|pl|iluminator}} {{g|m}}, {{t|pl|świetlik}} {{g|m}} 36 | {{trad-mezo}} 37 | *portugala: [1] {{t|pt|vigia}} {{g|f}} 38 | *hispana: [1] {{t|es|ojo de buey}}, {{t|es|escudilla}} 39 | *sveda: [1] {{t|sv|ventil}}, {{t|sv|oxöga}} (veraltet), {{t|sv|fönsterventil}} {{g|u}} 40 | 41 | {{trad-fino}} 42 | 43 | {{Referencoj}} 44 | {{vikipedio|luko}} 45 | * {{ref-ReVo|luko}} 46 | * {{ref-PIV}} 47 | * {{ref-Tato|eo}} 48 | * {{ref-Simpla Vortaro|luko}} 49 | 50 | {{Fontoj}} 51 | 52 | {{Similaĵoj}} 53 | : 54 | 55 | [[Kategorio:Dubinda (Esperanto)]] 56 | -------------------------------------------------------------------------------- /tests/data/ca/-itzar.wiki: -------------------------------------------------------------------------------- 1 | == {{-ca-}} == 2 | {{ca-pron|alg=izar}} 3 | {{etim-lang|la|ca|-izare}}, {{del-lang|grc|ca|-ίζειν}}. 4 | 5 | === Sufix === 6 | {{entrada|ca|sufix}} 7 | 8 | # {{def-meta|Aplicat a un [[substantiu]] o [[adjectiu]] forma un [[verb]] que expressa la seva realització o convertir-se'n.}} 9 | #: ''[[sexual]]'' + ''-itzar'' → ''[[sexualitzar]]'', ''[[gallec]]'' + ''-itzar'' → ''[[galleguitzar]]'' 10 | 11 | {{-notes-}} 12 | * En grafia medieval i moderna arcaica s'escrivia predominantment ''[[-isar]]'': realisar, actualisar... La forma ''-itzar'' era un cultisme usat especialment en paraules gregues religioses. 13 | * Actualment és un sufix productiu en tecnicismes. S'aplica també a algun nom propi ([[pasteuritzar]]) o acrònim ([[uperitzar]]). 
14 | * La pronúncia tradicional era fricativa {{IPAchar|[z]}} i la culta africada {{IPAchar|[d͡z]}}. 15 | ** En català central i nord-occidental s’ha generalitzat la pronúncia culta considerant la tradicional com a arcaica o relaxada. 16 | ** En balear predomina la pronúncia tradicional, però en registres formals s’usa la culta. 17 | ** En valencià s’accepta la pronúncia tradicional en registres formals. 18 | * És una hipercorrecció aplicar-lo a verbs formats per una arrel ''-is-'' que donen la terminació ''-isar'' ([[matisar]]). 19 | 20 | {{-der-}} 21 | {{vegeu-der-afix|ca}} 22 | 23 | {{-trad-}} 24 | {{t-inici}} 25 | * {{en}}: {{trad|en|-ize}}, {{trad|en|-yze}}, {{trad|en|-ise}}, {{trad|en|-yse}} 26 | * {{es}}: {{trad|es|-izar}} 27 | * {{eo}}: {{trad|eo|-igi}} 28 | * {{fr}}: {{trad|fr|-iser}}, {{trad|fr|-yser}} 29 | * {{it}}: {{trad|it|-izzare}} 30 | {{t-final}} 31 | 32 | === Vegeu també === 33 | * {{ca-dicc|dnv|gdlc}} 34 | * [https://web.archive.org/web/20091027094446/http://www.geocities.com/golls.geo/Fitxes/Textos/Arxius/itzar-ld9.htm Fitxa de la Coordinadora de Dinamització Lingüística del País Valencià]. 
35 | -------------------------------------------------------------------------------- /tests/test_2_utils.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | import responses 5 | 6 | from wikidict import constants, utils 7 | 8 | 9 | @responses.activate 10 | def test_formula_to_svg(caplog: pytest.LogCaptureFixture) -> None: 11 | formula_hash = "1b3c657d9bf9ae776f50d0b36ae0b1041abfe45d" 12 | 13 | responses.add( 14 | responses.POST, 15 | constants.WIKIMEDIA_URL_MATH_CHECK.format(type="chem"), 16 | headers={"x-resource-location": formula_hash}, 17 | json={ 18 | "success": True, 19 | "checked": "{\\ce {C10H14N2O4}}", 20 | "requiredPackages": ["mhchem"], 21 | "identifiers": [], 22 | "endsWithDot": False, 23 | }, 24 | ) 25 | responses.add( 26 | responses.GET, 27 | constants.WIKIMEDIA_URL_MATH_RENDER.format(format="svg", hash=formula_hash), 28 | body="", 29 | ) 30 | 31 | assert utils.formula_to_svg("C10H14N2O4", cat="chem").startswith(" None: 39 | responses.add(responses.POST, constants.WIKIMEDIA_URL_MATH_CHECK.format(type="chem"), status=404) 40 | utils.convert_chem("bad formula", "word") 41 | assert caplog.records[0].getMessage() == " ERROR with 'bad formula' in [word]" 42 | 43 | 44 | @responses.activate 45 | def test_convert_math_error(caplog: pytest.LogCaptureFixture) -> None: 46 | responses.add(responses.POST, constants.WIKIMEDIA_URL_MATH_CHECK.format(type="math"), status=404) 47 | utils.convert_math("bad formula", "word") 48 | assert caplog.records[0].getMessage() == " ERROR with 'bad formula' in [word]" 49 | -------------------------------------------------------------------------------- /tests/data/es/buque_mercante.wiki: -------------------------------------------------------------------------------- 1 | {{desambiguación|}} 2 | == {{lengua|es}} == 3 | {{pron-graf}} 4 | 5 | === Etimología === 6 | {{etimología}}. 
7 | 8 | ==== {{locución|es|sustantivo|masculino}} ==== 9 | {{es.sust|cop=s}} 10 | 11 | 12 | ;1 {{csem|náutica|comercio}}: {{plm|buque}} que pertenece a persona o empresa [[particular]], y que se emplea en la conducción de pasajeros y mercancías.{{DLE1925||buque}} 13 | {{hipónimo|petrolero|crucero|portacontenedores|gasero|granelero}} 14 | 15 | ==== Véase también ==== 16 | {{w}} 17 | 18 | ==== Traducciones ==== 19 | {{trad-arriba}} 20 | 21 | {{trad-abajo}} 22 | 23 | 24 | == Referencias y notas == 25 | 26 | -------------------------------------------------------------------------------- /tests/data/es/los.wiki: -------------------------------------------------------------------------------- 1 | == {{lengua|es}} == 2 | {{pron-graf}} 3 | 4 | === Etimología 1 === 5 | {{etimología|la|illos|diacrítico=illōs|sig=no}}, acusativo masculino plural {{etim|la|ille}}. 6 | 7 | ==== {{artículo determinado|es}} ==== 8 | {{es.adj|p}} 9 | ;1: {{plm|artículo determinado}} masculino plural. El singular es [[lo]]. 10 | {{uso|generalmente cuando el sustantivo se toma por conocido entre el emisor y receptor}} 11 | {{ejemplo|En la oración "recibí ''los'' mensajes", ''los'' denota "mensajes" ya definidos de cuya existencia sabía el hablante con anterioridad.}} 12 | {{ejemplo|En la oración "''los'' seres humanos son así", ''los'' denota que "seres humanos" se entiende en su sentido genérico.}} 13 | 14 | ==== {{pronombre personal|es}} ==== 15 | {{inflect.es.pron.pers.3}} 16 | ;2: ''{{plm|pronombre}} personal masculino de [[objeto directo]] ([[acusativo]]), tercera persona del plural.''. 17 | {{uso|puede emplearse como enclítico|pero no se usa como objeto de preposición|se usa la forma masculina para referirse|cualquier grupo del que al menos un miembro es de género masculino}} 18 | {{ejemplo|¿Dónde están tus sombreros? 
Estuve buscándo''los'', pero no ''los'' encontré.}} 19 | 20 | ==== Véase también ==== 21 | * [[artículo]] 22 | * [[artículo determinado]] o [[artículo definido]] 23 | * [[pronombre]] 24 | * [[Wikcionario:Categorías gramaticales]] 25 | 26 | ==== Traducciones ==== 27 | {{trad-arriba}} 28 | {{t|fr|a1=1|t1=les}} 29 | {{t|en|a1=1|t1=the|a2=2|t2=them}} 30 | {{t|pt|a1=1,2|t1=os}} 31 | {{trad-abajo}} 32 | 33 | == {{lengua|lfn}} == 34 | {{swadesh|lfn}} 35 | {{pron-graf|leng=lfn}} 36 | 37 | === Etimología === 38 | {{etimología|leng=lfn}}. 39 | 40 | === {{pronombre personal|lfn}} === 41 | ;1: {{plm|ellos}}. 42 | ;2: {{plm|ellas}}. 43 | ;3: {{plm|les}}. 44 | ;4: {{plm|las}}. 45 | ;5: {{plm|los}}. 46 | 47 | === {{adjetivo posesivo|lfn}} === 48 | ;6: {{plm|sus}}. 49 | 50 | == Referencias y notas == 51 | 52 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "wikidict" 7 | dynamic = ["version"] 8 | 9 | [tool.hatch.version] 10 | path = "wikidict/__init__.py" 11 | 12 | [tool.hatch.build.targets.sdist] 13 | only-include = [ 14 | "wikidict", 15 | ] 16 | 17 | [tool.hatch.build.targets.wheel] 18 | packages = [ 19 | "wikidict", 20 | ] 21 | 22 | [tool.coverage.report] 23 | exclude_also = [ 24 | "except Exception:", 25 | "except KeyboardInterrupt:", 26 | 'if "CI" in os.environ:', 27 | "if DEBUG_", 28 | 'if __name__ == "__main__":', 29 | "if TYPE_CHECKING:", 30 | ] 31 | 32 | [tool.mypy] 33 | # Ensure we know what we do 34 | warn_redundant_casts = true 35 | warn_unused_ignores = true 36 | warn_unused_configs = true 37 | 38 | # Imports management 39 | ignore_missing_imports = true 40 | follow_imports = "normal" 41 | 42 | # Ensure full coverage 43 | disallow_untyped_defs = true 44 | disallow_incomplete_defs = true 45 | disallow_untyped_calls = true 46 | 
47 | # Restrict dynamic typing (a little) 48 | # e.g. `x: List[Any]` or x: List` 49 | disallow_any_generics = true 50 | 51 | # From functions not declared to return Any 52 | warn_return_any = true 53 | 54 | [tool.pytest.ini_options] 55 | addopts = """ 56 | --strict-markers 57 | -vvv 58 | """ 59 | markers = """ 60 | webtest: an internet connection is required for that test. 61 | """ 62 | 63 | [tool.ruff] 64 | exclude = [ 65 | "data", 66 | "docs", 67 | ".git", 68 | ".github", 69 | ".mypy_cache", 70 | ".pytest_cache", 71 | ".ruff_cache", 72 | "venv", 73 | ] 74 | line-length = 120 75 | indent-width = 4 76 | target-version = "py313" 77 | 78 | [tool.ruff.lint] 79 | fixable = ["ALL"] 80 | extend-select = ["F", "I", "U"] 81 | 82 | [tool.ruff.format] 83 | quote-style = "double" 84 | indent-style = "space" 85 | skip-magic-trailing-comma = false 86 | line-ending = "auto" 87 | -------------------------------------------------------------------------------- /wikidict/lang/sv/__init__.py: -------------------------------------------------------------------------------- 1 | """Swedish language.""" 2 | 3 | import re 4 | 5 | from ... 
import utils 6 | from .variant_handlers import handlers as variant_handlers # noqa: F401 7 | 8 | random_word_url = "https://sv.wiktionary.org/wiki/Special:RandomRootpage" 9 | 10 | module_trans = "Modul" 11 | template_trans = "Mall" 12 | 13 | float_separator = "," 14 | thousands_separator = " " 15 | 16 | # https://sv.wiktionary.org/wiki/Wiktionary:Stilguide#Ordklassrubriken 17 | head_sections = ("svenska",) 18 | sections = ( 19 | "adjektiv", 20 | "adverb", 21 | "affix", 22 | "artikel", 23 | "efterled", 24 | "förkortning", 25 | "förled", 26 | "interjektion", 27 | "konjunktion", 28 | "possessivt pronomen", 29 | "postposition", 30 | "prefix", 31 | "preposition", 32 | "pronomen", 33 | "substantiv", 34 | "suffix", 35 | "verb", 36 | "verbpartikel", 37 | ) 38 | 39 | variant_titles = ( 40 | "adjektiv", 41 | "adverb", 42 | "substantiv", 43 | "verb", 44 | ) 45 | variant_templates = ( 46 | "{{avledning", 47 | "{{böjning", 48 | ) 49 | 50 | templates_ignored = ( 51 | "{{?", 52 | "{{citat", 53 | "{{inget uppslag", # nospread 54 | "{{fakta", # facts 55 | "{{källa-so", # missing source 56 | "{{konstr", # incomplete construction 57 | ) 58 | 59 | 60 | def find_pronunciations(code: str, locale: str) -> list[str]: 61 | """ 62 | >>> find_pronunciations("", "sv") 63 | [] 64 | >>> find_pronunciations("{{uttal|sv|ipa=eːn/, /ɛn/, /en}}", "sv") 65 | ['/eːn/, /ɛn/, /en/'] 66 | >>> find_pronunciations("{{uttal|sv|ipa=en|uttalslänk=-|tagg=vissa dialekter}}", "sv") 67 | ['/en/'] 68 | >>> find_pronunciations("{{uttal|sv|ipa=ɛn|uttalslänk=-}}", "sv") 69 | ['/ɛn/'] 70 | """ 71 | pattern = re.compile(rf"\{{uttal\|{locale}\|(?:[^\|]+\|)?ipa=([^}}|]+)}}?\|?") 72 | return [f"/{p}/" for p in utils.unique(pattern.findall(code))] 73 | -------------------------------------------------------------------------------- /tests/data/it/lettore.wiki: -------------------------------------------------------------------------------- 1 | == {{-it-}} == 2 | {{-sost-|it}} 3 | {{Pn}} ''m sing'' 4 | 
{{Tabs|lettore|lettori|lettrice|lettrici}} 5 | #[[chi]] legge un [[libro]], un [[giornale]] o una [[rivista]] 6 | # {{Term|religione|it}} persona che in alcune chiese cristiane, come la Chiesa cattolica, la Chiesa anglicana e quella ortodossa, è incaricata di proclamare la parola di Dio e altri testi nelle celebrazioni liturgiche e di esercitare altri compiti in campo pastorale 7 | # {{Term|elettronica|it}} {{Term|informatica|it}} {{Term|tecnologia|it}} {{Term|ingegneria|it}} dispositivo elettronico che decodifica e riceve informazioni da un supporto 8 | 9 | {{-sill-}} 10 | ; let | tó | re 11 | 12 | {{-pron-}} 13 | {{IPA|/letˈtore/}} 14 | 15 | {{-etim-}} 16 | dal [[latino]] ''[[lector]]'', derivazione di ''[[legĕre]]'' ossia "[[leggere]]" 17 | 18 | {{-quote-}} 19 | {{Quote 20 | |Ogni lettore, quando legge, è il lettore di se stesso. L'opera dello scrittore è solo una specie di strumento ottico offerto al lettore per consentirgli di discernere ciò che forse, senza quel libro, non avrebbe potuto intravedere in se stesso 21 | |[[q:Marcel Proust|Marcel Proust]]}} 22 | 23 | {{-sin-}} 24 | * [[riproduttore]] 25 | * ''(in informatica)'' [[decodificatore]], [[interprete]] 26 | 27 | {{-der-}} 28 | *[[acchiappalettore]], [[labiolettore]], [[lettorato]], [[lettore CD]], [[lettore DVD]], [[lettore MP3]], [[lettore MP4]], [[lettore multimediale]] 29 | 30 | {{-rel-}} 31 | * [[leggere]], [[lettura]] 32 | 33 | {{-trad-}} 34 | {{Trad1|persona che legge}} 35 | :* {{en}} [[reader]] 36 | {{Trad2}} 37 | 38 | {{Trad1|dispositivo}} 39 | :* {{en}} [[player]], [[reader]], [[scanner]] 40 | {{Trad2}} 41 | 42 | {{Trad1|lettore universitario}} 43 | :* {{en}}: [[lecturer]], [[reader]], [[lector]] 44 | {{Trad2}} 45 | 46 | {{Trad1|informatica}} 47 | :* {{en}}: [[driver]] 48 | {{Trad2}} 49 | 50 | {{-ref-}} 51 | * {{Fonte|trec}} 52 | * {{Fonte|gar}} 53 | * {{Fonte|dizit}} 54 | * {{Fonte|dem}} 55 | -------------------------------------------------------------------------------- 
/tests/data/fr/mutiner.wiki: -------------------------------------------------------------------------------- 1 | == {{langue|fr}} == 2 | === {{S|étymologie}} === 3 | : {{dénominal|de=mutin|lang=fr|m=1}}. 4 | 5 | === {{S|verbe|fr}} === 6 | '''mutiner''' {{pron|my.ti.ne|fr}} {{prnl|nocat}} {{conj|grp=1|fr}} {{lien pronominal||fr|exclusivement=oui}} 7 | # Se [[porter]] à la [[sédition]], à la [[révolte]]. 8 | #* {{exemple | lang=fr | Des troupes '''mutinées'''.}} 9 | #* {{absolument|fr}} ''Le peuple se '''mutinait'''.'' 10 | #* {{exemple | lang=fr | Cet ordre rigoureux fit '''mutiner''' le peuple.}} 11 | # [[enfant|Enfant]] qui se [[dépiter|dépite]] et [[manque]] à l’[[obéissance]]. 12 | #* {{exemple | lang=fr | Cet enfant se '''mutine''' à chaque instant.}} 13 | # {{poétique|fr}} … 14 | #* ''Les flots, les vents '''mutinés''','' Les flots agités, les vents impétueux. 15 | 16 | ==== {{S|apparentés}} ==== 17 | * [[mutinerie]] 18 | 19 | ==== {{S|vocabulaire}} ==== 20 | * [[désobéir]] 21 | * [[se rebeller]] 22 | 23 | ==== {{S|traductions}} ==== 24 | {{trad-début|Se porter à la sédition, à la révolte|1}} 25 | * {{T|de}} : {{trad+|de|meutern}} 26 | * {{T|en}} : {{trad+|en|mutiny}} 27 | * {{T|ca}} : {{trad+|ca|amotinar}}, {{trad+|ca|avalotar}} 28 | * {{T|es}} : {{trad+|es|amotinar}} 29 | * {{T|nl}} : {{trad+|nl|muiten}} 30 | {{trad-fin}} 31 | 32 | {{trad-début|Enfant qui se dépite et manque à l’obéissance|2}} 33 | * {{T|de}} : {{trad+|de|rebellieren}} 34 | * {{T|en}} : {{trad|en|rebel}} 35 | * {{T|nl}} : {{trad+|nl|rebelleren}} 36 | {{trad-fin}} 37 | 38 | {{trad-début|{{poétique|nocat=1}} Les flots, les vents mutinés|3}} 39 | * {{T|de}} : {{trad+|de|tosen}} 40 | * {{T|nl}} : {{trad+|nl|beuken}} 41 | {{trad-fin}} 42 | 43 | === {{S|prononciation}} === 44 | * {{écouter|lang=fr|France (Lyon)||audio=LL-Q150 (fra)-Lyokoï-mutiner.wav}} 45 | * {{écouter|Somain (France)||lang=fr|audio=LL-Q150 (fra)-Jérémy-Günther-Heinz Jähnick-mutiner.wav}} 46 | 47 | === {{S|anagrammes}} === 48 | 
{{voir anagrammes|fr}} 49 | 50 | === {{S|références}} === 51 | *{{Import:DAF8}} 52 | -------------------------------------------------------------------------------- /tests/data/no/aberrasjon.wiki: -------------------------------------------------------------------------------- 1 | ==Norsk== 2 | {{wikipediaartikkel}} 3 | ===Substantiv=== 4 | {{no-sub|m}} 5 | 6 | #avvik, avvikelse 7 | #{{astronomi|no}} avvik i en stjernes avbildede posisjon relativ til dens sanne posisjon. 8 | #{{optikk|no}} avbildningsfeil i [[linse]]r og [[speil]]. 9 | #{{biologi|no}} endring i et kromosom mens celledeling pågår. 10 | 11 | ====Etymologi==== 12 | * Fra {{etyl|la|no}} {{term|aberratio|aberrātiō|lindring, avvikelse|lang=la}} [http://dictionary.reference.com/browse/aberration Aberration hos Dictionary.com], fra {{term|aberro|aberrō|gå unna/bort, gå vill|lang=la}}, fra {{term|ab||bort|lang=la}} + {{term|erro|errō|vandre/gå|lang=la}}{{reference-book | last = Dobbie | first = Elliott K. | coauthors = Dunmore, C. William, et al. | editor = Barnhart, Robert K.| title = Chambers Dictionary of Etymology | origyear = 1998 | year = 2004 | publisher = Chambers Harrap Publishers Ltd | location = Edinburgh, Scotland | isbn =0550142304 | pages = 2}}. 13 | * Se [[aberrate]]. 
14 | 15 | ====Uttale==== 16 | {{lyd|LL-Q9043 (nor)-Teodor605-aberrasjon.wav|språk=no|Lyd (Oslouttale)}} 17 | {{uttale mangler|språk=no}} 18 | 19 | ====Grammatikk==== 20 | {{no-sub-m1|aberrasjon}} 21 | {{ordbank|OK}} 22 | 23 | ====Oversettelser==== 24 | {{overs-topp|avvik}} 25 | * {{overs|da|aberration|c}} 26 | * {{overs|en|aberration|c}} 27 | * {{overs|fi|poikkeama}} 28 | * {{overs|fr|aberration|f}} 29 | * {{overs|it|aberrazione|f}} 30 | {{overs-midt}} 31 | * {{overs|pt|aberração|f}} 32 | * {{overs-mangler|ru}} 33 | * {{overs-mangler|es}} 34 | * {{overs|sv|aberration|c}} 35 | * {{overs|de|Aberration|f}} 36 | {{overs-bunn}} 37 | {{overs-topp|astronomi}} 38 | *{{overs|fi|aberraatio}} 39 | * {{overs|fr|aberration|f}} 40 | {{overs-midt}} 41 | {{overs-bunn}} 42 | {{overs-topp|optikk}} 43 | * {{overs|fr|aberration|f}} 44 | {{overs-midt}} 45 | * {{overs|sv|aberration|c}} 46 | {{overs-bunn}} 47 | 48 | ===Referanser=== 49 | 50 | * {{R:Bokmålsordboka-Nynorskordboka}} 51 | * {{R:NAOB}} 52 | -------------------------------------------------------------------------------- /tests/data/ro/Lama.wiki: -------------------------------------------------------------------------------- 1 | {{vezi|lama|láma|lǎma|lamă}} 2 | =={{limba|conv}}== 3 | {{-nume taxonomic-|conv}} 4 | '''''Lama''''' 5 | #(''zool.'') [[gen]] de [[animal]]e din [[familie|familia]] ''[[Camelidae]]''; (''spec.'') [[lamă]], [[guanaco]] 6 | {{-hipo-}} 7 | * ''[[w:Lama glama|Lama glama]]'' 8 | * ''[[w:Lama guanicoe|Lama guanicoe]]'' 9 | 10 | {{alta}} 11 | 12 | =={{limba|deu}}== 13 | {{cuv|de}} 14 | {{-etimologie-}} 15 | Din spaniolă ''[[llama]]'' < limba quechua ''[[llama]]''. 16 | 17 | Pentru al doilea înțeles al cuvântului, probabil sub influența adjectivului ''[[lahm]]''. 
18 | {{-pronunție-}} 19 | * {{AFI}}: {{AFI|/ˈlaːmaː/}} 20 | {{-substantiv-|deu}} 21 | {{substantiv-deu 22 | |gen={{n}} 23 | |nom-sg=das Lama 24 | |nom-pl=die Lamas 25 | |akk-sg=das Lama 26 | |akk-pl=die Lamas 27 | |dat-sg=dem Lama 28 | |dat-pl=den Lamas 29 | |gen-sg=des Lamas 30 | |gen-pl=der Lamas 31 | }} 32 | #(''zool.'') [[lamă]] 33 | #:''Es gibt zwei bis drei Arten von '''Lamas'''.'' 34 | #(''fam.'') ([[om]]) [[stângaci]], [[neîndemânatic]] 35 | #(''fam.'') [[persoană]] care [[scuipa|scuipă]] [[când]] [[vorbi|vorbește]] 36 | {{-sin-}} 37 | *'''1:''' (zool.) [[Neuweltkamel]] 38 | {{-apr-}} 39 | * [[Kamel]] 40 | ===Vezi și=== 41 | * [[Alpaka]] 42 | * [[Guanako]] 43 | {{-etimologie-}} 44 | Din limba tibetană [[བླ་མ]] (bla-ma). 45 | {{-pronunție-}} 46 | * {{AFI}}: {{AFI|/ˈlaːmaː/}} 47 | {{-substantiv-|deu}} 48 | {{substantiv-deu 49 | |gen={{m}} 50 | |nom-sg=der Lama 51 | |nom-pl=die Lamas 52 | |akk-sg=den Lama 53 | |akk-pl=die Lamas 54 | |dat-sg=dem Lama 55 | |dat-pl=den Lamas 56 | |gen-sg=des Lamas 57 | |gen-pl=der Lamas 58 | }} 59 | #(''rel.'') [[lama]] 60 | {{-sin-}} 61 | * (rel.) 
[[Guru]] 62 | {{-deriv-}} 63 | * [[Lamaismus]] 64 | ===Referințe=== 65 | * [http://www.dwds.de/ DWDS] 66 | * [http://en.wiktionary.org/wiki/ Wiktionary] 67 | * [http://de.wiktionary.org/wiki/ Wiktionary] 68 | [[Categorie:Mamifere în germană]] 69 | [[Categorie:Camelide în germană]] 70 | [[Categorie:Profesiuni în germană]] 71 | [[Categorie:Budism în germană]] 72 | -------------------------------------------------------------------------------- /tests/data/el/τσιγγάνα.wiki: -------------------------------------------------------------------------------- 1 | =={{-el-}}== 2 | {{el-κλίση-'πείνα'}} 3 | ==={{ουσιαστικό|el}}=== 4 | '''{{PAGENAME}}''' {{θ}} 5 | * {{θηλ του|τσιγγάνος}} 6 | *: {{παράθεμα|ποίηση}} ''Περδικόστηθη '''Tσιγγάνα''',''''ω μαγεύτρα, που μιλείς''''τα μεσάνυχτα προς τ' άστρα''''γλώσσα προσταγής!'' 7 | *:: {{β|Κωστής Παλαμάς}}, ''Ο Δωδεκάλογος του Γύφτου'', [[s:Ο δωδεκάλογος του Γύφτου/Αγάπη|Λόγος Γ΄ Αγάπη, 1η στροφή]] 8 | {{clear}} 9 | ===={{μεταφράσεις}}==== 10 | {{μτφ-αρχή}} 11 | * {{en}} : {{τ|en|gitana}} 12 | 13 | 14 | 15 | 16 | * {{fr}} : {{τ|fr|gitane}}, {{τ|fr|tzigane}} 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | {{μτφ-τέλος}} 53 | 54 | {{κλείδα-ελλ}} 55 | -------------------------------------------------------------------------------- /tests/data/el/-ης.wiki: -------------------------------------------------------------------------------- 1 | =={{-el-}}== 2 | 3 | ==={{ετυμολογία}}=== 4 | # '''{{PAGENAME}}''' < {{αρχ}} [[-ης]] 5 | # '''{{PAGENAME}}''' < {{ελνστ}} [[-ις]] < {{αρχ}} [[-ιος|-(ε)ιος]] ({{αρχ}} [[κύριος]], {{αιτ}} τόν κύριον > {{ελνστ}} τόν κῦριν →ὁ κῦρις > {{μσν}} [[κύρης]] > {{νε}} [[νοικοκύρης]]) 6 | # '''{{PAGENAME}}''' < {{μσν}} '''-ης''' 7 | # '''{{PAGENAME}}''' < {{αρχ}} '''-ης, -ης, -ες''' & '''-ής, -ής, -ές''' 8 | # '''{{PAGENAME}}''' < {{ελνστ}} '''-ῆς''' ([[γενική]] [[ενικού]] [[θηλυκών]]: 
κατά γ'''ῆς''') 9 | # '''{{PAGENAME}}''' < {{ετυμ|tr}} '''-i''' ([[fıstık]] > [[fıstıki|fıstık'''i''']]) 10 | 11 | ==={{κατάληξη αρσενικών ουσιαστικών|el}}=== 12 | '''{{PAGENAME}}''' 13 | # [[παραγωγικός|παραγωγική]] [[κατάληξη]] [[ισοσύλλαβος|ισοσύλλαβων]] [[αρσενικό|αρσενικών]] [[ουσιαστικό|ουσιαστικών]] 14 | #: [[εργάτης|εργάτ'''ης''']], [[πολίτης|πολίτ'''ης''']] 15 | # [[παραγωγικός|παραγωγική]] [[κατάληξη]] [[ανισοσύλλαβος|ανισοσύλλαβων]] [[αρσενικό|αρσενικών]] [[ουσιαστικό|ουσιαστικών]] 16 | #: [[Αντώνης|Αντών'''ης''']], [[νοικοκύρης|νοικοκύρ'''ης''']] 17 | 18 | ==={{κατάληξη αρσενικών επιθέτων|el}}=== 19 | '''{{PAGENAME}}''' & [[-ής]] 20 | # [[παραγωγικός|παραγωγική]] [[κατάληξη]] [[τρικατάληκτος|τρικατάληκτων]] [[τριγενής|τριγενών]] [[επίθετο|επιθέτων]] (-'''ης''', -'''α''', -'''ικο''') 21 | #: [[τεμπέλης|τεμπέλ'''ης''']] 22 | # [[παραγωγικός|παραγωγική]] [[κατάληξη]] [[δικατάληκτος|δικατάληκτων]] [[τριγενής|τριγενών]] [[επίθετο|επιθέτων]] (-'''ης''', -'''ης''', -'''ες''' & -'''ής''', -'''ής''', -'''ές''') 23 | #: [[πλήρης|πλήρ'''ης''']], [[συνεχής|συνεχ'''ής''']] 24 | 25 | ==={{κατάληξη επιρρημάτων|el}}=== 26 | '''{{PAGENAME}}''' & [[-ής]] 27 | # [[κατάληξη]] [[επίρρημα|επιρρημάτων]] που προέρχονται από εμπρόθετα με γενική θηλυκού ουσιαστικού 28 | #: [[επικεφαλής|επικεφαλ'''ής''']], [[καταγής|καταγ'''ής''']] 29 | 30 | ==={{επίθημα|el}}=== 31 | '''{{PAGENAME}}''' (& [[-ής]]) 32 | # [[επίθημα]] [[τρικατάληκτος|τρικατάληκτων]] [[τριγενής|τριγενών]] [[επίθετο|επιθέτων]] (-'''ής''', -'''ιά''', -'''ί''') 33 | #: [[βυσσινής|βυσσιν'''ής''']], [[φιστικής|φιστικ'''ής''']] 34 | 35 | {{κλείδα-ελλ}} 36 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml.disabled: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | - pull_request 5 | - workflow_dispatch 6 | 7 | permissions: 8 | contents: write 9 | pull-requests: write 10 | 11 | concurrency: 12 | 
group: ${{ github.ref }}-${{ github.workflow }}-${{ github.event_name }}-${{ github.event_name != 'pull_request' && github.sha || '' }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | tests: 17 | strategy: 18 | matrix: 19 | os: 20 | - ubuntu-latest 21 | - macos-latest 22 | 23 | runs-on: ${{ matrix.os }} 24 | timeout-minutes: 3 25 | 26 | steps: 27 | - uses: actions/checkout@v5 28 | 29 | - name: Set up Python 30 | uses: actions/setup-python@v6 31 | with: 32 | python-version: "3.13" 33 | cache: pip 34 | 35 | - name: Install requirements 36 | run: python -m pip install -r requirements-tests.txt 37 | 38 | - name: Install kindlegen (GNU/Linux) 39 | if: ${{ matrix.os == 'ubuntu-latest' }} 40 | run: | 41 | mkdir -p ~/.local/bin 42 | wget https://raw.githubusercontent.com/reader-dict/monolingual/refs/heads/kindlegen-backup/kindlegen_linux/kindlegen -O ~/.local/bin/kindlegen 43 | chmod a+x ~/.local/bin/kindlegen 44 | 45 | - name: Install kindlegen (macOS) 46 | if: ${{ matrix.os == 'macos-latest' }} 47 | run: | 48 | mkdir -p ~/.local/bin 49 | wget https://raw.githubusercontent.com/reader-dict/monolingual/refs/heads/kindlegen-backup/kindlegen_mac/kindlegen64 -O ~/.local/bin/kindlegen 50 | chmod a+x ~/.local/bin/kindlegen 51 | 52 | - name: Unit tests 53 | run: python -Wd -m pytest tests --doctest-modules wikidict 54 | 55 | automerge: 56 | runs-on: ubuntu-latest 57 | needs: [tests] 58 | if: ${{ github.actor == 'dependabot[bot]' }} 59 | steps: 60 | - name: Automerge 61 | run: gh pr merge --auto --rebase "$PR_URL" 62 | env: 63 | PR_URL: ${{github.event.pull_request.html_url}} 64 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 65 | -------------------------------------------------------------------------------- /tests/test_zh.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Callable 2 | from pathlib import Path 3 | from unittest.mock import patch 4 | 5 | import pytest 6 | 7 | from wikidict import context 8 | from 
wikidict.render import parse_word 9 | from wikidict.stubs import Definitions 10 | 11 | 12 | @pytest.fixture(scope="module", autouse=True) 13 | def setup_lua_ctx() -> None: 14 | with patch.dict("os.environ", {"CWD": str(Path(context.__file__).parent.parent)}): 15 | assert context.reset("zh") 16 | 17 | 18 | @pytest.mark.parametrize( 19 | "word, pronunciations, genders, etymology, definitions, variants", 20 | [ 21 | ( 22 | "七講八講", 23 | [], 24 | [], 25 | [], 26 | {"動詞": ["(漳泉話,吳語) 亂講、胡說", "(柳州官話) 用各種方式解釋"]}, 27 | [], 28 | ), 29 | ( 30 | "稍後", 31 | ["/shāohòu/"], 32 | [], 33 | [], 34 | { 35 | "副詞": ["在短暫的時間之後"], 36 | "動詞": ["稍候 (shāohòu)的拼寫錯誤。"], 37 | }, 38 | [], 39 | ), 40 | ( 41 | "佛教", 42 | ["/Fójiào/"], 43 | [], 44 | [], 45 | { 46 | "專有名詞": [ 47 | "源自印度,奉釋迦牟尼為教主的宗教,以解脫生死、明心見性為教義,可以分為北傳佛教、南傳佛教以及禪宗、淨土宗、密宗等派別,信徒分布於東亞、南亞、東南亞,為世界三大宗教之一。" 48 | ], 49 | }, 50 | [], 51 | ), 52 | ], 53 | ) 54 | def test_parse_word( 55 | word: str, 56 | pronunciations: list[str], 57 | genders: list[str], 58 | etymology: list[Definitions], 59 | definitions: list[Definitions], 60 | variants: list[str], 61 | page: Callable[[str, str], str], 62 | ) -> None: 63 | """Test the sections finder and definitions getter.""" 64 | code = page(word, "zh") 65 | details = parse_word(word, code, "zh", force=True) 66 | assert details 67 | assert pronunciations == details.pronunciations 68 | assert genders == details.genders 69 | assert etymology == details.etymology 70 | assert definitions == details.definitions 71 | assert variants == details.variants 72 | -------------------------------------------------------------------------------- /tests/data/it/condividere.wiki: -------------------------------------------------------------------------------- 1 | == {{-it-}} == 2 | {{-verb-|it}} 3 | {{Transitivo|it}} 4 | {{Pn|c}} 5 | # [[spartire]] con altri 6 | # avere qualcosa in [[comune]] con qualcun altro 7 | # essere d'[[accordo]] con altri su un [[punto di vista]] 8 | # {{Term|filosofia|it}} {{Term|economia|it}} 
[[mettere]] [[spazi]] e [[risorse]] in [[comune]] con [[altri]] 9 | # {{Term|informatica|it}} [[ricevere]] o [[mettere]] un'[[informazione]] in [[comune]] con [[altri]] [[utenti]] 10 | 11 | {{-sill-}} 12 | ; con | di | vì | de | re 13 | 14 | {{-pron-}} 15 | {{IPA|/kondiˈvidere/}} 16 | 17 | {{-etim-}} 18 | dal [[latino]] ''[[cum]]'' e ''[[dividere]]''; l'attuale uso improprio del verbo ''condividere'' è dovuto alla diffusione dei [[social network]] negli anni 2000 e 2010 19 | 20 | {{-sin-}} 21 | * [[aderire]], [[appoggiare]], [[approvare]], [[concordare]] esprimere adesione, essere solidale, essere d’accordo, [[partecipare]], [[sostenere]], 22 | *avere in comune, [[compartecipare]] [[possedere]], [[dividere]], [[spartire]] 23 | * {{Est}} {{Fig}} [[accettare]], [[accogliere]] 24 | 25 | {{-ant-}} 26 | *[[avversare]], [[combattere]], [[contrariare]], [[contrastare]], [[discordare]], [[dissentire]], [[intralciare]], [[osteggiare]], [[separare]], 27 | * {{Est}} {{Fig}} [[ribellarsi]], 28 | *''(informatica)'' [[pubblicare]] 29 | 30 | {{-der-}} 31 | * [[condivisione]], [[condivisibile]], [[condiviso]] 32 | 33 | {{-rel-}} 34 | * [[dividere]] 35 | 36 | {{-trad-}} 37 | {{Trad1|dividere, spartire}} 38 | :*{{ca}}: [[compartir]] 39 | :*{{en}}: [[share]] 40 | :* {{la}}: [[communicare]], [[exsequere]] 41 | {{Trad2}} 42 | 43 | {{Trad1|avere qualcosa in comune}} 44 | :*{{en}}: 45 | {{Trad2}} 46 | 47 | {{Trad1|avere lo stesso punto di vista}} 48 | :*{{en}}: 49 | {{Trad2}} 50 | 51 | {{-ref-}} 52 | * {{Fonte|trec}} 53 | * {{Fonte|hoep}} 54 | * {{Fonte|sape}} 55 | * {{Fonte|sabco}} 56 | * {{Fonte|dizit}} 57 | * {{Fonte|gar}} 58 | * {{Fonte|sin-co}} 59 | * {{Fonte|dem}} 60 | *AA.VV., ''Dizionario sinonimi e contrari'', Mariotti, 2006, pagina 134 61 | 62 | [[Categoria:Internet-IT|condividere]] 63 | -------------------------------------------------------------------------------- /wikidict/lang/ru/variant_handlers.py: 
-------------------------------------------------------------------------------- 1 | import re 2 | from collections import defaultdict 3 | 4 | from ... import context, utils 5 | 6 | 7 | def cleanup(form: str) -> str: 8 | return utils.cleanup_rev_variant(form) 9 | 10 | 11 | def render_variant(tpl: str, parts: list[str], data: defaultdict[str, str], word: str) -> str: 12 | """ 13 | >>> render_variant("прич.", ["зыбить"], defaultdict(str), "") 14 | 'зыбить' 15 | >>> render_variant("прич.", ["находить (наталкиваться)", "наст"], defaultdict(str), "") 16 | 'находить' 17 | >>> render_variant("прич.", ["?"], defaultdict(str), "") 18 | '' 19 | """ 20 | if (variant := parts[0]) == "?": 21 | variant = "" 22 | if " (" in variant: 23 | variant = variant.split(" (", 1)[0] 24 | return variant 25 | 26 | 27 | def render_reverse_variant(tpl: str, parts: list[str], data: defaultdict[str, str], word: str) -> str: 28 | """ 29 | >>> render_reverse_variant("rev-flexion", ["коро́ль"], defaultdict(str), "") 30 | 'коро́ль' 31 | """ 32 | if tpl == "rev-flexion": 33 | return parts[0].strip() 34 | 35 | forms: set[str] 36 | table = context.expand(utils.reconstruct_tpl(tpl, parts, data), "ru") 37 | if table.startswith("{"): 38 | table = table.replace("", "\n| ").replace("", "\n| ") 39 | forms = {form[2:].strip() for form in table.splitlines() if form.startswith("| ") and not form.endswith("| ")} 40 | else: 41 | table = table.replace("", "").replace("", "").replace(' rowspan="2"', "") 42 | forms = set(re.findall(r"([^<]+)", table)) 43 | 44 | if forms: 45 | forms = {cleanup(form) for form in forms} 46 | forms.discard(word) 47 | forms.discard("") 48 | 49 | return "|".join(forms) 50 | 51 | 52 | handlers = { 53 | "прич.": render_variant, 54 | "rev-flexion": render_reverse_variant, 55 | } 56 | 57 | 58 | def append_to_reverse_variants(tpl: str) -> None: 59 | """Dynamically append a template to reverse variants templates.""" 60 | if tpl in handlers: 61 | return 62 | handlers[tpl] = 
render_reverse_variant 63 | -------------------------------------------------------------------------------- /tests/data/no/bare.wiki: -------------------------------------------------------------------------------- 1 | {{se også|Bäre}} 2 | ==Norsk== 3 | ===Adverb=== 4 | {{nb-adv}} 5 | 6 | # [[begrensende]], [[kun]] 7 | #: ''Det er '''bare''' lov å spise brunost'' 8 | #Gir dempende effekt 9 | #: ''Jeg skal '''bare''' på do'' 10 | #Gir forsterkende effekt 11 | #:''Han er '''bare''' så kul!'' 12 | #Gir en sitatfunksjon, særlig i muntlig språk. 13 | #: ''Hun '''bare''': GI meg katten min!'' 14 | 15 | ====Andre former==== 16 | {{andre former|berre|nb=nei|nn=ja|nrm=nei}} 17 | 18 | ====Synonymer==== 19 | * [[blott]] {{norm|nb=ja|nrm=ja|nn=ja}} 20 | * [[kun]] {{norm|nb=ja|nrm=ja}} 21 | 22 | ====Avledete termer==== 23 | * [[ikke bare bare]] 24 | 25 | ====Oversettelser==== 26 | {{overs-topp|kun}} 27 | *{{overs|af|net}} 28 | *{{overs|da|bare}}, {{o|da|kun}} 29 | * {{overs|en|only}}, {{o|en|just}}, {{o|en|merely}} 30 | *{{overs|eo|nur}} 31 | * {{overs|fi|vain}} 32 | {{overs-midt}} 33 | * {{overs|fr|seulement}}, {{o|fr|uniquement}}, {{o|fr|rien que}}, ne... 
que 34 | *{{overs|nl|slechts}}, {{o|nl|alleen}} 35 | * {{overs|pt|só}} 36 | * {{overs|sv|bara}}, {{o|sv|enbart}}, {{o|sv|endast}} 37 | * {{overs|de|nur}}, {{o|de|bloß}} 38 | {{overs-bunn}} 39 | 40 | ===Adjektiv=== 41 | '''bare''' {{norm|nb=ja|nrm=ja|nn=ja}} 42 | 43 | # {{no-adj-bøyningsform|fl|bar|nb=ja|nrm=ja|nn=ja}} 44 | # {{no-adj-bøyningsform|b|bar|nb=ja|nrm=ja|nn=ja}} 45 | 46 | ===Referanser=== 47 | * {{R:Bokmålsordboka-Nynorskordboka}} 48 | * {{R:NAOB}} 49 | 50 | 51 | ---- 52 | 53 | ==Dansk== 54 | ===Adverb=== 55 | {{da-adv}} 56 | 57 | #[[#Norsk|bare]] 58 | 59 | ===Referanser=== 60 | * {{R:DDO}} 61 | 62 | 63 | ---- 64 | 65 | ==Engelsk== 66 | ===Adjektiv=== 67 | {{en-adj|bar|er|est}} 68 | 69 | # [[bar#Adjektiv|bar]], [[naken]] 70 | # [[minimal]] 71 | # [[tom]] 72 | #: {{eksempel|en|a room '''bare''' of furniture|et rom '''tomt''' for møbler}} 73 | #: {{eksempel|en|The cupboard was '''bare'''.|Skapet var '''tomt'''.}} 74 | #: {{eksempel|en|The walls of this room are '''bare''' — why not hang some paintings on them?|Veggene i dette rommet er tomme - hvorfor ikke henge opp noen malerier på dem?}} 75 | 76 | ===Adverb=== 77 | {{en-adv}} 78 | 79 | # [[svært]], [[veldig]] 80 | # [[knapt]] 81 | 82 | ====Synonymer==== 83 | * ''knapt:'' [[barely]] 84 | -------------------------------------------------------------------------------- /tests/data/es/hocico.wiki: -------------------------------------------------------------------------------- 1 | == {{lengua|es}} == 2 | {{pron-graf}} 3 | 4 | === Etimología 1 === 5 | {{etimología|endo|hocicar}}. 6 | 7 | ==== {{sustantivo masculino|es}} ==== 8 | {{es.sust}} 9 | ;1 {{csem|zootomía}}: Parte más o menos [[prolongar|prolongada]] de la [[cabeza]] de algunos [[animal]]es en que están la [[boca]] y las [[nariz|narices]]. 10 | ;2 {{csem|anatomía}}: {{plm}} de una persona cuando tiene muy [[abultado]]s los [[labio]]s. 11 | ;3: {{plm|cara}}. 12 | {{uso|familiar}}. 
13 | {{ejemplo|Félix tiene buen hocico.}} 14 | ;4: {{plm|gesto}} que [[denotar|denota]] [[enojo]] o [[enfado]]. 15 | ;5: Forma despectiva para referirse a la boca de alguien. 16 | {{ámbito|Chile}}. 17 | {{uso|despectivo|malsonante}} 18 | {{ejemplo|Te voy a dar un [[combo]] en lhocico.}} 19 | {{ejemplo|Tení' el hocico [[hediondo]].}} 20 | {{ejemplo|Tengo el hocico p'a la cag'á.}} 21 | {{ejemplo|¿Querí que te de una patá en el hocico?}} 22 | ;6: Boca de una persona, especialmente de la que dice malas palabras. 23 | {{uso|coloquial}}. 24 | {{ámbito|México}}. 25 | 26 | ==== Locuciones ==== 27 | {{trad-arriba|Locuciones con «hocico»}} 28 | * [[pimiento de hocico]]: Variedad del [[pimiento]], que se diferencia en ser más grueso que el de otras castas. 29 | * [[caer de hocicos]]: Dar con la cara, o cayendo en ella en otra parte. 30 | * [[de hocicos]]: De bruces, cabeza abajo. 31 | * echar, decir o restregar una cosa por los hocicos: Decirle a uno a la cara lo que le repugna oir. 32 | * [[meter el hocico en todo]]: Ser entrometido. 33 | * [[meter el hocico]] en algo: Entrometerse en algo. 34 | * [[quitar a uno el hocico]]: Ponerle de buen humor. 35 | {{trad-abajo}} 36 | 37 | ==== Información adicional ==== 38 | {{derivad|hocicón|hocicudo|hocico|hocicar}} 39 | 40 | ==== Véase también ==== 41 | {{Wikipedia}} 42 | * [[morro]] 43 | * [[malhablado]] 44 | 45 | ==== Traducciones ==== 46 | {{trad-arriba}} 47 | {{t|de|t1=Schnauze}} 48 | {{t|ca|t1=morro}} 49 | {{t|fr|t1=museau}} 50 | {{t|eo|t1=muzelo}} 51 | {{t|gl|t1=fociño}} 52 | {{t|cy|a1=1|t1=trwyn}} 53 | {{t|en|t1=snout}} 54 | {{t|nv|a1=1|t1=áchį́į́h}} 55 | {{t|nl|t1=snuit}} 56 | {{t|pt|t1=focinho}} 57 | {{t|sv|t1=nos}} 58 | {{trad-abajo}} 59 | 60 | == Referencias y notas == 61 | 62 | -------------------------------------------------------------------------------- /wikidict/lang/fr/template_overrides.py: -------------------------------------------------------------------------------- 1 | from ... 
import utils


def template_etymologie_graphique_chinoise(args: tuple[str, ...]) -> str:
    """
    Render the Chinese graphic etymology template as plain text.

    The first item of *args* (the template name) is ignored; the remaining
    ``key=value`` items are parsed with ``utils.extract_keywords_from``.
    The first non-empty value among ``sens``, ``composition`` and
    ``explication`` (in that order) is returned.

    NOTE(review): missing keywords are assumed to read as an empty string
    (defaultdict-like result) - confirm against ``utils.extract_keywords_from``.

    >>> template_etymologie_graphique_chinoise(("Étymologie graphique chinoise", "racine=羊", "sens=Attaquer en force, porter un coup (敦) / Vase rituel pour offrir les viandes (錞)"))
    'Attaquer en force, porter un coup (敦) / Vase rituel pour offrir les viandes (錞)'
    """
    keywords = utils.extract_keywords_from(list(args[1:]))
    # Same priority as a chained ``or``: the last key is the unconditional fallback.
    for key in ("sens", "composition"):
        if text := keywords[key]:
            return text
    return keywords["explication"]


def template_sinogram_noimg(args: tuple[str, ...]) -> str:
    """
    Summarise the computer encodings of a sinogram on a single line.

    The first item of *args* (the template name) is ignored. Each encoding is
    listed only when its main keyword is set; an optional secondary keyword is
    appended in parentheses. Entries are joined with ``" - "``.

    >>> template_sinogram_noimg(("sinogram-noimg", "它", "clefhz1=宀", "clefhz2=2", "nbthz1=1-5", "nbthz2=5", "m4chz1=3", "m4chz2=30711", "unihz=5B83", "gbhz1= ", "gbhz2=-", "b5hz1=A1", "b5hz2=A5A6", "cjhz1=J", "cjhz2=十心", "cjhz3=JP"))
    'Codage informatique : Unicode : U+5B83 - Big5 : A5A6 - Cangjie : 十心 (JP) - Quatre coins : 30711'
    """
    keywords = utils.extract_keywords_from(list(args[1:]))
    encodings = []

    # Unicode has no secondary keyword, so it is handled on its own.
    if code_point := keywords["unihz"]:
        encodings.append(f"Unicode : U+{code_point}")

    # (label, main keyword, optional detail keyword) - output order matters.
    for label, value_key, detail_key in (
        ("Big5", "b5hz2", "b5hz3"),
        ("Cangjie", "cjhz2", "cjhz3"),
        ("Quatre coins", "m4chz2", "m4chz3"),
    ):
        if not (value := keywords[value_key]):
            continue
        entry = f"{label} : {value}"
        if detail := keywords[detail_key]:
            entry += f" ({detail})"
        encodings.append(entry)

    return "Codage informatique : " + " - ".join(encodings)


# Template name (including spelling aliases) -> rendering function.
overrides = {
    "Étymologie graphique chinoise": template_etymologie_graphique_chinoise,
    "Etymologie graphique chinoise": template_etymologie_graphique_chinoise,
    "sinogramme-sans-image": template_sinogram_noimg,
    "sinogram-noimg": template_sinogram_noimg,
}
-------------------------------------------------------------------------------- /tests/data/ca/disset.wiki: -------------------------------------------------------------------------------- 1 | {{vegeu|DISSET}} 2 | 3 | == {{-ca-}} == 4 | {{ca-pron|è 5 | |f-centr=LL-Q7026 (cat)-Unjoanqualsevol-disset.wav 6 | }} 7 | {{-etimologia-}} Contracció de l’antic {{m|ca|*deïsset}}, evolució fonètica {{del-lang|roa-oca|ca|deesset}} per la pronúncia {{IPAchar|/ɛe/}}, de {{m|roa-oca|desesset}}, {{del-lang|la|ca|[[decem]] [[et]] [[septem]]|lit=deu i set}}, {{etim-s|ca|XVIII}}. Compareu amb {{m|ca|divuit}} i {{m|ca|dinou}}. 8 | 9 | === Numeral === 10 | {{numeral|ca|16|17|18|setze|divuit|card=disset|ord=dissetè|pref=heptadeca-|llista=Viccionari:Llista de nombres en català }} 11 | {{ca-num}} 12 | 13 | # {{marca|ca|cardinal}} [[nombre|Nombre]] enter situat entre el [[setze]] i el [[divuit]]. 14 | #: ''Té disset anys'' 15 | # {{marca|ca|valor ordinal}} [[dissetè]], [[dissetena]]. 16 | #: ''La pàgina disset'' 17 | #: ''El dia disset de maig'' 18 | 19 | {{-sin-}} 20 | * [[desset]], en balear i alguerès 21 | * [[dèsset]], en valencià 22 | * [[desasset]], en septentrional 23 | 24 | {{-trad-}} 25 | {{t-inici}} 26 | * {{de}}: {{trad|de|siebzehn}} 27 | * {{en}}: {{trad|en|seventeen}} 28 | * {{eu}}: {{trad|eu|hamazazpi}} 29 | * {{es}}: {{trad|es|diecisiete}} 30 | * {{eo}}: {{trad|eo|dek sep}} 31 | * {{fr}}: {{trad|fr|dix-sept}} 32 | * {{gl}}: {{trad|gl|dezasete}} 33 | * {{it}}: {{trad|it|diciassette}} 34 | * {{ja}}: {{trad|ja|十七}} 35 | * {{csc}}: {{trad|csc|DISSET}} 36 | * {{oc}}: {{trad|oc|dètz-e-sèt}} 37 | * {{pl}}: {{trad|pl|siedemnaście}} 38 | * {{pt}}: {{trad|pt|dezassete}}, {{trad|pt|dezessete}} 39 | * {{ro}}: {{trad|ro|şaptesprezece}} 40 | * {{ru}}: {{trad|ru|семна́дцать}} 41 | * {{sc}}: {{trad|sc|deghessette}} 42 | * {{sv}}: {{trad|sv|sjutton}} 43 | * {{ty}}: hō'ē {{trad|ty|'ahuru}} ma hitu 44 | {{t-final}} 45 | 46 | === Nom === 47 | {{ca-nom|m}} 48 | 49 | # [[xifra|Xifra]] i 
[[nombre]] [[17]]. 50 | 51 | === Nom === 52 | {{ca-nom|fp}} 53 | 54 | # Dissetena [[hora]]. 55 | #: ''L'autobús surt a les disset (les cinc de la tarda)'' 56 | 57 | === Miscel·lània === 58 | * {{ca-sil}} 59 | * {{anagrames|ca|distès|distés}} 60 | 61 | === Vegeu també === 62 | * {{Viquipèdia}} 63 | * {{ca-dicc|diec|gdlc|optimot|decat}} 64 | * [https://web.archive.org/web/20220812153712/https://aldc.espais.iec.cat/files/2022/02/1175-Disset.pdf Atles Lingüístic del Domini Català] 65 | -------------------------------------------------------------------------------- /DEBUG.md: -------------------------------------------------------------------------------- 1 | # Debug Environment Variables 2 | 3 | Globally, setting `DEBUG=1` will set the logging level to DEBUG. 4 | 5 | ## `--download` 6 | 7 | ### FORCE_SNAPSHOT 8 | 9 | You can choose the exact Wiktionary dump to download by using `FORCE_SNAPSHOT=YYYYMMDD`. 10 | 11 | ## `--get-word` 12 | 13 | ### NO_COLORS 14 | 15 | Setting `NO_COLORS=1` will remove all markup styling (italic, bold, etc.). 16 | 17 | ### KEEP_UNFINISHED 18 | 19 | When an error happens in transforming/expanding a definition, the definition will be skipped (not the entire word, just the definition). 20 | Re-run the command prepending `KEEP_UNFINISHED=1` to display the raw HTML, and be able to see where the issue comes from. 
21 | 22 | ## `--parse` 23 | 24 | List all words not taken into account with current head sections: 25 | 26 | ```shell 27 | DEBUG_PARSE=1 python -m wikidict LOCALE --parse >out.log 28 | ``` 29 | 30 | ## `--render` 31 | 32 | ### DEBUG_SECTIONS 33 | 34 | List all unhandled sections: 35 | 36 | ```shell 37 | DEBUG_SECTIONS=1 python -m wikidict LOCALE --render | sort -u >out.log 38 | ``` 39 | 40 | Make words under a given section fail: 41 | 42 | ```shell 43 | DEBUG_SECTIONS="" python -m wikidict LOCALE --render 44 | ``` 45 | 46 | Example with the RO dictionary, and the "{{unități}}" section: 47 | 48 | ```shell 49 | DEBUG_SECTIONS='{{unități}}' python -m wikidict ro --render 50 | ``` 51 | 52 | ### DEBUG_EMPTY_WORDS 53 | 54 | List all unhandled words: 55 | 56 | ```shell 57 | DEBUG_EMPTY_WORDS=1 python -m wikidict LOCALE --render >out.log 2>&1 58 | ``` 59 | 60 | ### DEBUG_LUA 61 | 62 | Useful to debug Lua expansion issues. 63 | 64 | For example, to log all words for each process in order to be able to catch problematic words afterwards (mostly to catch infinite loops): 65 | 66 | ```shell 67 | DEBUG_LUA=1 python -m wikidict LOCALE --render > LOG_FILE 2>&1 68 | tail -f LOG_FILE 69 | # (and when the output hangs, hit CTRL+C, multiple times if needed) 70 | python log-analyzer.py LOG_FILE 71 | ``` 72 | 73 | If more details are needed, use `DEBUG_LUA=2` to print Lua errors in real time. 74 | 75 | ## `--show-pos` 76 | 77 | ### DEBUG_POS 78 | 79 | This is useful to list all parts of speech (POS) found. To be used after `--render` to have the full dictionary ready to be analyzed. 
80 | -------------------------------------------------------------------------------- /tests/data/ja/V.wiki: -------------------------------------------------------------------------------- 1 | {{alphabet}} 2 | ==記号== 3 | {{Wikipedia|V}} 4 | 5 | # [[バナジウム]]の元素記号 6 | #[[電圧]] 7 | #電圧の単位[[ボルト]] 8 | # [[ローマ数字]]で[[5]]を[[あらわす|表す]][[記号]]([[Ⅴ]]) 9 | 10 | [[Category:Unicode Basic Latin]] 11 | [[Category:ラテン文字|v]] 12 | [[Category:記号|v]] 13 | [[Category:単位|v]] 14 | [[Category:国際単位系|v]] 15 | [[Category:元素記号|v]] 16 | [[Category:ローマ数字|v]] 17 | 18 | ---- 19 | 20 | ==日本語== 21 | ==={{pron|jpn}}=== 22 | ;ブ↘イ 23 | ==={{noun}}=== 24 | {{head|jpn|noun|sort=ふい ぶい}}(ブイ、(稀)ヴィー) 25 | [[カテゴリ:{{ja}} アルファベット記述]] 26 | #[[ラテン文字]]の第二十二字。 27 | #*[[V字]] 28 | #({{lang|en|[[victory]]}}より)[[勝利]]。 29 | #*'''V'''サイン 30 | #(テレビ放送業界)[[VTR]]の略。[[映像]]。 31 | ---- 32 | 33 | ==イタリア語== 34 | ===発音=== 35 | {{IPA|ˈvu}} 36 | :略語としては以下を参照 37 | ===略語=== 38 | 39 | # (ローマ数字)[[cinque]] 40 | # (元素記号)[[vanadio]]: バナジウム 41 | # [[volt]]: 電圧の単位'''ヴォルト''' 42 | # [[volume]]: 巻 43 | # [[Città del Vaticano]], [[vaticano]]: '''ヴァティカン'''市国 44 | # [[via]]: 通り 45 | # [[venerdì]]: 金曜日 46 | 47 | ---- 48 | 49 | ==英語== 50 | ===発音=== 51 | {{IPA|ˈviː}} 52 | :略語としては以下を参照 53 | ===略語=== 54 | 55 | # (ローマ数字)[[five]] 56 | # (元素記号)[[vanadium]] 57 | 58 | ---- 59 | 60 | ==スペイン語== 61 | ===発音=== 62 | {{IPA|ˈuβe}} 63 | :略語としては以下を参照 64 | ===略語=== 65 | 66 | # (ローマ数字)[[cinco]] 67 | # (元素記号)[[vanadio]]: バナジウム 68 | # [[Valencia]]: バレンシア 69 | # [[voltio]]: 電圧の単位'''ボルト''' 70 | 71 | ---- 72 | 73 | ==ドイツ語== 74 | ===発音=== 75 | {{IPA|ˈfaʊ}} 76 | :略語としては以下を参照 77 | ===略語=== 78 | 79 | # (ローマ数字)[[fünf]] 80 | # (元素記号)[[Vanadium]]: バナジウム 81 | # [[Volt]]: 電圧の単位'''ヴォルト''' 82 | # [[Vaticanstadt]]: '''ヴァティカン'''市国 83 | # [[Verband]], [[Vereinigung]], [[Verein]]: 協会 84 | # [[Volumen]]: 容積 85 | 86 | ---- 87 | 88 | =={{hu}}== 89 | 90 | ==={{pron}}=== 91 | * {{sense|音素}} {{hu-IPA}} 92 | * {{sense|文字名}} {{hu-IPA|vé}} 93 | 94 | ===文字=== 95 | {{hu-letter|upper=V|lower=v}} 96 | 
97 | # [[ラテン文字]]で書かれる[[ハンガリー語]][[アルファベット]]の38番目の[[文字]]、読みは '''{{m|hu|vé}}'''。 98 | 99 | ===={{decl}}==== 100 | {{hu-infl-nom|V-|e}}{{hu-pos-tek|V-}} 101 | 102 | ==={{seealso}}=== 103 | {{list:Latin script letters/hu}} 104 | 105 | ---- 106 | 107 | ==フランス語== 108 | ===発音=== 109 | {{IPA|ve}} 110 | :略語としては以下を参照 111 | ===略語=== 112 | 113 | # (ローマ数字)[[cinq]] 114 | # (元素記号)[[vanadium]] 115 | -------------------------------------------------------------------------------- /tests/data/fr/minutes.wiki: -------------------------------------------------------------------------------- 1 | {{voir|minutés}} 2 | 3 | == {{langue|fr}} == 4 | === {{S|nom|fr|flexion}} === 5 | {{fr-rég|mi.nyt|s=minute}} 6 | '''minutes''' {{pron|mi.nyt|fr}} {{f}} 7 | # ''Pluriel de'' [[minute]]. 8 | #* {{exemple | lang=fr 9 | | Quelques '''minutes''' avant la fin de la première mi-temps, un joueur de Bordeaux crochète un Marseillais dans la surface de réparation. Je siffle le penalty indiscutable. 10 | | source=Christophe Donner, ''Mon oncle'', Grasset, 1995}} 11 | 12 | === {{S|verbe|fr|flexion}} === 13 | {{fr-verbe-flexion|minuter|ind.p.2s=oui|sub.p.2s=oui}} 14 | '''minutes''' {{pron|mi.nyt|fr}} 15 | # ''Deuxième personne du singulier de l’indicatif présent du verbe'' [[minuter]]. 16 | # ''Deuxième personne du singulier du subjonctif présent du verbe'' [[minuter]]. 
17 | 18 | === {{S|prononciation}} === 19 | * {{écouter|lang=fr|France (Muntzenheim)|audio=LL-Q150 (fra)-0x010C-minutes.wav}} 20 | * {{écouter|lang=fr|Canada (Shawinigan)||audio=LL-Q150 (fra)-DenisdeShawi-minutes.wav}} 21 | * {{écouter|lang=fr|France||audio=LL-Q150 (fra)-Mecanautes-minutes.wav}} 22 | 23 | 24 | ==== {{S|homophones|fr}} ==== 25 | * [[minutent]] {{cf|minuter}} 26 | 27 | === {{S|anagrammes}} === 28 | {{voir anagrammes|fr}} 29 | 30 | == {{langue|en}} == 31 | === {{S|étymologie}} === 32 | : {{ébauche-étym|en}} 33 | 34 | === {{S|nom|en}} === 35 | '''minutes''' {{pron|ˈmɪn.ɪts|en}} {{au pluriel uniquement|en}} 36 | # [[compte-rendu|Compte-rendu]], [[procès-verbal]]. 37 | #* {{exemple | lang=en | The elected secretary will take the '''minutes''' of the meeting.}} 38 | 39 | === {{S|nom|en|flexion}} === 40 | {{en-nom-rég|sing=minute|ˈmɪn.ɪt}} 41 | '''minutes''' {{pron|ˈmɪn.ɪts|en}} 42 | # ''Pluriel de'' [[minute]]. 43 | 44 | === {{S|verbe|en|flexion}} === 45 | '''minutes''' {{pron|ˈmɪn.ɪts|en}} 46 | # ''Troisième personne du singulier du présent indicatif du verbe ''[[minute]]. 47 | 48 | === {{S|prononciation}} === 49 | * {{écouter|lang=en|États-Unis |ˈmɪn.ɪts|audio=En-us-minutes.ogg}} 50 | * {{écouter|lang=en|Royaume-Uni (Londres)|ˈmɪn.ɪts|audio=LL-Q1860 (eng)-Back ache-minutes.wav}} 51 | * {{écouter|Texas (États-Unis)||lang=en|audio=LL-Q1860 (eng)-Wodencafe-minutes.wav}} 52 | * {{écouter|Connecticut (États-Unis)||lang=en|audio=LL-Q1860 (eng)-Grendelkhan-minutes.wav}} 53 | -------------------------------------------------------------------------------- /wikidict/lang/pt/variant_handlers.py: -------------------------------------------------------------------------------- 1 | import re 2 | from collections import defaultdict 3 | 4 | import wikitextparser as wtp 5 | 6 | from ... 
import context, utils


def cleanup(form: str) -> str:
    """Normalise one raw form through ``utils.cleanup_rev_variant``.

    The Portuguese-specific ``rpl`` and ``skip`` sets are forwarded as-is.
    NOTE(review): presumably ``rpl`` entries are stripped from the form and
    ``skip`` entries cause it to be dropped - confirm in ``utils``.
    """
    return utils.cleanup_rev_variant(form, rpl={"não "}, skip={"plural", "singular", "subjuntivo"})


def table_to_forms(word: str, wikitext: str) -> list[str]:
    """Collect the distinct forms found in the wiki tables of *wikitext*.

    Every cell of every table (nested ones included) is inspected. Empty cells
    and cells containing ``''`` markup are skipped. ``[[form#Section|form]]``
    links are reduced to ``form``; any other cell is used whole. Each form goes
    through :func:`cleanup`. *word* itself, dashes and empty strings are
    removed, and the result is returned sorted.
    """
    # NOTE(review): the bare ``\d+`` pattern and the two empty-string
    # ``replace`` patterns below look like HTML markup lost from this copy of
    # the file. They are kept byte-for-byte; restore them from the upstream
    # source before relying on this function.
    wikitext = re.sub(r"\d+", "", wikitext)
    wikitext = wikitext.replace("", "\n| ").replace("", "\n| ")

    forms: set[str] = set()
    tables = wtp.parse(wikitext).get_tables(recursive=True)

    for table in tables:
        for row in table.data(span=False):
            for item in row:
                if not item or "''" in item:
                    continue
                raw_forms = re.findall(r"\[\[(.+)#\w+\|\1\]\]", item) or [item]
                forms.update(cleanup(form) for form in raw_forms)

    # The headword and placeholder cells are not variants.
    forms.difference_update({word, "–", "-", ""})

    return sorted(forms)


def render_variant(tpl: str, parts: list[str], data: defaultdict[str, str], word: str) -> str:
    """
    Return the first positional argument of the template unchanged.

    >>> render_variant("flexion", ["ensimesmar"], defaultdict(str), "")
    'ensimesmar'
    """
    return parts[0]


def render_reverse_variant(tpl: str, parts: list[str], data: defaultdict[str, str], word: str) -> str:
    """
    Return the forms produced by an inflection template, joined with ``|``.

    ``rev-flexion`` is a shortcut: its first argument is returned as-is.
    Any other template is rebuilt and expanded for the Portuguese locale; the
    forms are then read from the first wiki table (``{|``) of the expansion.
    An empty string is returned when the expansion contains no table.

    >>> render_reverse_variant("rev-flexion", ["foo"], defaultdict(str), "")
    'foo'
    """
    if tpl == "rev-flexion":
        return parts[0]

    # Fix: this is the Portuguese handler, so expand with the "pt" locale
    # (it previously used "da", the Danish one).
    table = context.expand(utils.reconstruct_tpl(tpl, parts, data), "pt")
    if not table.startswith("{|"):
        # Drop anything rendered before the table; give up if there is none.
        if (idx := table.find("{|")) == -1:
            return ""
        table = table[idx:]
    return "|".join(table_to_forms(word, table))


# Template name -> rendering function.
handlers = {
    "flexion": render_variant,
    "rev-flexion": render_reverse_variant,
}


def append_to_reverse_variants(tpl: str) -> None:
    """Dynamically append a template to reverse variants templates."""
    if tpl in handlers:
        return
    handlers[tpl] = render_reverse_variant
-------------------------------------------------------------------------------- /tests/data/pt/giro-.wiki: -------------------------------------------------------------------------------- 1 | ={{-pt-}}= 2 | ==Afixo== 3 | '''giro-''' 4 | # [[círculo]] 5 | # [[redondo]] 6 | 7 | ===Verbetes derivados=== 8 | {{verTambém.Ini}} 9 | * [[girínida]] 10 | * [[girinídeo]] 11 | * [[girobus]] 12 | * [[girocárpea]] 13 | * [[girocárpeo]] 14 | * [[girocarpo]] 15 | * [[girocécide]] 16 | * [[girocécis]] 17 | * [[girocéfalo]] 18 | * [[girocompasso]] 19 | * [[girocóride]] 20 | * [[girócoris]] 21 | * [[girocótilo]] 22 | * [[girodactílida]] 23 | * [[girodactilídeo]] 24 | * [[girodactilíneo]] 25 | * [[girodáctilo]] 26 | * [[girodatílida]] 27 | * [[girodatilídeo]] 28 | * [[girodatilíneo]] 29 | * [[girodinâmica]] 30 | {{verTambém.NovaColuna}} 31 | * [[giroédrico]] 32 | * [[giroedro]] 33 | * [[giroestabilizador]] 34 | * [[giroeta]] 35 | * [[girofalco]] 36 | * [[girofalte]] 37 | * [[girofle]] 38 | * [[giroflê]] 39 | * [[girofleiro]] 40 | * [[girófora]] 41 | * [[giroforácea]] 42 | * [[giroforáceo]] 43 | * [[giróforo]] 44 | * [[girogirar]] 45 | * [[girógrafo]] 46 | * [[giroípno]] 47 | * [[girolas]] 48 | * [[girolita]] 49 | * [[girolite]] 50 | * [[girolito]] 51 | * [[giroma]] 52 | {{verTambém.NovaColuna}} 53 | * [[giromagnético]] 54 | * [[giromancia]] 55 | * [[giromante]] 56 | * [[giromântico]] 57 | * [[girometria]] 58 | * [[girométrico]] 59 | * [[girômetro]] 60 | * [[giromitra]] 61 | * [[gironado]] 62 | * [[girôneo]] 63 | * [[giropéltis]] 64 | * [[girópida]] 65 | * [[giropídeo]] 66 | * [[giropiloto]] 67 | * [[giropíneo]] 68 | * [[giroplano]] 69 | * [[giropo]] 70 | * [[giróptero]] 71 | * [[girorizonte]] 72 | * [[giroscópico]] 73 | * [[giroscópio]] 74 | {{verTambém.NovaColuna}} 75 | * [[girose]] 76 | * [[girosela]] 77 | * [[girospasmo]] 78 | * [[girossomo]] 79 | * [[girostabilizador]] 80 | * [[girostática]] 81 | * [[girostático]] 82 | * [[giróstato]] 83 | * [[girostemo]] 84 | * [[girostêmon]] 
85 | * [[girostemonácea]] 86 | * [[girostemonáceo]] 87 | * [[girostemônea]] 88 | * [[girostemôneo]] 89 | * [[girote]] 90 | * [[giroto]] 91 | * [[girotrém]] 92 | * [[girótropo]] 93 | * [[girovagar]] 94 | * [[giróvago]] 95 | {{verTambém.Fim}} 96 | 97 | =={{etimologia|pt}}== 98 | {{etimo2|grc|γῦρος|pt|gyros|ponto=não}}, pelo {{etm|la|pt}} {{etimo|la|gyrus}}. 99 | 100 | ==Ver também== 101 | ===No Wikcionário=== 102 | * [[giro]] 103 | 104 | [[Categoria:Afixo (Português)]] 105 | -------------------------------------------------------------------------------- /tests/data/fr/acrologie.wiki: -------------------------------------------------------------------------------- 1 | == {{langue|fr}} == 2 | === {{S|étymologie}} === 3 | : Du {{étyl|grc|fr|mot=ἄκρος|tr=akros|sens=extrémité}}, voir ''[[acro-]]'', avec le suffixe ''[[-logie]]''. 4 | 5 | === {{S|nom|fr}} === 6 | {{fr-rég|a.kʁɔ.lɔ.ʒi}} 7 | '''acrologie''' {{pron|a.kʁɔ.lɔ.ʒi|fr}} {{f}} 8 | # {{lexique|linguistique|fr}} {{rare|fr}} [[système|Système]] [[graphique]] qui consiste à [[peindre]], pour représenter les [[idée]]s, l’image des objets dont le nom commence par la même lettre que celui par lequel ces idées sont exprimées dans le [[langage]] ordinaire. 9 | #* {{exemple | lang=fr 10 | | Le disque désigne donc le SOLEIL, le sceptre à tête de chacal, l’idée de GARDIEN, et le scarabée avec les trois traits au dessous, les MONDES. Or en égyptien, le chacal s’appelle ouônch et un gardien ourit. Ces deux mots commencent par la même lettre, ainsi il y a '''acrologie'''. 
11 | | source={{w|Julius Klaproth}}, ''[//books.google.fr/books?id=FPUtAAAAYAAJ&pg=PA84&lpg=PA84&dq=acrologie&source=bl&ots=MkV_YLaxvR&sig=1FTDvSzYFjMfJcZPGLpO-eDcxEQ&hl=fr&sa=X&ei=B8HDUImHK8WzhAfDtYHYBQ&redir_esc=y Lettre sur la découverte des hiéroglyphes acrologiques]'', 1827, page 80}} 12 | # {{lexique|linguistique|fr}} {{par extension|fr}} {{rare|fr}} Se dit lorsque [[deux]] [[terme]]s commencent par la même [[lettre]] et qu’ils sont [[apparenté]]s par le [[sens]]. 13 | #* {{exemple | lang=fr | Nature et Nèfle sont une '''acrologie'''.}} 14 | # {{lexique|philosophie|fr}} {{très rare|fr}} [[recherche|Recherche]] ou [[exposition]] des [[principe]]s [[suprême]]s, ou du [[mieux]] [[absolu]]. 15 | # {{lexique|sport|fr}} [[étude|Étude]] ou [[pratique]] de l’[[acrobatie]]. 16 | 17 | ==== {{S|dérivés}} ==== 18 | * [[acrologique]] 19 | * [[acrologiquement]] 20 | 21 | ==== {{S|traductions}} ==== 22 | {{trad-début|}} 23 | * {{T|en}} : {{trad-|en|acrology}} 24 | {{trad-fin}} 25 | 26 | === {{S|prononciation}} === 27 | * {{pron-rimes|a.kʁɔ.lɔ.ʒi|fr}} 28 | * {{écouter|lang=fr|France (Vosges)||audio=LL-Q150 (fra)-LoquaxFR-acrologie.wav}} 29 | * {{écouter|lang=fr|France (Lyon)||audio=LL-Q150 (fra)-WikiLucas00-acrologie.wav}} 30 | * {{écouter|Somain (France)||lang=fr|audio=LL-Q150 (fra)-Jérémy-Günther-Heinz Jähnick-acrologie.wav}} 31 | 32 | === {{S|anagrammes}} === 33 | {{voir anagrammes|fr}} 34 | 35 | === {{S|références}} === 36 | * A. D. Demoustier, Manuel Lexique philologique, didactique et polytechnique, page 20, 1844 37 | 38 | [[Catégorie:Mots en français préfixés avec acro-]] 39 | -------------------------------------------------------------------------------- /tests/data/fr/42.wiki: -------------------------------------------------------------------------------- 1 | == {{langue|conv}} == 2 | === {{S|numéral|conv}} === 3 | '''42''' 4 | # Numéral en [[chiffre arabe|chiffres arabes]] du nombre [[quarante-deux]], en notation décimale. 
Selon la base utilisée, ce numéral peut représenter d’autres nombres. En notation [[hexadécimal]]e, par exemple, ce numéral représente le nombre [[soixante-six]] ; en [[octal]], le nombre [[trente-quatre]]. 5 | # {{ellipse|conv}} ''(Dans la plupart des langues)'' Une année qui se termine par '''42'''. 6 | 7 | ==== {{S|transcriptions}} ==== 8 | {{numéral}} 9 | 10 | ==== {{S|variantes orthographiques}} ==== 11 | : ''(Ellipse d’une année)'' : 12 | * [[’42]] 13 | 14 | == {{langue|fr}} == 15 | === {{S|étymologie}} === 16 | : {{date|lang=fr}} {{ébauche-étym|fr}} 17 | 18 | === {{S|nom|fr}} === 19 | {{fr-inv|ka.ʁɑ̃t.dø}} 20 | '''42''' {{pron|ka.ʁɑ̃t.dø|fr}} {{invar}} 21 | # [[quarante-deux|Quarante-deux]]. 22 | #* {{exemple | lang=fr | Le numéro gagnant est le '''42'''.}} 23 | # {{ellipse|fr}} Une année qui se termine par '''42'''. 24 | #* {{exemple | lang=fr | Elle a eu son bac en '''42''' (sous-entendu en 1942).}} 25 | # {{FR|fr}} {{familier|fr}} [[habitant|Habitant]] du [[département]] de la [[Loire]]. 26 | #* {{exemple | lang=fr | Les '''42''' de l’année dernière sont arrivés au camping et ont repris le même emplacement.}} 27 | 28 | ==== {{S|dérivés}} ==== 29 | * [[42e|42{{e}}]] 30 | 31 | === {{S|nom propre|fr}} === 32 | {{fr-inv|ka.ʁɑ̃t.dø|inv_titre={{m}} {{s}}}} 33 | '''42''' {{pron|ka.ʁɑ̃t.dø|fr}} {{m}} {{s}} 34 | # {{France|fr}} [[département|Département]] de la [[Loire]]. 
35 | #* {{exemple | lang=fr | J’habite dans le '''42'''.}} 36 | 37 | ==== {{S|synonymes}} ==== 38 | * [[quatre deux]] {{familier|nocat=1}} 39 | 40 | === {{S|prononciation}} === 41 | * {{pron-rimes|ka.ʁɑ̃t.dø|fr}} 42 | * {{écouter|lang=fr|France (Vosges)||audio=LL-Q150 (fra)-Poslovitch-42.wav}} 43 | * {{écouter|lang=fr|France (Vosges)||audio=LL-Q150 (fra)-LoquaxFR-42.wav}} 44 | * {{écouter|lang=fr|France (Cesseras)||audio=LL-Q150 (fra)-Guilhelma-42.wav}} 45 | * {{écouter|France (Lyon)||lang=fr|audio=LL-Q150 (fra)-WikiLucas00-42.wav}} 46 | * {{écouter|Mulhouse (France)||lang=fr|audio=LL-Q150 (fra)-Mathieu Kappler-42.wav}} 47 | * {{écouter|Courmayeur (Italie)||niveau=moyen|lang=fr|audio=LL-Q150 (fra)-XANA000-42.wav}} 48 | 49 | === {{S|anagrammes}} === 50 | {{voir anagrammes|fr}} 51 | 52 | === {{S|voir aussi}} === 53 | * {{WP|42 (nombre)}} 54 | 55 | [[Catégorie:Numéros de départements de France en français]] 56 | -------------------------------------------------------------------------------- /wikidict/constants.py: -------------------------------------------------------------------------------- 1 | """Shared constants.""" 2 | 3 | from pathlib import Path 4 | 5 | import requests 6 | 7 | # Dictionaries metadata 8 | PROJECT = "reader.dict" 9 | TITLE = "{project} {langs}" 10 | WEBSITE = "https://www.reader-dict.com" 11 | 12 | # Wiktionary dump URL 13 | BASE_URL = "https://dumps.wikimedia.org/{locale}wiktionary" 14 | DUMP_URL = f"{BASE_URL}/{{snapshot}}/{{locale}}wiktionary-{{snapshot}}-pages-articles.xml.bz2" 15 | 16 | # Wikimedia REST API 17 | WIKIMEDIA_HEADERS = {"User-Agent": WEBSITE} 18 | WIKTIONARY_URL_API = "https://{locale}.wiktionary.org/w/api.php" 19 | WIKIMEDIA_URL_BASE = "https://en.wikipedia.org/api/rest_v1" 20 | WIKIMEDIA_URL_MATH_CHECK = f"{WIKIMEDIA_URL_BASE}/media/math/check/{{type}}" 21 | WIKIMEDIA_URL_MATH_RENDER = f"{WIKIMEDIA_URL_BASE}/media/math/render/{{format}}/{{hash}}" 22 | 23 | # Dictionary file suffix for etymology-free files 24 | 
NO_ETYMOLOGY_SUFFIX = "-noetym" 25 | 26 | # ZIP files 27 | ZIP_WORDS_COUNT = "words.count" 28 | ZIP_WORDS_SNAPSHOT = "words.snapshot" 29 | 30 | # Algorithm used to compute dictionaries checksum 31 | ASSET_CHECKSUM_ALGO = "sha256" 32 | 33 | # Locales relations 34 | # Example with FRO (Old French) that uses the FR (French) Wiktionary dump as source. 35 | # Syntax: "locale": "origin locale" 36 | LOCALE_ORIGIN = {"fro": "fr"} 37 | 38 | # Dictionaries known to be problematic about the number of chars in MobiPocket 39 | MOBI_CLEANUP = {"en", "en:en", "fr", "fr:fr"} 40 | # Dictionaries known to be problematic about the file size in MobiPocket 41 | MOBI_SKIP: set[str] = set() 42 | 43 | # Mobi 44 | COVER_FILE = Path(__file__).parent / "cover.png" 45 | KINDLEGEN_FILE = Path.home() / ".local" / "bin" / "kindlegen" 46 | 47 | # HTTP requests 48 | SESSION = requests.Session() 49 | SESSION.headers.update(WIKIMEDIA_HEADERS) 50 | 51 | # --parse: modules & templates "end patterns" to ignore when saving them in the database 52 | MODULES_TO_IGNORE = ("/doc", "/documentation", "/sandbox", "/testcases") 53 | 54 | # --render: Lua modules aliases 55 | PARSER_FUNCTIONS_ALIASES = { 56 | "pt": { 57 | "#se": "#if", 58 | "#seigual": "#ifeq", 59 | "#seerro": "#iferror", 60 | "#seexiste": "#ifexist", 61 | "#seexpr": "#ifexpr", 62 | } 63 | } 64 | 65 | # --parse: HTML entities to replace in modules & templates contents 66 | HTML_REPL_BODY = { 67 | # Found in modules importing another module 68 | "&quot;": '"', 69 | } 70 | HTML_REPL_TITLE = {"&amp;": "&"} 71 | -------------------------------------------------------------------------------- /tests/data/no/et.wiki: -------------------------------------------------------------------------------- 1 | {{se også|-et}} 2 | ==Norsk== 3 | ===Artikkel=== 4 | {{infl|nb|artikkel}} 5 | # artikkel for substantiv i ubestemt entall, av intetkjønn 6 | * '''et''' hus 7 | 8 | ====Andre former==== 9 | *{{andre former|eit|nn=ja|nb=nei|nrm=nei}} 10 | 11 | ====Etymologi==== 12 | 
{{etymologi mangler|språk=no}} 13 | 14 | ====Uttale==== 15 | {{lyd|no-et.ogg|Lyd (Dialekt: Oslo)}} 16 | 17 | ====Grammatikk==== 18 | * {{norm|nb=ja}}: [[en]], [[ei]], [[et]] 19 | * {{norm|nrm=ja}}: [[en]], ([[ei]]), [[et]] 20 | * {{norm|nn=ja}}: [[ein]], [[ei]], [[eit]] 21 | 22 | ====Oversettelser==== 23 | {{overs-se|ubestemt artikkel entall|[[en]] / [[ein]]}} 24 | 25 | [[Kategori:100 vanligste ord i norsk]] 26 | 27 | ===Verb=== 28 | '''{{PAGENAME}}''' {{norm|nb=ja|nrm=ja|nn=ja}} 29 | 30 | #{{no-verb-bøyningsform|imp|ete|nb=ja|nn=ja}} 31 | 32 | 33 | 34 | ---- 35 | 36 | ==Dansk== 37 | ===Artikkel=== 38 | {{infl|da|artikkel}} 39 | 40 | #{{l|no|et}}; artikkel for substantiv i ubestemt entall, av intetkjønn 41 | 42 | ====Se også==== 43 | *{{l|da|en}} 44 | 45 | 46 | ==Flerspråklig== 47 | ===Forkortelse=== 48 | '''et''' 49 | 50 | #Den offisielle [[:w:Liste over ISO 639-1-koder|ISO 639-1]]-språkkoden for '''[[estisk]]'''. 51 | # ''Forkortelse for'' etasje 52 | 53 | [[Kategori:ISO 639-1]] 54 | ====Synonymer==== 55 | *[[est]] (ISO 639-2, ISO 639-3) 56 | 57 | 58 | ---- 59 | 60 | ==Fransk== 61 | ===Konjunksjon=== 62 | {{infl|fr|konj}} 63 | # [[og]] 64 | 65 | ====Etymologi==== 66 | {{opphav|latin|språk=fr}} 67 | 68 | ====Uttale==== 69 | * {{IPA|/e/|språk=fr}} 70 | * {{SAMPA|/e/|språk=fr}} 71 | 72 | ====Homonymer==== 73 | * {{IPA|/e/|språk=fr}} 74 | ** [[ai]], [[ait]] → se ''[[avoir]]'' 75 | ** [[eh]]! 
76 | ** [[ais]] 77 | 78 | * {{IPA|/ɛ/|språk=fr}} 79 | ** [[haie]] 80 | ** [[es]], [[est]] → se ''[[être]]'' 81 | 82 | ==Karakalpakisk== 83 | ===Substantiv=== 84 | {{kaa-sub}} 85 | # [[kjøtt]] 86 | 87 | ====Synonymer==== 88 | * [[gósh]] 89 | 90 | ====Beslektede termer==== 91 | [[etli]] 92 | 93 | ==Krimtatarisk== 94 | ===Substantiv=== 95 | {{crh-sub}} 96 | # [[kjøtt]] 97 | 98 | ====Beslektede termer==== 99 | [[etli]] 100 | 101 | ==Turkmensk== 102 | ===Substantiv=== 103 | {{tk-sub}} 104 | # [[kjøtt]] 105 | 106 | ====Grammatikk==== 107 | {{tk-sub-bøyning|et|etiň|ete|eti|etde|etden|etler|etleriň|etlere|etleri|etlerde|etlerden}} 108 | 109 | ====Beslektede termer==== 110 | [[etli]] 111 | -------------------------------------------------------------------------------- /tests/data/ca/bio-.wiki: -------------------------------------------------------------------------------- 1 | {{vegeu|bio}} 2 | 3 | == {{-ca-}} == 4 | 5 | === Prefix === 6 | {{entrada|ca|prefix}} 7 | 8 | # Element que entra en la composició de paraules amb el sentit de {{m|ca|vida}}. 
9 | 10 | {{-sin-}} 11 | * Sufix: [[-bi]] 12 | 13 | ==== Derivats ==== 14 | {{rel-top}} 15 | * [[bioactiu]] 16 | * [[bioacumulació]] 17 | * [[bioacústica]] 18 | * [[bioalcohol]] 19 | * [[bioalimentació]] 20 | * [[biobalística]] 21 | * [[biobibliografia]] 22 | * [[biocenologia]] 23 | * [[biocibernètica]] 24 | * [[biocida]] 25 | * [[biocinètica]] 26 | * [[biocircuit]] 27 | * [[bioclimatologia]] 28 | * [[bioclínica]] 29 | * [[biocompatible]] 30 | * [[biocuina]] 31 | * [[biodegradar]] 32 | * [[bioderma]] 33 | * [[biodeterioració]] 34 | * [[biodinàmica]] 35 | * [[biodisc]] 36 | * [[biodisponibilitat]] 37 | * [[bioecologia]] 38 | * [[bioelectricitat]] 39 | * [[bioenergètica]] 40 | * [[bioenergia]] 41 | * [[bioenginyeria]] 42 | * [[bioespeleologia]] 43 | * [[bioestadística]] 44 | * [[bioestratigrafia]] 45 | * [[bioètica]] 46 | * [[bioevolutiu]] 47 | * [[biofàgia]] 48 | * [[biofarmacologia]] 49 | * [[biofísica]] 50 | * [[biofotogènesi]] 51 | * [[biogènia]] 52 | * [[biògen]] 53 | * [[biogenèsia]] 54 | * [[biogeogenosi]] 55 | * [[biogeografia]] 56 | * [[biogeoquímica]] 57 | * [[bioindicador]] 58 | {{rel-mid}} 59 | * [[bioindústria]] 60 | * [[bioinformàtica]] 61 | * [[bioinsecticida]] 62 | * [[biòlisi]] 63 | * '''[[biologia]]''' (vegeu-hi més compostos) 64 | * [[bioluminiscent]] 65 | * [[biomagnetisme]] 66 | * [[biomaterial]] 67 | * [[biomecànica]] 68 | * [[biomedicina]] 69 | * [[biomembrana]] 70 | * [[biometal·lurgia]] 71 | * [[biometeorologia]] 72 | * [[biometria]] 73 | * [[biomimètica]] 74 | * [[bionòmic]] 75 | * [[bionutrient]] 76 | * [[biopoesi]] 77 | * [[biopolítica]] 78 | * [[bioproteïna]] 79 | * [[bioquímica]] 80 | * [[bioreactor]] 81 | * [[bioregulador]] 82 | * [[bioretroacció]] 83 | * [[bioritme]] 84 | * [[biorreologia]] 85 | * [[biosensor]] 86 | * [[biosfera]] 87 | * [[biosíntesi]] 88 | * [[biosistemàtica]] 89 | * [[biosociologia]] 90 | * [[biosoma]] 91 | * [[biostàsia]] 92 | * [[biostàtica]] 93 | * [[biotàxia]] 94 | * [[biotecnologia]] 95 | * [[biotelemetria]] 96 | 
* [[bioteràpia]] 97 | * [[biotip]] 98 | * [[biòtop]] 99 | * [[biotractament]] 100 | * [[bitransistor]] 101 | * [[bioxip]] 102 | {{rel-bottom}} 103 | 104 | == {{-fr-}} == 105 | 106 | === Prefix === 107 | {{entrada|fr|prefix}} 108 | 109 | # [[#ca|bio-]] 110 | -------------------------------------------------------------------------------- /tests/data/da/mus.wiki: -------------------------------------------------------------------------------- 1 | {{også|Mus}} 2 | {{wikipedia}} 3 | {{=da=}} 4 | [[Image:House mouse.jpg|thumb|right|En husmus]] 5 | [[Image:3-Tasten-Maus Microsoft.jpg|thumb|right|En computermus]] 6 | {{-etym-}} 7 | # Fra {{etyl|non|da}} {{term|mús|lang=non}}. 8 | # Fra {{etyl|en|da}} {{term|mouse|lang=en}}. 9 | {{-noun-|da}} 10 | {{pn}} {{c}} 11 | # (''zoologi'') [[pattedyr]] 12 | #:''[[Min]] [[kat]] [[være|er]] [[bange]] [[for]] [[mus]]''.'' 13 | # {{data|dansk}} en enhed som tilsluttes computere 14 | {{-decl-}} 15 | {{da-noun-infl|en|2=mus}} 16 | {{-rel-}} 17 | * [[computermus]] 18 | * [[hasselmus]] 19 | * [[husmus]] 20 | * [[markmus]] 21 | * [[musearm]] 22 | * [[musefælde]] 23 | * [[muselort]] 24 | * [[musemåtte]] 25 | * [[museskade]] 26 | * [[museunge]] 27 | {{-trans-}} 28 | {{(}} 29 | * {{af}}: {{t|af|muis}} 30 | * {{en}}: {{t|en|mouse}} 31 | * {{fi}}: {{t|fi|hiiri}} 32 | * {{fr}}: {{t|fr|souris|f}} 33 | * {{fo}}: {{t|fo|mús}} 34 | * {{el}}: {{t|el|ποντικός}} 35 | * {{is}}: {{t|is|mús}} 36 | * {{it}}: {{t|it|topo|m}} 37 | * {{O|ku|mişk|m}} 38 | {{-}} 39 | * {{lb}}: {{t|lb|Maus}} 40 | * {{nl}}: {{t|nl|muis|f}} 41 | * {{no}}: {{t|no|mus|c}} 42 | * {{pl}}: {{t|pl|mysz|f}} 43 | * {{ru}}: {{t|ru|мышь|f}} (myšʹ) 44 | * {{es}}: {{t|es|ratón|m}} 45 | * {{sv}}: {{t|sv|mus|c}} 46 | * {{tr}}: {{t|tr|fare}} 47 | * {{de}}: {{t|de|Maus|f}} 48 | * {{cy}}: {{t|cy|llygoden|f}}, {{t|cy|llygodyn|m}} 49 | {{)}} 50 | [[Kategori:Pattedyr på dansk]] 51 | 52 | {{=nl=}} 53 | {{-pronun-}} 54 | :{{audio|nl-mus.ogg|mus}} 55 | {{-noun-|nl}} 56 | {{pn}} {{f}} 57 | #''zool.'' 
[[spurv]] 58 | {{-decl-}} 59 | {{nl-noun|de|mus|musje|mussen|musjes}} 60 | {{-rel-}} 61 | {| 62 | |- valign=top 63 | | 64 | *[[baardgrasmus]] 65 | *[[bergmus]] 66 | *[[boommus]] 67 | | 68 | *[[grasmus]] 69 | *[[heggenmus]] 70 | *[[huismus]] 71 | | 72 | *[[mannetjesmus]] 73 | *[[meimus]] 74 | *[[rietmus]] 75 | | 76 | *[[ringelmus]] 77 | *[[ringmus]] 78 | *[[roodmus]] 79 | | 80 | *[[rotsmus]] 81 | *[[straatmus]] 82 | *[[wijfjesmus]] 83 | |} 84 | [[Kategori:Fugle på nederlandsk]] 85 | 86 | {{=sv=}} 87 | {{-noun-|sv}} 88 | {{pn}} 89 | # en [[#Dansk|mus]] (dyr) 90 | # en mus (til computere) 91 | {{-decl-}} 92 | {{sv-noun-irreg-c|3=möss|4=mössen|5=mus|7=möss}} 93 | {{-rel-}} 94 | ;dyr 95 | * [[hasselmus]] 96 | * [[husmus]] 97 | * [[musfälla]] 98 | * [[musgrå]] 99 | * [[näbbmus]] 100 | ;computere 101 | * [[datormus]] 102 | * [[musknapp]] 103 | * [[musmatta]] 104 | * [[muspekare]] 105 | {{-ref-}} 106 | * {{SAOL}} 107 | [[Kategori:Pattedyr på svensk]] 108 | --------------------------------------------------------------------------------
</span>
(65535).toString(16) === 'ffff'
=(65535).toString(16) === 'ffff'
{value}
my ''@''teile = split m[/], $unixpfad;