├── .circleci └── config.yml ├── .codacy.yml ├── .codeclimate.yml ├── .coveragerc ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── codeql.yml │ ├── pylint.yml │ └── tests.yml ├── .gitignore ├── .sonarcloud.properties ├── ACKS.md ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── VERSION ├── WORKS_USING_WORDLESS.md ├── appveyor.yml ├── azure-pipelines.yml ├── data ├── __init__.py ├── bamberger_vanecek_most_common_words_1000.txt ├── dale_list_easy_words_3000.txt ├── dale_list_easy_words_769.txt ├── luong_nguyen_dinh_freq_syls_easy_1000.txt ├── spache_word_list.txt └── unifont-16.0.04.otf ├── doc ├── doc.md ├── e_11.svg ├── e_12.svg ├── e_21.svg ├── e_22.svg ├── measures │ ├── dispersion_adjusted_frequency │ │ ├── ald.svg │ │ ├── arf.svg │ │ ├── awt.svg │ │ ├── carrolls_um.svg │ │ ├── engwalls_fm.svg │ │ ├── griess_dp.svg │ │ ├── juillands_u.svg │ │ ├── kromers_ur.svg │ │ ├── lynes_d3.svg │ │ ├── rosengrens_s.svg │ │ └── zhangs_distributional_consistency.svg │ ├── effect_size │ │ ├── conditional_probability.svg │ │ ├── delta_p.svg │ │ ├── dice_sorensen_coeff.svg │ │ ├── diff_coeff.svg │ │ ├── im2.svg │ │ ├── im3.svg │ │ ├── jaccard_index.svg │ │ ├── kilgarriffs_ratio.svg │ │ ├── log_dice.svg │ │ ├── log_ratio.svg │ │ ├── me.svg │ │ ├── mi.svg │ │ ├── mi_log_f.svg │ │ ├── min_sensitivity.svg │ │ ├── mu_val.svg │ │ ├── nmi.svg │ │ ├── npmi.svg │ │ ├── or.svg │ │ ├── pct_diff.svg │ │ ├── pmi.svg │ │ ├── poisson_collocation_measure.svg │ │ ├── rr.svg │ │ └── squared_phi_coeff.svg │ ├── f_x1_bar.svg │ ├── f_x2_bar.svg │ ├── lexical_density_diversity │ │ ├── brunets_index.svg │ │ ├── cttr.svg │ │ ├── fishers_index_of_diversity.svg │ │ ├── herdans_vm.svg │ │ ├── honores_stat.svg │ │ ├── lexical_density.svg │ │ ├── logttr.svg │ │ ├── mattr.svg │ │ ├── msttr.svg │ │ ├── popescu_macutek_altmanns_b1_b2_b3_b4_b5.svg │ │ ├── repeat_rate.svg │ │ ├── rttr.svg │ │ ├── 
shannon_entropy.svg │ │ ├── simpsons_l.svg │ │ ├── ttr.svg │ │ ├── yules_characteristic_k.svg │ │ └── yules_index_of_diversity.svg │ ├── readability │ │ ├── aari.svg │ │ ├── ari.svg │ │ ├── bormuths_cloze_mean_gp.svg │ │ ├── coleman_liau_index.svg │ │ ├── colemans_readability_formula.svg │ │ ├── cp.svg │ │ ├── crawfords_readability_formula.svg │ │ ├── danielson_bryans_readability_formula.svg │ │ ├── dawoods_readability_formula.svg │ │ ├── devereux_readability_index.svg │ │ ├── dickes_steiwer_handformel.svg │ │ ├── drp.svg │ │ ├── eflaw.svg │ │ ├── elf.svg │ │ ├── fog_index.svg │ │ ├── fuckss_stilcharakteristik.svg │ │ ├── gl.svg │ │ ├── gulpease.svg │ │ ├── lensear_write_formula.svg │ │ ├── lix.svg │ │ ├── lorge_readability_index.svg │ │ ├── luong_nguyen_dinhs_readability_formula.svg │ │ ├── mu.svg │ │ ├── nwl.svg │ │ ├── nws.svg │ │ ├── osman.svg │ │ ├── rd.svg │ │ ├── re.svg │ │ ├── re_farr_jenkins_paterson.svg │ │ ├── rgl.svg │ │ ├── rix.svg │ │ ├── smog_grading.svg │ │ ├── spache_readability_formula.svg │ │ ├── strain_index.svg │ │ ├── td.svg │ │ ├── trankle_bailers_readability_formula.svg │ │ ├── wheeler_smiths_readability_formula.svg │ │ └── x_c50.svg │ └── statistical_significance │ │ ├── log_likehood_ratio_test.svg │ │ ├── pearsons_chi_squared_test.svg │ │ ├── students_t_test_1_sample.svg │ │ ├── students_t_test_2_sample.svg │ │ ├── z_test.svg │ │ └── z_test_berry_rogghe.svg ├── trs │ ├── zho_cn │ │ ├── ACKS.md │ │ ├── CONTRIBUTING.md │ │ ├── README.md │ │ └── WORKS_USING_WORDLESS.md │ └── zho_tw │ │ ├── ACKS.md │ │ ├── CONTRIBUTING.md │ │ ├── README.md │ │ └── WORKS_USING_WORDLESS.md └── wl_logo.png ├── imgs ├── donating_alipay.png ├── donating_paypal.gif ├── donating_wechat_pay.png ├── wechat_official_account.jpg ├── wl_icon.icns ├── wl_icon.ico ├── wl_icon_about.png └── wl_loading.png ├── pylintrc ├── requirements ├── requirements_dev.txt ├── requirements_tests.txt └── requirements_tests_macos.txt ├── tests ├── __init__.py ├── files │ ├── file_area │ │ 
├── [eng_gb] Charlotte Brontë. (1847). Jane Eyre—An autobiography.txt │ │ ├── [eng_gb] Emily Brontë. (1847). Wuthering heights.txt │ │ ├── [eng_gb] Jane Austen. (1813). Pride and prejudice.txt │ │ ├── [eng_us] F. Scott Fitzgerald. (1925). The great Gatsby.txt │ │ ├── [eng_us] Herman Melville. (1851). Moby-Dick; or, the whale.txt │ │ ├── [eng_us] Mark Twain. (1876). The adventures of Tom Sawyer.txt │ │ └── misc │ │ │ ├── [eng_gb] Tagged.txt │ │ │ ├── [eng_us] First token is a punctuation mark.txt │ │ │ ├── [eng_us] TTR = 1.txt │ │ │ ├── [eng_us] Tags at start of text.txt │ │ │ └── [other] No language support.txt │ ├── search_terms │ │ ├── empty.txt │ │ └── unicode_decode_error (utf_16).txt │ ├── wl_checks │ │ └── wl_checks_files │ │ │ ├── dup.txt │ │ │ ├── dup.xml │ │ │ ├── empty_docx.docx │ │ │ └── empty_txt.txt │ └── wl_file_area │ │ ├── file_types │ │ ├── csv.csv │ │ ├── docx.docx │ │ ├── html.html │ │ ├── lrc.lrc │ │ ├── pdf.pdf │ │ ├── pptx.pptx │ │ ├── tmx.tmx │ │ ├── xlsx.xlsx │ │ └── xml.xml │ │ ├── misc │ │ └── vie_tokenized.txt │ │ ├── tags │ │ ├── tokenized_tagged.txt │ │ ├── tokenized_untagged.txt │ │ ├── untokenized_tagged.txt │ │ └── untokenized_untagged.txt │ │ └── unicode_decode_error │ │ ├── unicode_decode_error (utf_16).csv │ │ ├── unicode_decode_error (utf_16).html │ │ ├── unicode_decode_error (utf_16).txt │ │ └── unicode_decode_error (utf_16).xml ├── test_colligation_extractor.py ├── test_collocation_extractor.py ├── test_concordancer.py ├── test_concordancer_parallel.py ├── test_dependency_parser.py ├── test_keyword_extractor.py ├── test_main.py ├── test_ngram_generator.py ├── test_profiler.py ├── test_wordlist_generator.py ├── tests_checks │ ├── __init__.py │ ├── test_checks_files.py │ ├── test_checks_misc.py │ ├── test_checks_tokens.py │ └── test_checks_work_area.py ├── tests_dialogs │ ├── __init__.py │ ├── test_dialogs.py │ ├── test_dialogs_errs.py │ └── test_dialogs_misc.py ├── tests_figs │ ├── __init__.py │ ├── test_figs.py │ ├── 
test_figs_freqs.py │ └── test_figs_stats.py ├── tests_file_area │ ├── __init__.py │ └── test_file_area_file_types.py ├── tests_measures │ ├── __init__.py │ ├── test_measure_utils.py │ ├── test_measures_adjusted_freq.py │ ├── test_measures_bayes_factor.py │ ├── test_measures_dispersion.py │ ├── test_measures_effect_size.py │ ├── test_measures_lexical_density_diversity.py │ ├── test_measures_misc.py │ ├── test_measures_readability.py │ └── test_measures_statistical_significance.py ├── tests_nlp │ ├── __init__.py │ ├── test_dependency_parsing.py │ ├── test_lemmatization.py │ ├── test_matching.py │ ├── test_nlp_utils.py │ ├── test_pos_tagging.py │ ├── test_sentence_tokenization.py │ ├── test_sentiment_analysis.py │ ├── test_stop_word_lists.py │ ├── test_syl_tokenization.py │ ├── test_texts.py │ ├── test_word_detokenization.py │ ├── test_word_tokenization.py │ ├── tests_spacy │ │ ├── __init__.py │ │ ├── test_spacy.py │ │ ├── test_spacy_cat.py │ │ ├── test_spacy_dan.py │ │ ├── test_spacy_deu.py │ │ ├── test_spacy_ell.py │ │ ├── test_spacy_eng.py │ │ ├── test_spacy_fin.py │ │ ├── test_spacy_fra.py │ │ ├── test_spacy_hrv.py │ │ ├── test_spacy_ita.py │ │ ├── test_spacy_jpn.py │ │ ├── test_spacy_kor.py │ │ ├── test_spacy_lit.py │ │ ├── test_spacy_mkd.py │ │ ├── test_spacy_nld.py │ │ ├── test_spacy_nob.py │ │ ├── test_spacy_pol.py │ │ ├── test_spacy_por.py │ │ ├── test_spacy_ron.py │ │ ├── test_spacy_rus.py │ │ ├── test_spacy_slv.py │ │ ├── test_spacy_spa.py │ │ ├── test_spacy_swe.py │ │ ├── test_spacy_ukr.py │ │ └── test_spacy_zho.py │ └── tests_stanza │ │ ├── __init__.py │ │ ├── test_stanza.py │ │ ├── test_stanza_afr.py │ │ ├── test_stanza_ang.py │ │ ├── test_stanza_ara.py │ │ ├── test_stanza_bel.py │ │ ├── test_stanza_bul.py │ │ ├── test_stanza_bxr.py │ │ ├── test_stanza_cat.py │ │ ├── test_stanza_ces.py │ │ ├── test_stanza_chu.py │ │ ├── test_stanza_cop.py │ │ ├── test_stanza_cym.py │ │ ├── test_stanza_dan.py │ │ ├── test_stanza_deu.py │ │ ├── test_stanza_ell.py │ │ ├── 
test_stanza_eng.py │ │ ├── test_stanza_est.py │ │ ├── test_stanza_eus.py │ │ ├── test_stanza_fao.py │ │ ├── test_stanza_fas.py │ │ ├── test_stanza_fin.py │ │ ├── test_stanza_fra.py │ │ ├── test_stanza_fro.py │ │ ├── test_stanza_gla.py │ │ ├── test_stanza_gle.py │ │ ├── test_stanza_glg.py │ │ ├── test_stanza_glv.py │ │ ├── test_stanza_got.py │ │ ├── test_stanza_grc.py │ │ ├── test_stanza_hbo.py │ │ ├── test_stanza_heb.py │ │ ├── test_stanza_hin.py │ │ ├── test_stanza_hrv.py │ │ ├── test_stanza_hsb.py │ │ ├── test_stanza_hun.py │ │ ├── test_stanza_hye.py │ │ ├── test_stanza_ind.py │ │ ├── test_stanza_isl.py │ │ ├── test_stanza_ita.py │ │ ├── test_stanza_jpn.py │ │ ├── test_stanza_kat.py │ │ ├── test_stanza_kaz.py │ │ ├── test_stanza_kir.py │ │ ├── test_stanza_kmr.py │ │ ├── test_stanza_kor.py │ │ ├── test_stanza_kpv.py │ │ ├── test_stanza_lat.py │ │ ├── test_stanza_lav.py │ │ ├── test_stanza_lij.py │ │ ├── test_stanza_lit.py │ │ ├── test_stanza_lzh.py │ │ ├── test_stanza_mar.py │ │ ├── test_stanza_mlt.py │ │ ├── test_stanza_mya.py │ │ ├── test_stanza_myv.py │ │ ├── test_stanza_nds.py │ │ ├── test_stanza_nld.py │ │ ├── test_stanza_nno.py │ │ ├── test_stanza_nob.py │ │ ├── test_stanza_orv.py │ │ ├── test_stanza_ota.py │ │ ├── test_stanza_pcm.py │ │ ├── test_stanza_pol.py │ │ ├── test_stanza_por.py │ │ ├── test_stanza_qpm.py │ │ ├── test_stanza_ron.py │ │ ├── test_stanza_rus.py │ │ ├── test_stanza_san.py │ │ ├── test_stanza_slk.py │ │ ├── test_stanza_slv.py │ │ ├── test_stanza_sme.py │ │ ├── test_stanza_snd.py │ │ ├── test_stanza_spa.py │ │ ├── test_stanza_sqi.py │ │ ├── test_stanza_srp_latn.py │ │ ├── test_stanza_swe.py │ │ ├── test_stanza_tam.py │ │ ├── test_stanza_tel.py │ │ ├── test_stanza_tha.py │ │ ├── test_stanza_tur.py │ │ ├── test_stanza_uig.py │ │ ├── test_stanza_ukr.py │ │ ├── test_stanza_urd.py │ │ ├── test_stanza_vie.py │ │ ├── test_stanza_wol.py │ │ ├── test_stanza_xcl.py │ │ ├── test_stanza_zho_cn.py │ │ └── test_stanza_zho_tw.py ├── tests_results │ ├── 
__init__.py │ ├── test_results_filter.py │ ├── test_results_search.py │ └── test_results_sort.py ├── tests_settings │ ├── __init__.py │ ├── test_settings.py │ ├── test_settings_default.py │ ├── test_settings_dependency_parsing.py │ ├── test_settings_figs.py │ ├── test_settings_files.py │ ├── test_settings_general.py │ ├── test_settings_global.py │ ├── test_settings_lemmatization.py │ ├── test_settings_measures.py │ ├── test_settings_pos_tagging.py │ ├── test_settings_sentence_tokenization.py │ ├── test_settings_sentiment_analysis.py │ ├── test_settings_stop_word_lists.py │ ├── test_settings_syl_tokenization.py │ ├── test_settings_tables.py │ └── test_settings_word_tokenization.py ├── tests_utils │ ├── __init__.py │ ├── test_conversion.py │ ├── test_detection.py │ ├── test_excs.py │ ├── test_misc.py │ ├── test_paths.py │ ├── test_sorting.py │ └── test_threading.py ├── tests_widgets │ ├── __init__.py │ ├── test_boxes.py │ ├── test_buttons.py │ ├── test_editors.py │ ├── test_item_delegates.py │ ├── test_labels.py │ ├── test_layouts.py │ ├── test_lists.py │ └── test_widgets.py ├── wl_test_doc.py ├── wl_test_file_area.py ├── wl_test_init.py └── wl_test_lang_examples.py ├── trs ├── zho_cn.ts └── zho_tw.ts ├── utils ├── __init__.py ├── data_luong_nguyen_dinh_freq_syls_easy_1000.py ├── linux_compile_py_from_src.sh ├── linux_create_shortcut.py ├── wl_download_ci.py ├── wl_download_modern_botok.py ├── wl_generate_acks.py ├── wl_packaging.py ├── wl_packaging.spec ├── wl_trs_generate_ts_files.py ├── wl_trs_translate.py ├── wl_trs_utils.py └── wl_trs_zho_tw.py └── wordless ├── __init__.py ├── wl_checks ├── __init__.py ├── wl_checks_files.py ├── wl_checks_misc.py ├── wl_checks_tokens.py └── wl_checks_work_area.py ├── wl_colligation_extractor.py ├── wl_collocation_extractor.py ├── wl_concordancer.py ├── wl_concordancer_parallel.py ├── wl_dependency_parser.py ├── wl_dialogs ├── __init__.py ├── wl_dialogs.py ├── wl_dialogs_errs.py └── wl_dialogs_misc.py ├── wl_figs ├── __init__.py 
├── wl_figs.py ├── wl_figs_freqs.py └── wl_figs_stats.py ├── wl_file_area.py ├── wl_keyword_extractor.py ├── wl_main.py ├── wl_measures ├── __init__.py ├── wl_measure_utils.py ├── wl_measures_adjusted_freq.py ├── wl_measures_bayes_factor.py ├── wl_measures_dispersion.py ├── wl_measures_effect_size.py ├── wl_measures_lexical_density_diversity.py ├── wl_measures_misc.py ├── wl_measures_readability.py └── wl_measures_statistical_significance.py ├── wl_ngram_generator.py ├── wl_nlp ├── __init__.py ├── wl_dependency_parsing.py ├── wl_lemmatization.py ├── wl_matching.py ├── wl_nlp_utils.py ├── wl_pos_tagging.py ├── wl_sentence_tokenization.py ├── wl_sentiment_analysis.py ├── wl_stop_word_lists.py ├── wl_syl_tokenization.py ├── wl_texts.py ├── wl_token_processing.py ├── wl_word_detokenization.py └── wl_word_tokenization.py ├── wl_profiler.py ├── wl_results ├── __init__.py ├── wl_results_filter.py ├── wl_results_search.py └── wl_results_sort.py ├── wl_settings ├── __init__.py ├── wl_settings.py ├── wl_settings_default.py ├── wl_settings_dependency_parsing.py ├── wl_settings_figs.py ├── wl_settings_files.py ├── wl_settings_general.py ├── wl_settings_global.py ├── wl_settings_lemmatization.py ├── wl_settings_measures.py ├── wl_settings_pos_tagging.py ├── wl_settings_sentence_tokenization.py ├── wl_settings_sentiment_analysis.py ├── wl_settings_stop_word_lists.py ├── wl_settings_syl_tokenization.py ├── wl_settings_tables.py └── wl_settings_word_tokenization.py ├── wl_tagsets ├── __init__.py ├── wl_tagset_cat_universal.py ├── wl_tagset_dan_universal.py ├── wl_tagset_ell_universal.py ├── wl_tagset_eng_penn_treebank.py ├── wl_tagset_eng_universal.py ├── wl_tagset_eus_universal.py ├── wl_tagset_fra_universal.py ├── wl_tagset_hun_universal.py ├── wl_tagset_hye_universal.py ├── wl_tagset_jpn_unidic.py ├── wl_tagset_khm_alt.py ├── wl_tagset_kor_mecab.py ├── wl_tagset_lao_seqlabeling.py ├── wl_tagset_lao_yunshan_cup_2020.py ├── wl_tagset_nor_universal.py ├── 
wl_tagset_pcm_universal.py ├── wl_tagset_por_universal.py ├── wl_tagset_rus_open_corpora.py ├── wl_tagset_rus_russian_national_corpus.py ├── wl_tagset_rus_universal.py ├── wl_tagset_spa_universal.py ├── wl_tagset_tha_blackboard.py ├── wl_tagset_tha_orchid.py ├── wl_tagset_ukr_universal.py ├── wl_tagset_vie_underthesea.py ├── wl_tagset_xcl_universal.py └── wl_tagset_xct_botok.py ├── wl_utils ├── __init__.py ├── wl_conversion.py ├── wl_detection.py ├── wl_excs.py ├── wl_misc.py ├── wl_paths.py ├── wl_sorting.py └── wl_threading.py ├── wl_widgets ├── __init__.py ├── wl_boxes.py ├── wl_buttons.py ├── wl_editors.py ├── wl_item_delegates.py ├── wl_labels.py ├── wl_layouts.py ├── wl_lists.py ├── wl_tables.py └── wl_widgets.py └── wl_wordlist_generator.py /.codacy.yml: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: CI - Codacy 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # ---------------------------------------------------------------------- 18 | 19 | --- 20 | exclude_paths: 21 | - "data/**" 22 | - "doc/**" 23 | - "imgs/**" 24 | - "tests/files/**" 25 | - "trs/**" 26 | -------------------------------------------------------------------------------- /.codeclimate.yml: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: CI - Code Climate 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # ---------------------------------------------------------------------- 18 | 19 | version: "2" # required to adjust maintainability checks 20 | 21 | checks: 22 | argument-count: 23 | enabled: true 24 | config: 25 | threshold: 4 26 | complex-logic: 27 | enabled: true 28 | config: 29 | threshold: 4 30 | file-lines: 31 | enabled: false 32 | config: 33 | threshold: 250 34 | method-complexity: 35 | enabled: false 36 | config: 37 | threshold: 5 38 | method-count: 39 | enabled: true 40 | config: 41 | threshold: 20 42 | method-lines: 43 | enabled: false 44 | config: 45 | threshold: 25 46 | nested-control-flow: 47 | enabled: true 48 | config: 49 | threshold: 4 50 | return-statements: 51 | enabled: true 52 | config: 53 | threshold: 4 54 | similar-code: 55 | enabled: false 56 | config: 57 | threshold: #language-specific defaults. overrides affect all languages. 58 | identical-code: 59 | enabled: false 60 | config: 61 | threshold: #language-specific defaults. overrides affect all languages. 62 | 63 | exclude_patterns: 64 | - "data/**" 65 | - "doc/**" 66 | - "imgs/**" 67 | - "tests/files/**" 68 | - "trs/**" 69 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: CI - Codecov 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 
14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # ---------------------------------------------------------------------- 18 | 19 | [run] 20 | omit = 21 | .circleci/* 22 | .github/* 23 | data/* 24 | doc/* 25 | imgs/* 26 | requirements/* 27 | tests/* 28 | trs/* 29 | utils/* 30 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # GitHub: Sponsor button 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # ---------------------------------------------------------------------- 18 | 19 | custom: https://github.com/BLKSerene/Wordless#donating 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | --- 20 | name: Bug report 21 | about: Create a report to help us improve 22 | title: '' 23 | labels: '' 24 | assignees: '' 25 | 26 | --- 27 | 28 | **Describe the bug** 29 | A clear and concise description of what the bug is. 
30 | 31 | **To Reproduce** 32 | Steps to reproduce the behavior: 33 | 1. Go to '...' 34 | 2. Click on '....' 35 | 3. Scroll down to '....' 36 | 4. See error 37 | 38 | **Expected behavior** 39 | A clear and concise description of what you expected to happen. 40 | 41 | **Screenshots** 42 | If applicable, add screenshots to help explain your problem. 43 | 44 | **Environment information** 45 | - Operating System: [e.g. Windows 11 24H2 64-bit, macOS Sequoia 15.3, Ubuntu 24.04.1 64-bit] 46 | - Wordless Version: [e.g. 3.5.0] 47 | 48 | **Additional context** 49 | Add any other context about the problem here. 50 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | --- 20 | name: Feature request 21 | about: Suggest an idea for this project 22 | title: '' 23 | labels: '' 24 | assignees: '' 25 | 26 | --- 27 | 28 | **Is your feature request related to a problem? Please describe.** 29 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 30 | 31 | **Describe the solution you'd like** 32 | A clear and concise description of what you want to happen. 33 | 34 | **Describe alternatives you've considered** 35 | A clear and concise description of any alternative solutions or features you've considered. 36 | 37 | **Additional context** 38 | Add any other context or screenshots about the feature request here. 
39 | -------------------------------------------------------------------------------- /.github/workflows/pylint.yml: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: CI - Pylint 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # ---------------------------------------------------------------------- 18 | 19 | name: "Pylint" 20 | 21 | on: [push, pull_request] 22 | 23 | jobs: 24 | build: 25 | runs-on: ubuntu-latest 26 | 27 | steps: 28 | - uses: actions/checkout@v4 29 | 30 | - name: Setup Python 31 | uses: actions/setup-python@v5 32 | with: 33 | python-version: '3.11' 34 | architecture: 'x64' 35 | 36 | - name: Install dependencies 37 | run: | 38 | python -m pip install --upgrade pip setuptools 39 | pip install pylint 40 | 41 | - name: Analysing the code with Pylint 42 | run: | 43 | pylint $(git ls-files '*.py') 44 | -------------------------------------------------------------------------------- /.sonarcloud.properties: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: CI - SonarCloud 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # ---------------------------------------------------------------------- 18 | 19 | # Path to sources 20 | sonar.sources=.circleci, .github, utils, wordless 21 | # sonar.exclusions= 22 | # sonar.inclusions= 23 | 24 | # Path to tests 25 | sonar.tests=tests 26 | sonar.test.exclusions=tests/files/* 27 | # sonar.test.inclusions= 28 | 29 | # Source encoding 30 | sonar.sourceEncoding=UTF-8 31 | 32 | # Exclusions for copy-paste detection 33 | # sonar.cpd.exclusions= 34 | 35 | # Python version (for python projects only) 36 | sonar.python.version=3.11 37 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | If you would like to contribute to the development of *Wordless*, you can help with bug fixes, performance enhancements, or implementation of new features by submitting [pull requests](https://github.com/BLKSerene/Wordless/pulls) on GitHub. 20 | 21 | Besides, you may contribute by writing [wikis](https://github.com/BLKSerene/Wordless/wiki) on GitHub, making tutorial videos, or helping with the translation of the user interface and [documentation](/doc/doc_eng.md) into other languages. 
22 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 18 | 19 | 3.5.0 20 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/data/__init__.py -------------------------------------------------------------------------------- /data/unifont-16.0.04.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/data/unifont-16.0.04.otf -------------------------------------------------------------------------------- /doc/trs/zho_cn/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | 如果你有意助力 *Wordless* 的开发工作,你可以在 GitHub 上提交 [pull requests](https://github.com/BLKSerene/Wordless/pulls) 来帮助修复程序错误、改善性能或实现新功能。 20 | 21 | 此外,你还可以在 GitHub 上撰写[百科](https://github.com/BLKSerene/Wordless/wiki)、制作视频教程、或帮助将用户界面及[文档](/doc/doc_eng.md)翻译为其他语言。 22 | -------------------------------------------------------------------------------- /doc/trs/zho_tw/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | 如果你有意助力 *Wordless* 的開發工作,你可以在 GitHub 上提交 [pull requests](https://github.com/BLKSerene/Wordless/pulls) 來幫助修復程式錯誤、改善效能或實現新功能。 20 | 21 | 此外,你還可以在 GitHub 上撰寫[百科](https://github.com/BLKSerene/Wordless/wiki)、製作影片教程、或幫助將使用者介面及[文檔](/doc/doc_eng.md)翻譯為其他語言。 22 | -------------------------------------------------------------------------------- /doc/wl_logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/doc/wl_logo.png -------------------------------------------------------------------------------- /imgs/donating_alipay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/imgs/donating_alipay.png -------------------------------------------------------------------------------- /imgs/donating_paypal.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/imgs/donating_paypal.gif -------------------------------------------------------------------------------- /imgs/donating_wechat_pay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/imgs/donating_wechat_pay.png -------------------------------------------------------------------------------- /imgs/wechat_official_account.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/imgs/wechat_official_account.jpg -------------------------------------------------------------------------------- /imgs/wl_icon.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/imgs/wl_icon.icns -------------------------------------------------------------------------------- /imgs/wl_icon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/imgs/wl_icon.ico 
-------------------------------------------------------------------------------- /imgs/wl_icon_about.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/imgs/wl_icon_about.png -------------------------------------------------------------------------------- /imgs/wl_loading.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/imgs/wl_loading.png -------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Wordless: CI - Pylint 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # ---------------------------------------------------------------------- 18 | 19 | [MASTER] 20 | 21 | extension-pkg-whitelist= 22 | PyQt5 23 | 24 | [MESSAGES CONTROL] 25 | 26 | disable= 27 | # C0103, C0114, C0115, C0116 28 | invalid-name, 29 | missing-module-docstring, 30 | missing-class-docstring, 31 | missing-function-docstring, 32 | # C0301, C0302 33 | line-too-long, 34 | too-many-lines, 35 | 36 | # R0401 37 | cyclic-import, 38 | # R0801 39 | duplicate-code, 40 | # R0901, R0902, R0903, R0904, R0912, R0913, R0914, R0915, R0916, R0917 41 | too-many-ancestors, 42 | too-many-instance-attributes, 43 | too-few-public-methods, 44 | too-many-public-methods, 45 | too-many-boolean-expressions, 46 | too-many-arguments, 47 | too-many-locals, 48 | too-many-statements, 49 | too-many-branches, 50 | too-many-positional-arguments, 51 | # R1702, R1705, R1720, R1723, R1724 52 | too-many-nested-blocks, 53 | no-else-return, 54 | no-else-raise, 55 | no-else-break, 56 | no-else-continue, 57 | 58 | # W0201, W0212 59 | attribute-defined-outside-init, 60 | protected-access, 61 | # W0603, W0621 62 | global-statement, 63 | redefined-outer-name, 64 | 65 | # E0401 66 | import-error, 67 | # E0606 68 | possibly-used-before-assignment, 69 | -------------------------------------------------------------------------------- /requirements/requirements_dev.txt: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Wordless: Requirements files - Development 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 
9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # ---------------------------------------------------------------------- 18 | 19 | # NLP 20 | botok 21 | charset-normalizer 22 | khmer-nltk 23 | laonlp 24 | lingua-language-detector 25 | nltk 26 | pyphen 27 | pythainlp 28 | python-mecab-ko 29 | sacremoses 30 | simplemma 31 | stanza 32 | underthesea 33 | vaderSentiment 34 | 35 | ## pymorphy3 36 | pymorphy3[fast] 37 | pymorphy3-dicts-ru 38 | pymorphy3-dicts-uk 39 | 40 | ## spaCy 41 | spacy 42 | spacy-lookups-data 43 | spacy-pkuseg 44 | # Required by French and Slovenian models 45 | sentencepiece 46 | # Required by the Ukrainian model 47 | transformers 48 | 49 | ## SudachiPy 50 | sudachipy 51 | sudachidict-core 52 | 53 | # Micellaneous 54 | beautifulsoup4 55 | lxml 56 | matplotlib 57 | networkx 58 | numpy 59 | opencc-python-reimplemented 60 | openpyxl 61 | pyinstaller 62 | pypdf 63 | pyqt5 64 | python-docx 65 | python-pptx 66 | requests 67 | scipy 68 | wordcloud 69 | 70 | # For PyTorch on Linux using CPU 71 | # See: https://stackoverflow.com/a/57060441 72 | --index-url=https://download.pytorch.org/whl/cpu 73 | --extra-index-url=https://pypi.org/simple 74 | torch 75 | -------------------------------------------------------------------------------- /requirements/requirements_tests.txt: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # ---------------------------------------------------------------------- 3 | # Wordless: Requirements files - Tests 4 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 5 | # 6 | # This program is free software: you can redistribute it and/or 
modify 7 | # it under the terms of the GNU General Public License as published by 8 | # the Free Software Foundation, either version 3 of the License, or 9 | # (at your option) any later version. 10 | # 11 | # This program is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU General Public License 17 | # along with this program. If not, see . 18 | # ---------------------------------------------------------------------- 19 | 20 | # NLP 21 | botok == 0.9.0 22 | charset-normalizer == 3.4.2 23 | khmer-nltk == 1.6 24 | laonlp == 1.2.0 25 | lingua-language-detector == 2.1.1 26 | nltk == 3.9.1 27 | pyphen == 0.17.2 28 | pythainlp == 5.0.4 29 | sacremoses == 0.1.1 30 | simplemma == 1.1.2 31 | stanza == 1.10.1 32 | underthesea == 6.8.4 33 | vaderSentiment == 3.3.2 34 | 35 | ## python-mecab-ko 36 | python-mecab-ko == 1.3.7 37 | python-mecab-ko-dic == 2.1.1.post2 38 | 39 | ## pymorphy3 40 | pymorphy3[fast] == 2.0.3 41 | pymorphy3-dicts-ru == 2.4.417150.4580142 42 | pymorphy3-dicts-uk == 2.4.1.1.1663094765 43 | 44 | ## spaCy 45 | spacy == 3.8.4 46 | spacy-lookups-data == 1.0.5 47 | spacy-pkuseg == 1.0.0 48 | 49 | ## SudachiPy 50 | sudachipy == 0.6.10 51 | sudachidict_core == 20250129 52 | 53 | # Miscellaneous 54 | beautifulsoup4 55 | lxml 56 | matplotlib 57 | networkx 58 | numpy 59 | opencc-python-reimplemented 60 | openpyxl 61 | pypdf 62 | pyqt5 63 | pytest 64 | python-docx 65 | python-pptx 66 | requests 67 | scikit-learn == 1.4.2 # Underthesea is incompatible with scikit-learn 1.5 68 | scipy 69 | wordcloud 70 | 71 | # For PyTorch on Linux using CPU 72 | # See: https://stackoverflow.com/a/57060441 73 | --index-url=https://download.pytorch.org/whl/cpu 74 | --extra-index-url=https://pypi.org/simple 75 | torch 76 | 
-------------------------------------------------------------------------------- /requirements/requirements_tests_macos.txt: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # ---------------------------------------------------------------------- 3 | # Wordless: Requirements files - Tests on macOS 4 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 5 | # 6 | # This program is free software: you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License as published by 8 | # the Free Software Foundation, either version 3 of the License, or 9 | # (at your option) any later version. 10 | # 11 | # This program is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU General Public License 17 | # along with this program. If not, see . 
18 | # ---------------------------------------------------------------------- 19 | 20 | # NLP 21 | botok == 0.9.0 22 | charset-normalizer == 3.4.2 23 | khmer-nltk == 1.6 24 | laonlp == 1.2.0 25 | lingua-language-detector == 2.1.1 26 | nltk == 3.9.1 27 | pyphen == 0.17.2 28 | pythainlp == 5.0.4 29 | sacremoses == 0.1.1 30 | simplemma == 1.1.2 31 | stanza == 1.10.1 32 | underthesea == 6.8.4 33 | vaderSentiment == 3.3.2 34 | 35 | ## python-mecab-ko 36 | python-mecab-ko == 1.3.7 37 | python-mecab-ko-dic == 2.1.1.post2 38 | 39 | ## pymorphy3 40 | pymorphy3[fast] == 2.0.3 41 | pymorphy3-dicts-ru == 2.4.417150.4580142 42 | pymorphy3-dicts-uk == 2.4.1.1.1663094765 43 | 44 | ## spaCy 45 | spacy == 3.8.4 46 | spacy-lookups-data == 1.0.5 47 | # Install spacy-pkuseg separately with "--no-deps" to avoid depending on NumPy 2 48 | 49 | ## SudachiPy 50 | sudachipy == 0.6.10 51 | sudachidict_core == 20250129 52 | 53 | # Miscellaneous 54 | beautifulsoup4 55 | lxml 56 | matplotlib 57 | networkx 58 | numpy == 1.26.4 # PyTorch 2.2 is the last version supporting x86 version of macOS which is incompatible with NumPy 2.x 59 | opencc-python-reimplemented 60 | openpyxl 61 | pypdf 62 | pyqt5==5.15.10 # To be compatible with macOS < 11 on AppVeyor 63 | pytest 64 | python-docx 65 | python-pptx 66 | requests 67 | scikit-learn == 1.4.2 # Underthesea is incompatible with scikit-learn 1.5 68 | scipy 69 | wordcloud 70 | 71 | # For PyTorch on Linux using CPU 72 | # See: https://stackoverflow.com/a/57060441 73 | --index-url=https://download.pytorch.org/whl/cpu 74 | --extra-index-url=https://pypi.org/simple 75 | torch 76 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/__init__.py -------------------------------------------------------------------------------- 
/tests/files/file_area/misc/[eng_us] First token is a punctuation mark.txt: -------------------------------------------------------------------------------- 1 | "The first token is a punctuation mark. Take it! -------------------------------------------------------------------------------- /tests/files/file_area/misc/[eng_us] TTR = 1.txt: -------------------------------------------------------------------------------- 1 | The type-token ratio of this text is 1. Take it! 2 | -------------------------------------------------------------------------------- /tests/files/file_area/misc/[eng_us] Tags at start of text.txt: -------------------------------------------------------------------------------- 1 | There_TAG3 are tags at the start of this text. Take it! 2 | -------------------------------------------------------------------------------- /tests/files/file_area/misc/[other] No language support.txt: -------------------------------------------------------------------------------- 1 | There is no language support for this file. Take it! 
2 | -------------------------------------------------------------------------------- /tests/files/search_terms/empty.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/files/search_terms/empty.txt -------------------------------------------------------------------------------- /tests/files/search_terms/unicode_decode_error (utf_16).txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/files/search_terms/unicode_decode_error (utf_16).txt -------------------------------------------------------------------------------- /tests/files/wl_checks/wl_checks_files/dup.txt: -------------------------------------------------------------------------------- 1 | Duplicate 2 | -------------------------------------------------------------------------------- /tests/files/wl_checks/wl_checks_files/dup.xml: -------------------------------------------------------------------------------- 1 | Duplicate 2 | -------------------------------------------------------------------------------- /tests/files/wl_checks/wl_checks_files/empty_docx.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/files/wl_checks/wl_checks_files/empty_docx.docx -------------------------------------------------------------------------------- /tests/files/wl_checks/wl_checks_files/empty_txt.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/files/wl_checks/wl_checks_files/empty_txt.txt -------------------------------------------------------------------------------- 
/tests/files/wl_file_area/file_types/csv.csv: -------------------------------------------------------------------------------- 1 | ,, 2 | ,, 3 | ,3-2,3-3, 4 | ,, 5 | ,, 6 | ,6-2,6-3, 7 | ,, 8 | ,, 9 | -------------------------------------------------------------------------------- /tests/files/wl_file_area/file_types/docx.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/files/wl_file_area/file_types/docx.docx -------------------------------------------------------------------------------- /tests/files/wl_file_area/file_types/html.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | This is a title 5 | 6 | 7 |

Hello world!

8 | 9 | 10 | -------------------------------------------------------------------------------- /tests/files/wl_file_area/file_types/lrc.lrc: -------------------------------------------------------------------------------- 1 | [ar:Lyrics artist] 2 | 3 | [al:Album where the song is from] 4 | 5 | [ti:Lyrics (song) title] 6 | 7 | [au:Creator of the Songtext] 8 | [length:How long the song is] 9 | [by:Creator of the LRC file] 10 | 11 | [offset:+/- Overall timestamp adjustment in milliseconds, + shifts time up, - shifts down] 12 | 13 | [re:The player or editor that created the LRC file] 14 | 15 | [ve:version of program] 16 | 17 | [00:00.00] 18 | [00:01.11]Lyrics line 1 19 | [99:99.99][00:02:22][99:99.99]Lyrics line 2 (with invalid time tags) 20 | [00:04.44][00:06.66]Repeating lyrics line 4 & 6 21 | [00:03.33] [00:05.55] Repeating lyrics line 3 & 5 (with whitespace) 22 | [00:07.00] 23 | [00:07.777]Lyrics line 7 (3-digit after seconds) 24 | [00:08:88]Lyrics line 8 (colon separator after seconds) 25 | [00:09.99][Lyrics in square brackets at the beginning] Lyrics line 9 [with lyrics in square brackets at the end] 26 | [00:10.10] <10:10.10> Lyrics <10:10.11> line 10 <10:10.12> (with word time tags) 27 | [00:11.11] Lyrics <10:11.11> line 11 2 |
7 | 8 | 9 | 10 | Hello world! 11 | 12 | 13 | Bonjour tout le monde! 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /tests/files/wl_file_area/file_types/xlsx.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/files/wl_file_area/file_types/xlsx.xlsx -------------------------------------------------------------------------------- /tests/files/wl_file_area/file_types/xml.xml: -------------------------------------------------------------------------------- 1 | [ACET factsheets & newsletters]. Sample containing about 6688 words of miscellanea (domain: social science) Data capture and transcription Oxford University Press 2 |
3 | FACTSHEET WHAT IS AIDS?

4 | AIDS (Acquired Immune Deficiency Syndrome)is a condition caused by a virus called HIV (Human Immuno Deficiency Virus). 5 | This virus affects the body's defence system so that it cannot fight infection.

6 | -------------------------------------------------------------------------------- /tests/files/wl_file_area/misc/vie_tokenized.txt: -------------------------------------------------------------------------------- 1 | Tiếng Việt , cũng gọi_là tiếng Việt_Nam [ 9_] hay Việt_ngữ là ngôn_ngữ của người Việt và là ngôn_ngữ chính_thức tại Việt_Nam . Đây là tiếng_mẹ_đẻ của khoảng 85 % dân_cư Việt_Nam cùng với hơn 4 triệu người Việt_kiều . Tiếng Việt còn là ngôn_ngữ thứ hai của các dân_tộc_thiểu_số tại Việt_Nam và là ngôn_ngữ dân_tộc_thiểu_số được công_nhận tại Cộng_hòa_Séc . 2 | 3 | _test test_ _ 4 | -------------------------------------------------------------------------------- /tests/files/wl_file_area/tags/tokenized_tagged.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | This is the first sentence . This_TAG3RunningToken_TAG3 is the second sentence . 4 | 5 | 6 | This is the third sentence . 7 | 8 | 9 | -------------------------------------------------------------------------------- /tests/files/wl_file_area/tags/tokenized_untagged.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | This is the first sentence . This is the second sentence . 4 | 5 | 6 | This is the third sentence . 7 | 8 | 9 | -------------------------------------------------------------------------------- /tests/files/wl_file_area/tags/untokenized_tagged.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | This is the first sentence. This_TAG3 is the second sentence. 4 | 5 | 6 | This is the third sentence. 7 | 8 | 9 | -------------------------------------------------------------------------------- /tests/files/wl_file_area/tags/untokenized_untagged.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | This is the first sentence. This is the second sentence. 4 | 5 | 6 | This is the third sentence. 
7 | 8 | 9 | -------------------------------------------------------------------------------- /tests/files/wl_file_area/unicode_decode_error/unicode_decode_error (utf_16).csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/files/wl_file_area/unicode_decode_error/unicode_decode_error (utf_16).csv -------------------------------------------------------------------------------- /tests/files/wl_file_area/unicode_decode_error/unicode_decode_error (utf_16).html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/files/wl_file_area/unicode_decode_error/unicode_decode_error (utf_16).html -------------------------------------------------------------------------------- /tests/files/wl_file_area/unicode_decode_error/unicode_decode_error (utf_16).txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/files/wl_file_area/unicode_decode_error/unicode_decode_error (utf_16).txt -------------------------------------------------------------------------------- /tests/files/wl_file_area/unicode_decode_error/unicode_decode_error (utf_16).xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/files/wl_file_area/unicode_decode_error/unicode_decode_error (utf_16).xml -------------------------------------------------------------------------------- /tests/test_concordancer_parallel.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Work Area - Parallel 
Concordancer 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # ---------------------------------------------------------------------- 18 | 19 | import glob 20 | 21 | from tests import wl_test_init 22 | from wordless import wl_concordancer_parallel 23 | from wordless.wl_dialogs import wl_dialogs_misc 24 | 25 | def test_concordancer_parallel(): 26 | main = wl_test_init.Wl_Test_Main(switch_lang_utils = 'fast') 27 | 28 | settings = main.settings_custom['concordancer_parallel'] 29 | 30 | settings['search_settings']['multi_search_mode'] = True 31 | settings['search_settings']['search_terms'] = wl_test_init.SEARCH_TERMS 32 | 33 | for i in range(2): 34 | match i: 35 | case 0: 36 | wl_test_init.select_test_files(main, no_files = [0, 1, 2]) 37 | case 1: 38 | wl_test_init.select_test_files( 39 | main, 40 | no_files = list(range(1, 3 + len(glob.glob('tests/files/file_area/misc/*.txt')))) 41 | ) 42 | 43 | print(f"Files: {' | '.join(wl_test_init.get_test_file_names(main))}") 44 | 45 | wl_concordancer_parallel.Wl_Worker_Concordancer_Parallel_Table( 46 | main, 47 | dialog_progress = wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(main), 48 | update_gui = update_gui 49 | ).run() 50 | 51 | def update_gui(err_msg, concordance_lines): 52 | print(err_msg) 53 | assert not err_msg 54 | assert concordance_lines 55 | 56 | for 
concordance_line in concordance_lines: 57 | assert len(concordance_line) == 2 58 | 59 | parallel_unit_no, len_parallel_units = concordance_line[0] 60 | 61 | # Parallel Unit No. 62 | assert parallel_unit_no >= 1 63 | assert len_parallel_units >= 1 64 | 65 | # Parallel Units 66 | for parallel_unit in concordance_line[1]: 67 | assert len(parallel_unit) == 2 68 | 69 | if __name__ == '__main__': 70 | test_concordancer_parallel() 71 | -------------------------------------------------------------------------------- /tests/tests_checks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_checks/__init__.py -------------------------------------------------------------------------------- /tests/tests_checks/test_checks_files.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Checks - Files 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # ---------------------------------------------------------------------- 18 | 19 | from tests import wl_test_init 20 | from wordless.wl_checks import wl_checks_files 21 | from wordless.wl_utils import wl_paths 22 | 23 | def get_normalized_file_path(file_name): 24 | return wl_paths.get_normalized_path(f'tests/files/wl_checks/wl_checks_files/{file_name}') 25 | 26 | main = wl_test_init.Wl_Test_Main() 27 | main.settings_custom['file_area']['files_open'] = [ 28 | { 29 | 'path_orig': get_normalized_file_path('dup.txt') 30 | } 31 | ] 32 | 33 | FILE_PATHS_UNSUPPORTED = [ 34 | get_normalized_file_path('unsupported.unsupported') 35 | ] 36 | FILE_PATHS_EMPTY = [ 37 | get_normalized_file_path('empty_txt.txt'), 38 | get_normalized_file_path('empty_docx.docx') 39 | ] 40 | FILE_PATHS_DUP = [ 41 | get_normalized_file_path('dup.txt'), 42 | get_normalized_file_path('dup.xml'), 43 | get_normalized_file_path('dup.xml') 44 | ] 45 | 46 | def test_check_file_paths_unsupported(): 47 | _, files_unsupported = wl_checks_files.check_file_paths_unsupported(main, ['supported.txt'] + FILE_PATHS_UNSUPPORTED) 48 | 49 | assert files_unsupported == FILE_PATHS_UNSUPPORTED 50 | 51 | def test_check_file_paths_empty(): 52 | _, files_empty = wl_checks_files.check_file_paths_empty(main, [FILE_PATHS_DUP[0]] + FILE_PATHS_EMPTY) 53 | 54 | assert files_empty == FILE_PATHS_EMPTY 55 | 56 | def test_check_file_paths_duplicate(): 57 | _, files_dup = wl_checks_files.check_file_paths_dup(main, FILE_PATHS_DUP) 58 | 59 | assert files_dup == FILE_PATHS_DUP[:2] 60 | 61 | def test_check_err_file_area(): 62 | assert wl_checks_files.check_err_file_area(main, '') 63 | assert not wl_checks_files.check_err_file_area(main, 'test') 64 | 65 | if __name__ == '__main__': 66 | test_check_file_paths_unsupported() 67 | test_check_file_paths_empty() 68 | test_check_file_paths_duplicate() 69 | 70 | test_check_err_file_area() 71 | -------------------------------------------------------------------------------- 
/tests/tests_checks/test_checks_misc.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Checks - Miscellaneous 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # ---------------------------------------------------------------------- 18 | 19 | import os 20 | import shutil 21 | 22 | from wordless.wl_checks import wl_checks_misc 23 | 24 | def test_check_custom_settings(): 25 | settings_custom = settings_default = { 26 | 'key_1': 'val_2', 27 | 'key_2': { 28 | 'key_3': 'val_3', 29 | 'key_4': 'val_4' 30 | } 31 | } 32 | 33 | assert wl_checks_misc.check_custom_settings(settings_custom, settings_default) 34 | assert not wl_checks_misc.check_custom_settings(settings_custom, {}) 35 | 36 | def test_check_dir(): 37 | if os.path.exists('temp'): 38 | shutil.rmtree('temp') 39 | 40 | wl_checks_misc.check_dir('temp') 41 | 42 | assert os.path.exists('temp') 43 | 44 | os.rmdir('temp') 45 | 46 | def test_check_new_name(): 47 | assert wl_checks_misc.check_new_name('new_name', ['new_name', 'new_name (2)', 'new_name (4)']) == 'new_name (3)' 48 | assert wl_checks_misc.check_new_name( 49 | 'new_name', ['new_name', 'new_name (2)', 'new_name (4)'], 50 | separator = '/' 51 | ) == 'new_name/2' 52 | 53 | def 
test_check_new_path(): 54 | if os.path.exists('temp'): 55 | shutil.rmtree('temp') 56 | 57 | os.mkdir('temp') 58 | 59 | for file_name in ('temp', 'temp (2)', 'temp (4)'): 60 | with open(f'temp/{file_name}.temp', 'w', encoding = 'utf_8'): 61 | pass 62 | 63 | assert wl_checks_misc.check_new_path('temp/temp.temp') == 'temp/temp (3).temp' 64 | 65 | shutil.rmtree('temp') 66 | 67 | if __name__ == '__main__': 68 | test_check_custom_settings() 69 | test_check_dir() 70 | test_check_new_name() 71 | test_check_new_path() 72 | -------------------------------------------------------------------------------- /tests/tests_dialogs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_dialogs/__init__.py -------------------------------------------------------------------------------- /tests/tests_dialogs/test_dialogs_errs.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Dialogs - Errors 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # ---------------------------------------------------------------------- 18 | 19 | from tests import wl_test_init 20 | from wordless.wl_dialogs import wl_dialogs_errs 21 | 22 | main = wl_test_init.Wl_Test_Main() 23 | 24 | def test_wl_dialog_err(): 25 | wl_dialogs_errs.Wl_Dialog_Err(main, title = 'test').open() 26 | 27 | def test_wl_dialog_err_files(): 28 | wl_dialogs_errs.Wl_Dialog_Err_Files(main, title = 'test').open() 29 | 30 | def test_wl_dialog_err_info_copy(): 31 | wl_dialogs_errs.Wl_Dialog_Err_Info_Copy(main, title = 'test').open() 32 | 33 | def test_wl_dialog_err_fatal(): 34 | wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg = 'test').open() 35 | 36 | def test_wl_dialog_err_download_model(): 37 | wl_dialogs_errs.Wl_Dialog_Err_Download_Model(main, err_msg = 'test').open() 38 | 39 | if __name__ == '__main__': 40 | test_wl_dialog_err() 41 | test_wl_dialog_err_files() 42 | 43 | test_wl_dialog_err_info_copy() 44 | test_wl_dialog_err_fatal() 45 | test_wl_dialog_err_download_model() 46 | -------------------------------------------------------------------------------- /tests/tests_dialogs/test_dialogs_misc.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Dialogs - Miscellaneous 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 
14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # ---------------------------------------------------------------------- 18 | 19 | from tests import wl_test_init 20 | from wordless.wl_dialogs import wl_dialogs_misc 21 | 22 | main = wl_test_init.Wl_Test_Main() 23 | 24 | def test_wl_dialog_progress(): 25 | wl_dialog_progress = wl_dialogs_misc.Wl_Dialog_Progress(main, text = 'test') 26 | wl_dialog_progress.open() 27 | wl_dialog_progress.update_elapsed_time() 28 | wl_dialog_progress.update_progress('test') 29 | 30 | def test_wl_dialog_progress_process_data(): 31 | wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(main).open() 32 | 33 | def test_wl_dialog_progress_download_model(): 34 | wl_dialogs_misc.Wl_Dialog_Progress_Download_Model(main).open() 35 | 36 | def test_wl_dialog_restart_required(): 37 | wl_dialogs_misc.Wl_Dialog_Restart_Required(main).open() 38 | 39 | if __name__ == '__main__': 40 | test_wl_dialog_progress() 41 | test_wl_dialog_progress_process_data() 42 | test_wl_dialog_progress_download_model() 43 | test_wl_dialog_restart_required() 44 | -------------------------------------------------------------------------------- /tests/tests_figs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_figs/__init__.py -------------------------------------------------------------------------------- /tests/tests_file_area/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_file_area/__init__.py -------------------------------------------------------------------------------- /tests/tests_measures/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_measures/__init__.py -------------------------------------------------------------------------------- /tests/tests_measures/test_measures_bayes_factor.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Measures - Bayes factor 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # ---------------------------------------------------------------------- 18 | 19 | import numpy 20 | 21 | from tests import wl_test_init 22 | from wordless.wl_measures import wl_measures_bayes_factor 23 | 24 | main = wl_test_init.Wl_Test_Main() 25 | 26 | def test_bayes_factor_log_likelihood_ratio_test(): 27 | numpy.testing.assert_array_equal( 28 | wl_measures_bayes_factor.bayes_factor_log_likelihood_ratio_test( 29 | main, 30 | numpy.array([0] * 2), 31 | numpy.array([0] * 2), 32 | numpy.array([0] * 2), 33 | numpy.array([0] * 2) 34 | ), 35 | numpy.array([0] * 2) 36 | ) 37 | 38 | def test_bayes_factor_students_t_test_2_sample(): 39 | numpy.testing.assert_array_equal( 40 | wl_measures_bayes_factor.bayes_factor_students_t_test_2_sample( 41 | main, 42 | numpy.array([[0] * 5] * 2), 43 | numpy.array([[0] * 5] * 2), 44 | ), 45 | numpy.array([0] * 2) 46 | ) 47 | 48 | if __name__ == '__main__': 49 | test_bayes_factor_log_likelihood_ratio_test() 50 | test_bayes_factor_students_t_test_2_sample() 51 | -------------------------------------------------------------------------------- /tests/tests_measures/test_measures_misc.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Measures - Miscellaneous 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

from tests import wl_test_init
from wordless.wl_measures import wl_measures_misc

main = wl_test_init.Wl_Test_Main()

def test_modes():
    """Modes of a multiset: all values tied for the highest frequency, sorted."""
    nums = [1, 3, 3, 3, 2, 2, 1, 2, 5, 4]

    # 2 and 3 both occur three times
    assert wl_measures_misc.modes(nums) == [2, 3]
    # A constant sequence has exactly one mode
    assert wl_measures_misc.modes([0] * 10) == [0]
    # When every value is distinct, every value is a mode
    assert wl_measures_misc.modes([*range(10)]) == [*range(10)]
    # An empty input has no modes
    assert not wl_measures_misc.modes([])

if __name__ == '__main__':
    test_modes()

# ----------------------------------------------------------------------
# Tests: NLP - Stop word lists
# Copyright (C) 2018-2025 Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

import pytest

from tests import wl_test_init
from wordless.wl_nlp import wl_stop_word_lists

main = wl_test_init.Wl_Test_Main()

# One (language, list name) pair per supported stop word list.
test_stop_word_lists = [
    (lang, stop_word_list)
    for lang, stop_word_lists in main.settings_global['stop_word_lists'].items()
    for stop_word_list in stop_word_lists
]

@pytest.mark.parametrize('lang, stop_word_list', test_stop_word_lists)
def test_get_stop_word_list(lang, stop_word_list):
    """Fetch every stop word list and sanity-check its contents."""
    stop_words = wl_stop_word_lists.wl_get_stop_word_list(main, lang, stop_word_list = stop_word_list)

    print(f'Number of stop words ({lang} / {stop_word_list}): {len(stop_words)}')

    if stop_word_list == 'custom':
        # The custom list defaults to an empty set
        assert stop_words == set()
    else:
        # Non-custom lists must be non-empty and contain no blank entries
        assert stop_words
        assert all(stop_word.strip() for stop_word in stop_words)

def test_filter_stop_words():
    """Filtering removes stop words, honoring the case-sensitivity setting."""
    assert wl_stop_word_lists.wl_filter_stop_words(main, items = ['a', 'aa'], lang = 'eng_us') == ['aa']
    assert wl_stop_word_lists.wl_filter_stop_words(main, items = [], lang = 'eng_us') == []

    # Case-insensitive: both 'A' and 'a' match the stop word 'a'
    main.settings_custom['stop_word_lists']['stop_word_list_settings']['case_sensitive'] = False
    assert wl_stop_word_lists.wl_filter_stop_words(main, items = ['A', 'a'], lang = 'eng_us') == []
    # Case-sensitive: only the exact lowercase form is filtered out
    main.settings_custom['stop_word_lists']['stop_word_list_settings']['case_sensitive'] = True
    assert wl_stop_word_lists.wl_filter_stop_words(main, items = ['A', 'a'], lang = 'eng_us') == ['A']

def test_stop_word_lists_misc():
    # Other languages: exercise the fallback path for an unknown language code
    wl_stop_word_lists.wl_get_stop_word_list(main, lang = 'test')

if __name__ == '__main__':
    # Local names avoid shadowing the module-level test_stop_word_lists entries
    for test_lang, test_list_name in test_stop_word_lists:
        test_get_stop_word_list(test_lang, test_list_name)

    test_filter_stop_words()
    test_stop_word_lists_misc()

# ----------------------------------------------------------------------
# Tests: NLP - spaCy - Danish
# Copyright (C) 2018-2025 Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

from tests.tests_nlp.tests_spacy import test_spacy

def test_spacy_dan():
    """Check spaCy's Danish pipeline against pre-recorded outputs."""
    # Fine-grained and universal tag sets coincide for this model
    pos_tags = [('Dansk', 'NOUN'), ('er', 'AUX'), ('et', 'DET'), ('østnordisk', 'ADJ'), ('sprog', 'NOUN'), ('indenfor', 'ADP'), ('den', 'DET'), ('germanske', 'ADJ'), ('gren', 'NOUN'), ('af', 'ADP'), ('den', 'DET'), ('indoeuropæiske', 'ADJ'), ('sprogfamilie', 'NOUN'), ('.', 'PUNCT')]
    sentences_trf = ['Dansk er et østnordisk sprog indenfor den germanske gren af den indoeuropæiske sprogfamilie.', 'Det danske sprog tales af ca. seks millioner mennesker, hovedsageligt i Danmark, men også i Sydslesvig, på Færøerne og Grønland.[1]']
    tokens = ['Dansk', 'er', 'et', 'østnordisk', 'sprog', 'indenfor', 'den', 'germanske', 'gren', 'af', 'den', 'indoeuropæiske', 'sprogfamilie', '.']
    lemmas = ['dansk', 'være', 'en', 'østnordisk', 'sprog', 'indenfor', 'den', 'germansk', 'gren', 'af', 'den', 'indoeuropæisk', 'sprogfamilie', '.']
    # Tuples are (token, head, relation, offset to head)
    dependencies = [('Dansk', 'sprog', 'nsubj', 4), ('er', 'sprog', 'cop', 3), ('et', 'sprog', 'det', 2), ('østnordisk', 'sprog', 'amod', 1), ('sprog', 'sprog', 'ROOT', 0), ('indenfor', 'gren', 'case', 3), ('den', 'gren', 'det', 2), ('germanske', 'gren', 'amod', 1), ('gren', 'sprog', 'nmod', -4), ('af', 'sprogfamilie', 'case', 3), ('den', 'sprogfamilie', 'det', 2), ('indoeuropæiske', 'sprogfamilie', 'amod', 1), ('sprogfamilie', 'gren', 'nmod', -4), ('.', 'sprog', 'punct', -9)]

    test_spacy.wl_test_spacy(
        lang = 'dan',
        results_sentence_tokenize_trf = sentences_trf,
        results_word_tokenize = tokens,
        results_pos_tag = pos_tags,
        results_pos_tag_universal = pos_tags,
        results_lemmatize = lemmas,
        results_dependency_parse = dependencies
    )

if __name__ == '__main__':
    test_spacy_dan()
# ----------------------------------------------------------------------
# Tests: NLP - spaCy - Greek (Modern)
# Copyright (C) 2018-2025 Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

from tests.tests_nlp.tests_spacy import test_spacy

def test_spacy_ell():
    """Check spaCy's Modern Greek pipelines against pre-recorded outputs."""
    # Fine-grained and universal tag sets coincide for this model
    pos_tags = [('Η', 'DET'), ('ελληνική', 'ADJ'), ('γλώσσα', 'NOUN'), ('ανήκει', 'VERB'), ('στην', 'ADP'), ('ινδοευρωπαϊκή', 'ADJ'), ('οικογένεια[9', 'NOUN'), (']', 'NOUN'), ('secεπίσης', 'X'), ('στο', 'ADP'), ('βαλκανικό', 'ADJ'), ('γλωσσικό', 'ADJ'), ('δεσμό', 'NOUN'), ('.', 'PUNCT')]
    # The transformer and lg models segment the second sentence differently
    sentences_trf = ['Η ελληνική γλώσσα ανήκει στην ινδοευρωπαϊκή οικογένεια[9] secεπίσης στο βαλκανικό γλωσσικό δεσμό.', 'ελληνική γλώσσα', ', έχουμε γραπτά κείμενα ήδη από τον 15ο αιώνα π.', 'Χ..']
    sentences_lg = ['Η ελληνική γλώσσα ανήκει στην ινδοευρωπαϊκή οικογένεια[9] secεπίσης στο βαλκανικό γλωσσικό δεσμό.', 'ελληνική γλώσσα, έχουμε γραπτά κείμενα ήδη από τον 15ο αιώνα π.', 'Χ..']
    tokens = ['Η', 'ελληνική', 'γλώσσα', 'ανήκει', 'στην', 'ινδοευρωπαϊκή', 'οικογένεια[9', ']', 'secεπίσης', 'στο', 'βαλκανικό', 'γλωσσικό', 'δεσμό', '.']
    lemmas = ['ο', 'ελληνικός', 'γλώσσα', 'ανήκω', 'σε ο', 'ινδοευρωπαϊκός', 'οικογένεια[9', ']', 'secεπίσης', 'σε ο', 'βαλκανικός', 'γλωσσικός', 'δεσμός', '.']
    # Tuples are (token, head, relation, offset to head)
    dependencies = [('Η', 'γλώσσα', 'det', 2), ('ελληνική', 'γλώσσα', 'amod', 1), ('γλώσσα', 'ανήκει', 'nsubj', 1), ('ανήκει', 'ανήκει', 'ROOT', 0), ('στην', 'οικογένεια[9', 'case', 2), ('ινδοευρωπαϊκή', 'οικογένεια[9', 'amod', 1), ('οικογένεια[9', 'ανήκει', 'obl', -3), (']', 'ανήκει', 'obl', -4), ('secεπίσης', ']', 'nmod', -1), ('στο', 'δεσμό', 'case', 3), ('βαλκανικό', 'δεσμό', 'amod', 2), ('γλωσσικό', 'δεσμό', 'amod', 1), ('δεσμό', ']', 'nmod', -5), ('.', 'ανήκει', 'punct', -10)]

    test_spacy.wl_test_spacy(
        lang = 'ell',
        results_sentence_tokenize_trf = sentences_trf,
        results_sentence_tokenize_lg = sentences_lg,
        results_word_tokenize = tokens,
        results_pos_tag = pos_tags,
        results_pos_tag_universal = pos_tags,
        results_lemmatize = lemmas,
        results_dependency_parse = dependencies
    )

if __name__ == '__main__':
    test_spacy_ell()

# ----------------------------------------------------------------------
# Tests: NLP - spaCy - Italian
# Copyright (C) 2018-2025 Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

from tests.tests_nlp.tests_spacy import test_spacy

def test_spacy_ita():
    """Check spaCy's Italian pipelines against pre-recorded outputs."""
    # Both the transformer and lg models produce the same segmentation
    sentences = ["L'italiano è una lingua romanza parlata principalmente in Italia.", "Per ragioni storiche e geografiche, l'italiano è la lingua romanza meno divergente dal latino (complessivamente a pari merito, anche se in parametri diversi, con la lingua sarda).[2][3][4][5]"]
    tokens = ["L'", 'italiano', 'è', 'una', 'lingua', 'romanza', 'parlata', 'principalmente', 'in', 'Italia', '.']
    # Fine-grained (ISDT) tags vs. universal POS tags
    pos_tags_fine = [("L'", 'RD'), ('italiano', 'S'), ('è', 'V'), ('una', 'RI'), ('lingua', 'S'), ('romanza', 'A'), ('parlata', 'V'), ('principalmente', 'B'), ('in', 'E'), ('Italia', 'SP'), ('.', 'FS')]
    pos_tags_universal = [("L'", 'DET'), ('italiano', 'NOUN'), ('è', 'AUX'), ('una', 'DET'), ('lingua', 'NOUN'), ('romanza', 'ADJ'), ('parlata', 'VERB'), ('principalmente', 'ADV'), ('in', 'ADP'), ('Italia', 'PROPN'), ('.', 'PUNCT')]
    lemmas = ['il', 'italiano', 'essere', 'uno', 'lingua', 'romanza', 'parlare', 'principalmente', 'in', 'Italia', '.']
    # Tuples are (token, head, relation, offset to head)
    dependencies = [("L'", 'italiano', 'det', 1), ('italiano', 'lingua', 'nsubj', 3), ('è', 'lingua', 'cop', 2), ('una', 'lingua', 'det', 1), ('lingua', 'lingua', 'ROOT', 0), ('romanza', 'lingua', 'amod', -1), ('parlata', 'lingua', 'amod', -2), ('principalmente', 'parlata', 'advmod', -1), ('in', 'Italia', 'case', 1), ('Italia', 'parlata', 'obl', -3), ('.', 'lingua', 'punct', -6)]

    test_spacy.wl_test_spacy(
        lang = 'ita',
        results_sentence_tokenize_trf = sentences,
        results_sentence_tokenize_lg = sentences,
        results_word_tokenize = tokens,
        results_pos_tag = pos_tags_fine,
        results_pos_tag_universal = pos_tags_universal,
        results_lemmatize = lemmas,
        results_dependency_parse = dependencies
    )

if __name__ == '__main__':
    test_spacy_ita()
# ----------------------------------------------------------------------
# Tests: NLP - spaCy - Korean
# Copyright (C) 2018-2025 Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

from tests.tests_nlp.tests_spacy import test_spacy

def test_spacy_kor():
    """Check spaCy's Korean pipelines against pre-recorded outputs."""
    # Both the transformer and lg models produce the same segmentation
    sentences = ['한국어(韓國語), 조선어(朝鮮語)는 대한민국과 조선민주주의인민공화국의 공용어이다.', '둘은 표기나 문법, 동사 어미나 표현에서 약간의 차이가 있다.']
    tokens = ['한국어', '(', '韓國語', ')', ',', '조선어', '(', '朝鮮語', ')', '는', '대한민국과', '조선민주주의인민공화국의', '공용어이다', '.']
    # Fine-grained (Sejong-style) tags vs. universal POS tags
    pos_tags_fine = [('한국어', 'nq'), ('(', 'sl'), ('韓國語', 'nq'), (')', 'sr'), (',', 'sp'), ('조선어', 'nq'), ('(', 'sl'), ('朝鮮語', 'nq'), (')', 'sr'), ('는', 'jxt'), ('대한민국과', 'nq+jcj'), ('조선민주주의인민공화국의', 'nq+ncn+jcm'), ('공용어이다', 'ncn+jp+ef'), ('.', 'sf')]
    pos_tags_universal = [('한국어', 'PROPN'), ('(', 'PUNCT'), ('韓國語', 'PROPN'), (')', 'PUNCT'), (',', 'PUNCT'), ('조선어', 'PROPN'), ('(', 'PUNCT'), ('朝鮮語', 'PROPN'), (')', 'PUNCT'), ('는', 'ADP'), ('대한민국과', 'CCONJ'), ('조선민주주의인민공화국의', 'PROPN'), ('공용어이다', 'VERB'), ('.', 'PUNCT')]
    # Lemmas are morpheme-segmented with '+' separators
    lemmas = ['한국어', '(', '韓國語', ')', ',', '조선어', '(', '朝鮮語', ')', '는', '대한민국+과', '조선민주주의인민공+화국+의', '공용어+이+다', '.']
    # Tuples are (token, head, relation, offset to head)
    dependencies = [('한국어', '공용어이다', 'advmod', 12), ('(', '韓國語', 'punct', 1), ('韓國語', '한국어', 'appos', -2), (')', '韓國語', 'punct', -1), (',', '한국어', 'punct', -4), ('조선어', '한국어', 'flat', -5), ('(', '朝鮮語', 'punct', 1), ('朝鮮語', '조선어', 'appos', -2), (')', '朝鮮語', 'punct', -1), ('는', '조선어', 'case', -4), ('대한민국과', '공용어이다', 'nmod', 2), ('조선민주주의인민공화국의', '대한민국과', 'conj', -1), ('공용어이다', '공용어이다', 'ROOT', 0), ('.', '공용어이다', 'punct', -1)]

    test_spacy.wl_test_spacy(
        lang = 'kor',
        results_sentence_tokenize_trf = sentences,
        results_sentence_tokenize_lg = sentences,
        results_word_tokenize = tokens,
        results_pos_tag = pos_tags_fine,
        results_pos_tag_universal = pos_tags_universal,
        results_lemmatize = lemmas,
        results_dependency_parse = dependencies
    )

if __name__ == '__main__':
    test_spacy_kor()

# ----------------------------------------------------------------------
# Tests: NLP - spaCy - Norwegian (Bokmål)
# Copyright (C) 2018-2025 Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

from tests.tests_nlp.tests_spacy import test_spacy

def test_spacy_nob():
    """Check spaCy's Norwegian (Bokmål) pipelines against pre-recorded outputs."""
    # Both the transformer and lg models produce the same segmentation;
    # fine-grained and universal tag sets coincide for this model
    sentences = ['Bokmål er en av to offisielle målformer av norsk skriftspråk, hvorav den andre er nynorsk.', 'I skrift har 87,3 % bokmål som hovedmål i skolen.[3]']
    pos_tags = [('Bokmål', 'PROPN'), ('er', 'AUX'), ('en', 'DET'), ('av', 'ADP'), ('to', 'NUM'), ('offisielle', 'ADJ'), ('målformer', 'NOUN'), ('av', 'ADP'), ('norsk', 'ADJ'), ('skriftspråk', 'NOUN'), (',', 'PUNCT'), ('hvorav', 'ADV'), ('den', 'DET'), ('andre', 'DET'), ('er', 'AUX'), ('nynorsk', 'ADJ'), ('.', 'PUNCT')]
    tokens = ['Bokmål', 'er', 'en', 'av', 'to', 'offisielle', 'målformer', 'av', 'norsk', 'skriftspråk', ',', 'hvorav', 'den', 'andre', 'er', 'nynorsk', '.']
    lemmas = ['bokmål', 'være', 'en', 'av', 'to', 'offisiell', 'målforme', 'av', 'norsk', 'skriftspråk', '$,', 'hvorav', 'den', 'annen', 'være', 'nynorsk', '$.']
    # Tuples are (token, head, relation, offset to head)
    dependencies = [('Bokmål', 'en', 'nsubj', 2), ('er', 'en', 'cop', 1), ('en', 'en', 'ROOT', 0), ('av', 'målformer', 'case', 3), ('to', 'målformer', 'nummod', 2), ('offisielle', 'målformer', 'amod', 1), ('målformer', 'en', 'nmod', -4), ('av', 'skriftspråk', 'case', 2), ('norsk', 'skriftspråk', 'amod', 1), ('skriftspråk', 'målformer', 'nmod', -3), (',', 'nynorsk', 'punct', 5), ('hvorav', 'nynorsk', 'advmod', 4), ('den', 'andre', 'det', 1), ('andre', 'nynorsk', 'nsubj', 2), ('er', 'nynorsk', 'cop', 1), ('nynorsk', 'målformer', 'amod', -9), ('.', 'en', 'punct', -14)]

    test_spacy.wl_test_spacy(
        lang = 'nob',
        results_sentence_tokenize_trf = sentences,
        results_sentence_tokenize_lg = sentences,
        results_word_tokenize = tokens,
        results_pos_tag = pos_tags,
        results_pos_tag_universal = pos_tags,
        results_lemmatize = lemmas,
        results_dependency_parse = dependencies
    )

if __name__ == '__main__':
    test_spacy_nob()
# ----------------------------------------------------------------------
# Tests: NLP - Stanza - Arabic
# Copyright (C) 2018-2025 Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_ara():
    """Check Stanza's Arabic pipeline against pre-recorded outputs."""
    # The model keeps the whole paragraph as a single sentence
    sentences = ['تحتوي اللغة العربية 28 حرفاً مكتوباً. ويرى بعضُ اللغويين أنه يجب إضافة حرف الهمزة إلى حروف العربية، ليصبحَ عدد الحروف 29. تُكتب العربية من اليمين إلى اليسار - ومثلها اللغة الفارسية والعبرية على عكس كثير من اللغات العالمية - ومن أعلى الصفحة إلى أسفلها.']
    tokens = ['تحتوي', 'اللغة', 'العربية', '28', 'حرفاً', 'مكتوباً', '.']
    # Fine-grained (PADT-style) tags vs. universal POS tags
    pos_tags_fine = [('تحتوي', 'VIIA-3FS--'), ('اللغة', 'N------S1D'), ('العربية', 'A-----FS1D'), ('28', 'Q---------'), ('حرفاً', 'N------S4I'), ('مكتوباً', 'A-----MS4I'), ('.', 'G---------')]
    pos_tags_universal = [('تحتوي', 'VERB'), ('اللغة', 'NOUN'), ('العربية', 'ADJ'), ('28', 'NUM'), ('حرفاً', 'NOUN'), ('مكتوباً', 'ADJ'), ('.', 'PUNCT')]
    lemmas = ['اِحتَوَى', 'لُغَة', 'عَرَبِيّ', '28', 'حَرف', 'مُكتَوِب', '.']
    # Tuples are (token, head, relation, offset to head)
    dependencies = [('تحتوي', 'تحتوي', 'root', 0), ('اللغة', 'تحتوي', 'nsubj', -1), ('العربية', 'اللغة', 'amod', -1), ('28', 'تحتوي', 'obj', -3), ('حرفاً', '28', 'nmod', -1), ('مكتوباً', 'حرفاً', 'amod', -1), ('.', 'تحتوي', 'punct', -6)]

    test_stanza.wl_test_stanza(
        lang = 'ara',
        results_sentence_tokenize = sentences,
        results_word_tokenize = tokens,
        results_pos_tag = pos_tags_fine,
        results_pos_tag_universal = pos_tags_universal,
        results_lemmatize = lemmas,
        results_dependency_parse = dependencies
    )

if __name__ == '__main__':
    test_stanza_ara()

# ----------------------------------------------------------------------
# Tests: NLP - Stanza - Church Slavonic (Old)
# Copyright (C) 2018-2025 Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_chu():
    """Check Stanza's Old Church Slavonic pipeline against pre-recorded outputs."""
    sentences = ['ВЪ И҃ В҃ ДЬНЬ КЛꙆМЕНТА', 'Бъ҃ ꙇже нъи лѣта огрѧдѫцѣ блаженаго климента мѫченіка твоего ꙇ папежа чьстьѭ веселішꙇ подазь мілостівъі да егоже чьсть чьстімъ сілоѭ ѹбо мѫчениѣ его наслѣдѹемъ г҃мь']
    tokens = ['ВЪ', 'И҃', 'В҃', 'ДЬНЬ', 'КЛꙆМЕНТА']
    # Fine-grained (PROIEL-style) tags vs. universal POS tags
    pos_tags_fine = [('ВЪ', 'R-'), ('И҃', 'Nb'), ('В҃', 'R-'), ('ДЬНЬ', 'Nb'), ('КЛꙆМЕНТА', 'Ne')]
    pos_tags_universal = [('ВЪ', 'ADP'), ('И҃', 'NOUN'), ('В҃', 'ADP'), ('ДЬНЬ', 'NOUN'), ('КЛꙆМЕНТА', 'PROPN')]
    lemmas = ['въ', 'изоусъ', 'въ', 'дьнь', 'кламенъ']
    # Tuples are (token, head, relation, offset to head)
    dependencies = [('ВЪ', 'И҃', 'case', 1), ('И҃', 'И҃', 'root', 0), ('В҃', 'ДЬНЬ', 'case', 1), ('ДЬНЬ', 'И҃', 'orphan', -2), ('КЛꙆМЕНТА', 'ДЬНЬ', 'nmod', -1)]

    test_stanza.wl_test_stanza(
        lang = 'chu',
        results_sentence_tokenize = sentences,
        results_word_tokenize = tokens,
        results_pos_tag = pos_tags_fine,
        results_pos_tag_universal = pos_tags_universal,
        results_lemmatize = lemmas,
        results_dependency_parse = dependencies
    )

if __name__ == '__main__':
    test_stanza_chu()

# ----------------------------------------------------------------------
# Tests: NLP - Stanza - Coptic
# Copyright (C) 2018-2025 Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_cop():
    """Check Stanza's Coptic pipeline against pre-recorded outputs."""
    sentences = ['ϭⲟⲗ ·', 'ⲛⲉⲛⲧⲁⲩⲕⲗⲏⲣⲟⲛⲟⲙⲉⲓ ⲉⲛⲉϩ ⲛⲧⲙⲛⲧⲣⲣⲟ ⲙⲡⲛⲟⲩⲧⲉ ·']
    tokens = ['ϭⲟⲗ', '·']
    # Fine-grained (Scriptorium-style) tags vs. universal POS tags
    pos_tags_fine = [('ϭⲟⲗ', 'VIMP'), ('·', 'PUNCT')]
    pos_tags_universal = [('ϭⲟⲗ', 'VERB'), ('·', 'PUNCT')]
    lemmas = ['ϭⲟⲗ', '·']
    # Tuples are (token, head, relation, offset to head)
    dependencies = [('ϭⲟⲗ', 'ϭⲟⲗ', 'root', 0), ('·', 'ϭⲟⲗ', 'punct', -1)]

    test_stanza.wl_test_stanza(
        lang = 'cop',
        results_sentence_tokenize = sentences,
        results_word_tokenize = tokens,
        results_pos_tag = pos_tags_fine,
        results_pos_tag_universal = pos_tags_universal,
        results_lemmatize = lemmas,
        results_dependency_parse = dependencies
    )

if __name__ == '__main__':
    test_stanza_cop()

# ----------------------------------------------------------------------
# Tests: NLP - Stanza - Danish
# Copyright (C) 2018-2025 Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_dan():
    """Check Stanza's Danish pipeline against pre-recorded outputs."""
    # Fine-grained and universal tag sets coincide for this model
    pos_tags = [('Dansk', 'ADJ'), ('er', 'AUX'), ('et', 'DET'), ('østnordisk', 'ADJ'), ('sprog', 'NOUN'), ('indenfor', 'ADP'), ('den', 'DET'), ('germanske', 'ADJ'), ('gren', 'NOUN'), ('af', 'ADP'), ('den', 'DET'), ('indoeuropæiske', 'ADJ'), ('sprogfamilie', 'NOUN'), ('.', 'PUNCT')]
    sentences = ['Dansk er et østnordisk sprog indenfor den germanske gren af den indoeuropæiske sprogfamilie.', 'Det danske sprog tales af ca. seks millioner mennesker, hovedsageligt i Danmark, men også i Sydslesvig, på Færøerne og Grønland.[1]', 'Dansk er tæt beslægtet med norsk, svensk og islandsk, og sproghistorisk har dansk været stærkt påvirket af plattysk.']
    tokens = ['Dansk', 'er', 'et', 'østnordisk', 'sprog', 'indenfor', 'den', 'germanske', 'gren', 'af', 'den', 'indoeuropæiske', 'sprogfamilie', '.']
    lemmas = ['dansk', 'være', 'en', 'østnordisk', 'sprog', 'indenfor', 'den', 'germansk', 'gren', 'af', 'den', 'indoeuropæisk', 'sprogfamilie', '.']
    # Tuples are (token, head, relation, offset to head)
    dependencies = [('Dansk', 'sprog', 'nsubj', 4), ('er', 'sprog', 'cop', 3), ('et', 'sprog', 'det', 2), ('østnordisk', 'sprog', 'amod', 1), ('sprog', 'sprog', 'root', 0), ('indenfor', 'gren', 'case', 3), ('den', 'gren', 'det', 2), ('germanske', 'gren', 'amod', 1), ('gren', 'sprog', 'nmod', -4), ('af', 'sprogfamilie', 'case', 3), ('den', 'sprogfamilie', 'det', 2), ('indoeuropæiske', 'sprogfamilie', 'amod', 1), ('sprogfamilie', 'gren', 'nmod', -4), ('.', 'sprog', 'punct', -9)]

    test_stanza.wl_test_stanza(
        lang = 'dan',
        results_sentence_tokenize = sentences,
        results_word_tokenize = tokens,
        results_pos_tag = pos_tags,
        results_pos_tag_universal = pos_tags,
        results_lemmatize = lemmas,
        results_dependency_parse = dependencies
    )

if __name__ == '__main__':
    test_stanza_dan()

# ----------------------------------------------------------------------
# Tests: NLP - Stanza - Basque
# Copyright (C) 2018-2025 Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_eus():
    """Check Stanza's Basque pipeline against pre-recorded outputs."""
    # Fine-grained and universal tag sets coincide for this model
    pos_tags = [('Euskara', 'NOUN'), ('Euskal', 'PROPN'), ('Herriko', 'NOUN'), ('hizkuntza', 'NOUN'), ('da', 'AUX'), ('.', 'PUNCT'), ('[8]', 'PUNCT')]
    sentences = ['Euskara Euskal Herriko hizkuntza da.', '[8] Hizkuntza bakartua da, ez baitzaio ahaidetasunik aurkitu.', 'Morfologiari dagokionez, hizkuntza eranskari eta ergatiboa da.', 'Euskaraz mintzo direnei euskaldun deritze.', 'Gaur egun, Euskal Herrian bertan ere hizkuntza gutxitua da, lurralde horretan gaztelania eta frantsesa nagusitu baitira.']
    tokens = ['Euskara', 'Euskal', 'Herriko', 'hizkuntza', 'da', '.', '[8]']
    lemmas = ['euskara', 'Euskal', 'herri', 'hizkuntza', 'izan', '.', '[8]']
    # Tuples are (token, head, relation, offset to head)
    dependencies = [('Euskara', 'hizkuntza', 'nsubj', 3), ('Euskal', 'Herriko', 'compound', 1), ('Herriko', 'hizkuntza', 'nmod', 1), ('hizkuntza', 'hizkuntza', 'root', 0), ('da', 'hizkuntza', 'cop', -1), ('.', 'hizkuntza', 'punct', -2), ('[8]', '[8]', 'root', 0)]

    test_stanza.wl_test_stanza(
        lang = 'eus',
        results_sentence_tokenize = sentences,
        results_word_tokenize = tokens,
        results_pos_tag = pos_tags,
        results_pos_tag_universal = pos_tags,
        results_lemmatize = lemmas,
        results_dependency_parse = dependencies
    )

if __name__ == '__main__':
    test_stanza_eus()

# ----------------------------------------------------------------------
# Tests: NLP - Stanza - Faroese
# Copyright (C) 2018-2025 Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_fao():
    """Check Stanza's Faroese pipeline against pre-recorded outputs."""
    # NOTE(review): the tokenizer splits numbers like '48.000' across
    # sentence boundaries; the recorded output preserves that behavior.
    sentences = ['Føroyskt er høvuðsmálið í Føroyum.', 'Føroyskt er almenna málið í Føroyum, og tað er tjóðarmál føroyinga.', 'Harafturat verður nógv føroyskt tosað í Danmark og Íslandi.', 'Í Føroyum tosa 48.', '000 fólk føroyskt, í Danmark umleið 25.', '000 og í Íslandi umleið 5.000, so samlaða talið av fólkum, ið duga føroyskt liggur um 75-80.', '000.', 'Føroyskt er tí í altjóða høpi eitt lítið mál.', 'Føroyskt mál hevur fýra føll og trý kyn, og grammatiski málbygningurin líkist ógvuliga nógv íslendskum, meðan orðatilfarið og í summum lutum úttalan líkist norska landsmálinum.']
    tokens = ['Føroyskt', 'er', 'høvuðsmálið', 'í', 'Føroyum', '.']
    # Fine-grained tags vs. universal POS tags (no lemmatizer for Faroese)
    pos_tags_fine = [('Føroyskt', 'ADJ-N'), ('er', 'BEPI'), ('høvuðsmálið', 'N-N'), ('í', 'P'), ('Føroyum', 'N-D'), ('.', '.')]
    pos_tags_universal = [('Føroyskt', 'ADJ'), ('er', 'AUX'), ('høvuðsmálið', 'NOUN'), ('í', 'ADP'), ('Føroyum', 'NOUN'), ('.', 'PUNCT')]
    # Tuples are (token, head, relation, offset to head)
    dependencies = [('Føroyskt', 'Føroyskt', 'root', 0), ('er', 'Føroyskt', 'cop', -1), ('høvuðsmálið', 'Føroyskt', 'nsubj', -2), ('í', 'Føroyum', 'case', 1), ('Føroyum', 'Føroyskt', 'obl', -4), ('.', 'Føroyum', 'punct', -1)]

    test_stanza.wl_test_stanza(
        lang = 'fao',
        results_sentence_tokenize = sentences,
        results_word_tokenize = tokens,
        results_pos_tag = pos_tags_fine,
        results_pos_tag_universal = pos_tags_universal,
        results_dependency_parse = dependencies
    )

if __name__ == '__main__':
    test_stanza_fao()
from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_fro():
    """Test Stanza's Old French (fro) pipeline against known-good literal outputs."""
    # The model keeps both clauses as a single sentence, hence the one-element
    # expectation for sentence tokenization.
    test_stanza.wl_test_stanza(
        lang = 'fro',
        results_sentence_tokenize = ["Si l'orrat Carles, ki est as porz passant. Je vos plevis, ja returnerunt Franc."],
        results_word_tokenize = ['Si', "l'", 'orrat', 'Carles', ',', 'ki', 'est', 'as', 'porz', 'passant', '.'],
        results_pos_tag = [('Si', 'ADVgen'), ("l'", 'PROper'), ('orrat', 'VERcjg'), ('Carles', 'NOMpro'), (',', 'PONfbl'), ('ki', 'PROrel'), ('est', 'VERcjg'), ('as', 'PRE.DETdef'), ('porz', 'NOMcom'), ('passant', 'VERppa'), ('.', 'PONfrt')],
        results_pos_tag_universal = [('Si', 'ADV'), ("l'", 'PRON'), ('orrat', 'VERB'), ('Carles', 'PROPN'), (',', 'PUNCT'), ('ki', 'PRON'), ('est', 'AUX'), ('as', 'ADP'), ('porz', 'NOUN'), ('passant', 'VERB'), ('.', 'PUNCT')],
        results_lemmatize = ['si', "l'", 'orrat', 'Carles', ',', 'ki', 'est', 'as', 'porz', 'passant', '.'],
        results_dependency_parse = [('Si', 'orrat', 'advmod', 2), ("l'", 'orrat', 'obj', 1), ('orrat', 'orrat', 'root', 0), ('Carles', 'orrat', 'nsubj', -1), (',', 'Carles', 'punct', -1), ('ki', 'passant', 'nsubj', 4), ('est', 'passant', 'aux', 3), ('as', 'porz', 'case:det', 1), ('porz', 'passant', 'obl', 1), ('passant', 'Carles', 'acl:relcl', -6), ('.', 'orrat', 'punct', -8)]
    )

if __name__ == '__main__':
    test_stanza_fro()
from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_gla():
    """Test Stanza's Scottish Gaelic (gla) pipeline against known-good literal outputs."""
    # Dependency tuples are (token, head token, relation, signed offset to head).
    test_stanza.wl_test_stanza(
        lang = 'gla',
        results_sentence_tokenize = ["'S i cànan dùthchasach na h-Alba a th' anns a' Ghàidhlig.", "'S i ball den teaghlach de chànanan Ceilteach dhen mheur Ghoidhealach a tha anns a' Ghàidhlig.", 'Tha Goidhealach a\' gabhail a-steach na cànanan Gàidhealach gu lèir; Gàidhlig na h-Èireann, Gàidhlig Mhanainn, agus Gàidhlig agus gu dearbh chan eil anns an fhacal "Goidhealach" ach seann fhacal a tha a\' ciallachadh "Gàidhealach".'],
        results_word_tokenize = ["'S", 'i', 'cànan', 'dùthchasach', 'na', 'h-Alba', 'a', "th'", 'anns', "a'", 'Ghàidhlig', '.'],
        results_pos_tag = [("'S", 'Wp-i'), ('i', 'Pp3sf'), ('cànan', 'Ncsmn'), ('dùthchasach', 'Aq-smn'), ('na', 'Tdsfg'), ('h-Alba', 'Nt'), ('a', 'Q-r'), ("th'", 'V-p'), ('anns', 'Sp'), ("a'", 'Tdsf'), ('Ghàidhlig', 'Ncsfd'), ('.', 'Fe')],
        results_pos_tag_universal = [("'S", 'AUX'), ('i', 'PRON'), ('cànan', 'NOUN'), ('dùthchasach', 'ADJ'), ('na', 'DET'), ('h-Alba', 'PROPN'), ('a', 'PART'), ("th'", 'VERB'), ('anns', 'ADP'), ("a'", 'DET'), ('Ghàidhlig', 'NOUN'), ('.', 'PUNCT')],
        results_lemmatize = ['is', 'i', 'cànan', 'dùthchasach', 'an', 'Alba', 'a', 'bi', 'an', 'an', 'gàidhlig', '.'],
        results_dependency_parse = [("'S", 'cànan', 'cop', 2), ('i', "'S", 'fixed', -1), ('cànan', 'cànan', 'root', 0), ('dùthchasach', 'cànan', 'amod', -1), ('na', 'h-Alba', 'det', 1), ('h-Alba', 'cànan', 'nmod', -3), ('a', "th'", 'nsubj', 1), ("th'", 'cànan', 'csubj:cleft', -5), ('anns', 'Ghàidhlig', 'case', 2), ("a'", 'Ghàidhlig', 'det', 1), ('Ghàidhlig', "th'", 'xcomp:pred', -3), ('.', 'cànan', 'punct', -9)]
    )

if __name__ == '__main__':
    test_stanza_gla()
from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_glv():
    """Test Stanza's Manx (glv) pipeline against known-good literal outputs."""
    # The language-specific and universal POS tagsets coincide for this model,
    # so the same list is passed for both expectations below.  '(graït' as a
    # single token records the tokenizer's actual (imperfect) behavior.
    results_pos_tag = [('She', 'AUX'), ('Gaelg', 'PROPN'), ('(graït', 'NOUN'), (':', 'PUNCT'), ('/gɪlg/', 'NOUN'), (')', 'PUNCT'), ('çhengey', 'NOUN'), ('Ghaelagh', 'PROPN'), ('Vannin', 'PROPN'), ('.', 'PUNCT')]

    test_stanza.wl_test_stanza(
        lang = 'glv',
        results_sentence_tokenize = ['She Gaelg (graït: /gɪlg/) çhengey Ghaelagh Vannin.', "Haink y Ghaelg woish Shenn-Yernish, as t'ee cosoylagh rish Yernish as Gaelg ny h-Albey."],
        results_word_tokenize = ['She', 'Gaelg', '(graït', ':', '/gɪlg/', ')', 'çhengey', 'Ghaelagh', 'Vannin', '.'],
        results_pos_tag = results_pos_tag,
        results_pos_tag_universal = results_pos_tag,
        results_lemmatize = ['she', 'Gaelg', 'ben', ':', '/gɪlg/', ')', 'çhengey', 'Gaelagh', 'Mannin', '.'],
        results_dependency_parse = [('She', 'Gaelg', 'cop', 1), ('Gaelg', 'Gaelg', 'root', 0), ('(graït', 'Gaelg', 'nmod', -1), (':', '/gɪlg/', 'punct', 1), ('/gɪlg/', 'Gaelg', 'appos', -3), (')', '/gɪlg/', 'punct', -1), ('çhengey', 'Gaelg', 'parataxis', -5), ('Ghaelagh', 'çhengey', 'nmod', -1), ('Vannin', 'Ghaelagh', 'nmod', -1), ('.', 'Gaelg', 'punct', -8)]
    )

if __name__ == '__main__':
    test_stanza_glv()
from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_kat():
    """Test Stanza's Georgian (kat) pipeline against known-good literal outputs."""
    test_stanza.wl_test_stanza(
        lang = 'kat',
        results_sentence_tokenize = ['ქართული ენა — ქართველურ ენათა ოჯახის ენა.', 'ქართველების მშობლიური ენა, საქართველოს სახელმწიფო ენა (აფხაზეთის ავტონომიურ რესპუბლიკაში, მასთან ერთად სახელმწიფო ენად აღიარებულია აფხაზური ენა).', 'ქართულ ენაზე 5 მილიონზე მეტი ადამიანი ლაპარაკობს.'],
        results_word_tokenize = ['ქართული', 'ენა', '—', 'ქართველურ', 'ენათა', 'ოჯახის', 'ენა', '.'],
        results_pos_tag = [('ქართული', 'Adj'), ('ენა', 'Noun'), ('—', 'F'), ('ქართველურ', 'Adj'), ('ენათა', 'Noun'), ('ოჯახის', 'Noun'), ('ენა', 'Noun'), ('.', 'F')],
        results_pos_tag_universal = [('ქართული', 'ADJ'), ('ენა', 'NOUN'), ('—', 'PUNCT'), ('ქართველურ', 'ADJ'), ('ენათა', 'NOUN'), ('ოჯახის', 'NOUN'), ('ენა', 'NOUN'), ('.', 'PUNCT')],
        results_lemmatize = ['ქართული', 'ენა', '—', 'ქართველური', 'ენა', 'ოჯახი', 'ენა', '.'],
        results_dependency_parse = [('ქართული', 'ენა', 'amod', 1), ('ენა', 'ენა', 'nsubj', 5), ('—', 'ენა', 'punct', 4), ('ქართველურ', 'ენათა', 'amod', 1), ('ენათა', 'ოჯახის', 'nmod', 1), ('ოჯახის', 'ენა', 'nmod', 1), ('ენა', 'ენა', 'root', 0), ('.', 'ენა', 'punct', -1)]
    )

if __name__ == '__main__':
    test_stanza_kat()
from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_kor():
    """Test Stanza's Korean (kor) pipeline against known-good literal outputs."""
    # Korean lemmas are morpheme-segmented with '+' separators, per the
    # model's output format (e.g. '지역+에' = noun + case particle).
    test_stanza.wl_test_stanza(
        lang = 'kor',
        results_sentence_tokenize = ['세계 여러 지역에 한민족 인구가 거주하게 되면서 전 세계 각지에서 한국어가 사용 되고 있다.', '2016년 1월 초 기준으로 한국어 사용 인구는 약 8,000만 명으로 추산된다.', '[1]'],
        results_word_tokenize = ['세계', '여러', '지역에', '한민족', '인구가', '거주하게', '되면서', '전', '세계', '각지에서', '한국어가', '사용', '되고', '있다', '.'],
        results_pos_tag = [('세계', 'ncn'), ('여러', 'mma'), ('지역에', 'ncn+jca'), ('한민족', 'ncn'), ('인구가', 'ncn+jcs'), ('거주하게', 'ncpa+xsv+ecx'), ('되면서', 'px+ecc'), ('전', 'mma'), ('세계', 'ncn'), ('각지에서', 'ncn+jca'), ('한국어가', 'nq+jcs'), ('사용', 'ncpa'), ('되고', 'pvg+ecx'), ('있다', 'px+ef'), ('.', 'sf')],
        results_pos_tag_universal = [('세계', 'NOUN'), ('여러', 'ADJ'), ('지역에', 'ADV'), ('한민족', 'NOUN'), ('인구가', 'NOUN'), ('거주하게', 'VERB'), ('되면서', 'CCONJ'), ('전', 'ADJ'), ('세계', 'NOUN'), ('각지에서', 'ADV'), ('한국어가', 'PROPN'), ('사용', 'NOUN'), ('되고', 'VERB'), ('있다', 'AUX'), ('.', 'PUNCT')],
        results_lemmatize = ['세계', '여러', '지역+에', '한민족', '인구+가', '거주+하+게', '되+면서', '전', '세계', '각지+에서', '한국어+가', '사용', '되+고', '있', '.'],
        results_dependency_parse = [('세계', '지역에', 'compound', 2), ('여러', '지역에', 'amod', 1), ('지역에', '거주하게', 'obl', 3), ('한민족', '인구가', 'compound', 1), ('인구가', '거주하게', 'nsubj', 1), ('거주하게', '거주하게', 'root', 0), ('되면서', '거주하게', 'cc', -1), ('전', '세계', 'amod', 1), ('세계', '각지에서', 'compound', 1), ('각지에서', '되고', 'advcl', 3), ('한국어가', '되고', 'nsubj', 2), ('사용', '되고', 'dep', 1), ('되고', '거주하게', 'conj', -7), ('있다', '되고', 'aux', -1), ('.', '있다', 'punct', -1)]
    )

if __name__ == '__main__':
    test_stanza_kor()
from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_kpv():
    """Test Stanza's Komi-Zyrian (kpv) pipeline against known-good literal outputs."""
    test_stanza.wl_test_stanza(
        lang = 'kpv',
        results_sentence_tokenize = ['Коми кыв — финн-йӧгра кывъясысь ӧти, коми войтырлӧн чужан кыв.', 'Коми кывйын кызь гӧгӧр сёрнисикас да кык гижӧда кыв: зырян коми да перым коми.', 'Коми кыв — Коми Республикаын каналан кыв (кыдзи и роч кыв).', 'Комиӧн сёрнитӧны Коми Республикаса вужвойтыр — комияс (зыряна, матӧ 156 сюрс морт).', 'Лунвылынджык, Перым Коми кытшын, перым комияслӧн (пермякъяслӧн, матӧ 63 сюрс морт) сӧвмӧ ас гижӧд кыв.', 'Комиясыд и сэні вужвойтыр.'],
        results_word_tokenize = ['Коми', 'кыв', '—', 'финн-йӧгра', 'кывъясысь', 'ӧти', ',', 'коми', 'войтырлӧн', 'чужан', 'кыв', '.'],
        results_pos_tag = [('Коми', 'N'), ('кыв', 'N'), ('—', 'PUNCT'), ('финн-йӧгра', 'Adv'), ('кывъясысь', 'N'), ('ӧти', 'Num'), (',', 'CLB'), ('коми', 'N'), ('войтырлӧн', 'N'), ('чужан', 'V'), ('кыв', 'N'), ('.', 'CLB')],
        results_pos_tag_universal = [('Коми', 'NOUN'), ('кыв', 'NOUN'), ('—', 'PUNCT'), ('финн-йӧгра', 'ADV'), ('кывъясысь', 'NOUN'), ('ӧти', 'NUM'), (',', 'PUNCT'), ('коми', 'NOUN'), ('войтырлӧн', 'NOUN'), ('чужан', 'VERB'), ('кыв', 'NOUN'), ('.', 'PUNCT')],
        results_lemmatize = ['коми', 'кыв', '—', 'финн-йӧгра', 'кыв', 'ӧти', ',', 'коми', 'войтыр', 'чужан', 'кыв', '.'],
        results_dependency_parse = [('Коми', 'кыв', 'obl', 1), ('кыв', 'кыв', 'root', 0), ('—', 'кывъясысь', 'punct', 2), ('финн-йӧгра', 'кывъясысь', 'advmod', 1), ('кывъясысь', 'кыв', 'appos', -3), ('ӧти', 'кывъясысь', 'nummod', -1), (',', 'кыв', 'punct', 4), ('коми', 'войтырлӧн', 'nmod', 1), ('войтырлӧн', 'чужан', 'obl:lmod', 1), ('чужан', 'кыв', 'acl', 1), ('кыв', 'кыв', 'nsubj:cop', -9), ('.', 'кыв', 'punct', -10)]
    )

if __name__ == '__main__':
    test_stanza_kpv()
from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_mar():
    """Test Stanza's Marathi (mar) pipeline against known-good literal outputs."""
    # The language-specific and universal POS tagsets coincide for this model,
    # so the same list is passed for both expectations below.
    results_pos_tag = [('मराठी', 'ADJ'), ('भाषा', 'NOUN'), ('ही', 'PART'), ('इंडो', 'ADJ'), ('-', 'PUNCT'), ('युरोपीय', 'ADJ'), ('भाषाकुळातील', 'NOUN'), ('एक', 'DET'), ('भाषा', 'NOUN'), ('आहे', 'AUX'), ('.', 'PUNCT')]

    # NOTE(review): the keyword 'results_sentiment_analayze' looks misspelled
    # ('analayze' vs 'analyze') but must match wl_test_stanza's parameter name
    # declared in tests/tests_nlp/tests_stanza/test_stanza.py — confirm there
    # before renaming anywhere.
    test_stanza.wl_test_stanza(
        lang = 'mar',
        results_sentence_tokenize = ['मराठी भाषा ही इंडो-युरोपीय भाषाकुळातील एक भाषा आहे.', 'मराठी ही भारताच्या २२ अधिकृत भाषांपैकी एक आहे.', 'मराठी महाराष्ट्र राज्याची अधिकृत तर गोवा राज्याची सहअधिकृत भाषा आहे.', '२०११ च्या जनगणनेनुसार, भारतात मराठी भाषकांची एकूण लोकसंख्या सुमारे १४ कोटी आहे.', 'मराठी मातृभाषा असणाऱ्या लोकांच्या संख्येनुसार मराठी ही जगातील दहावी व भारतातील तिसरी भाषा आहे.', 'मराठी भाषा भारताच्या प्राचीन भाषांपैकी एक असून महाराष्ट्री प्राकृतचे आधुनिक रूप आहे.', 'मराठीचे वय सुमारे २४०० वर्ष आहे.', 'महाराष्ट्र हे मराठी भाषिकांचे राज्य म्हणून मराठी भाषेला वेगळे महत्त्व प्राप्त झालेले आहे.', 'आजतागायत मराठी भाषेतून अनेक श्रेष्ठ साहित्यकृती निर्माण झालेल्या आहेत आणि त्यात सातत्यपूर्ण रीतीने भर पडत आहे.', 'गोवा, गुजरात सारख्या राज्यातही मराठी भाषा काही प्रमाणात बोलली जाते.', 'गोव्यात मराठीला समृद्ध असा इतिहास आहे.', '[१]'],
        results_word_tokenize = ['मराठी', 'भाषा', 'ही', 'इंडो', '-', 'युरोपीय', 'भाषाकुळातील', 'एक', 'भाषा', 'आहे', '.'],
        results_pos_tag = results_pos_tag,
        results_pos_tag_universal = results_pos_tag,
        results_lemmatize = ['मराठी', 'भाष', 'ही', 'इंडो', '-', 'युरोपीय', 'भाषाकुळळत', 'एक', 'भाष', 'असणे', '.'],
        results_dependency_parse = [('मराठी', 'भाषा', 'amod', 1), ('भाषा', 'भाषा', 'obl', 7), ('ही', 'भाषा', 'discourse', -1), ('इंडो', 'भाषाकुळातील', 'amod', 3), ('-', 'इंडो', 'punct', -1), ('युरोपीय', 'भाषाकुळातील', 'amod', 1), ('भाषाकुळातील', 'भाषा', 'obl', 2), ('एक', 'भाषा', 'det', 1), ('भाषा', 'भाषा', 'root', 0), ('आहे', 'भाषा', 'cop', -1), ('.', 'भाषा', 'punct', -2)],
        results_sentiment_analayze = [0]
    )

if __name__ == '__main__':
    test_stanza_mar()
from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_mya():
    """Test Stanza's Burmese (mya) pipeline against known-good literal outputs.

    Only sentence and word tokenization are checked here — no POS/lemma/parse
    expectations are passed, so wl_test_stanza validates just these two.
    """
    test_stanza.wl_test_stanza(
        lang = 'mya',
        results_sentence_tokenize = ['မြန်မာဘာသာစကား (အင်္ဂလိပ်: Myanmar Language)သည် မြန်မာနိုင်ငံ၏ ရုံးသုံး ဘာသာစကားဖြစ်သည်။', 'ဗမာလူမျိုးနှင့် ဗမာနွယ်ဝင်(ဓနု၊ အင်းသား၊ တောင်ရိုးနှင့် ယော)တို့၏ ဇာတိစကားဖြစ်သည်။', 'ဗမာလူမျိုးတို့သည် တိဘက်-ဗမာနွယ် ဘာသာစကားများ (Tibeto-Burman Languages) ပြောဆိုသည့် လူမျိုးနွယ်စုကြီးမှ အကြီးဆုံးသော လူမျိုးဖြစ်သည်။', 'လူဦးရေ ၃၈သန်းကျော်ခန့်သည် မြန်မာဘာသာစကားကို မိခင်ဘာသာစကား အနေဖြင့် သုံး၍ မြန်မာတိုင်းရင်သားများသည် ဒုတိယဘာသာစကား အနေဖြင့် သုံးသည်။'],
        results_word_tokenize = ['မြန်မာ', 'ဘာသာ', 'စကား', '(', 'အင်္ဂလိပ်', ':', 'Myanmar', 'Language)', 'သည်', 'မြန်မာ', 'နိုင်ငံ', '၏', 'ရုံးသုံး', 'ဘာသာ', 'စကား', 'ဖြစ်', 'သည်', '။']
    )

if __name__ == '__main__':
    test_stanza_mya()
from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_orv():
    """Test Stanza's Old East Slavic (orv) pipeline against known-good literal outputs."""
    # Expected outputs, keyed by wl_test_stanza's keyword parameters.
    # Dependency tuples are (token, head token, relation, signed offset to head).
    expected = {
        'results_sentence_tokenize': ['шаибатъ же ѿ бедерѧ г҃ мсци', 'а ѿ дабылѧ до шаибата в҃ мсца', 'моремъ итьти'],
        'results_word_tokenize': ['шаибатъ', 'же', 'ѿ', 'бедерѧ', 'г҃', 'мсци'],
        'results_pos_tag': [('шаибатъ', 'Ne'), ('же', 'Df'), ('ѿ', 'R-'), ('бедерѧ', 'Ne'), ('г҃', 'Ma'), ('мсци', 'Nb')],
        'results_pos_tag_universal': [('шаибатъ', 'PROPN'), ('же', 'ADV'), ('ѿ', 'ADP'), ('бедерѧ', 'PROPN'), ('г҃', 'NUM'), ('мсци', 'NOUN')],
        'results_lemmatize': ['шаибатъ', 'же', 'отъ', 'бедерь', 'трие', 'мѣсяць'],
        'results_dependency_parse': [('шаибатъ', 'шаибатъ', 'root', 0), ('же', 'шаибатъ', 'discourse', -1), ('ѿ', 'бедерѧ', 'case', 1), ('бедерѧ', 'шаибатъ', 'nmod', -3), ('г҃', 'мсци', 'nummod', 1), ('мсци', 'шаибатъ', 'orphan', -5)],
    }

    test_stanza.wl_test_stanza(lang = 'orv', **expected)

if __name__ == '__main__':
    test_stanza_orv()
from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_pcm():
    """Test Stanza's Nigerian Pidgin (pcm) pipeline against known-good literal outputs."""
    # The language-specific and universal tagsets coincide for this model,
    # so one list serves as the expectation for both.
    pos_tags = [('Naijá', 'PROPN'), ('na', 'AUX'), ('pijin,', 'VERB'), ('a', 'DET'), ('langwej', 'NOUN'), ('for', 'ADP'), ('oda', 'ADJ'), ('langwej.', 'NOUN')]

    # Expected outputs, keyed by wl_test_stanza's keyword parameters.
    expected = {
        'results_sentence_tokenize': ['Naijá na pijin, a langwej for oda langwej. Naijá for Inglish an wey Afrikan langwej.'],
        'results_word_tokenize': ['Naijá', 'na', 'pijin,', 'a', 'langwej', 'for', 'oda', 'langwej.'],
        'results_pos_tag': pos_tags,
        'results_pos_tag_universal': pos_tags,
        'results_lemmatize': ['Naijá', 'na', 'pijin,', 'a', 'langwej', 'for', 'oder', 'langwej.'],
        'results_dependency_parse': [('Naijá', 'pijin,', 'nsubj', 2), ('na', 'pijin,', 'cop', 1), ('pijin,', 'pijin,', 'root', 0), ('a', 'langwej', 'det', 1), ('langwej', 'pijin,', 'obj', -2), ('for', 'oda', 'case', 1), ('oda', 'pijin,', 'obl:arg', -4), ('langwej.', 'pijin,', 'dep', -5)],
    }

    test_stanza.wl_test_stanza(lang = 'pcm', **expected)

if __name__ == '__main__':
    test_stanza_pcm()
from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_qpm():
    """Test Stanza's Pomak (qpm) pipeline against known-good literal outputs."""
    # The language-specific and universal tagsets coincide for this model,
    # so one list serves as the expectation for both.
    pos_tags = [('Kážyjte', 'VERB'), ('nǽko', 'DET'), (',', 'PUNCT'), ('de', 'PART'), ('!', 'PUNCT')]

    # Expected outputs, keyed by wl_test_stanza's keyword parameters.
    expected = {
        'results_sentence_tokenize': ['Kážyjte nǽko, de! Še go preskókneme!'],
        'results_word_tokenize': ['Kážyjte', 'nǽko', ',', 'de', '!'],
        'results_pos_tag': pos_tags,
        'results_pos_tag_universal': pos_tags,
        'results_lemmatize': ['kážom', 'nǽko', ',', 'de', '!'],
        'results_dependency_parse': [('Kážyjte', 'Kážyjte', 'root', 0), ('nǽko', 'Kážyjte', 'det', -1), (',', 'de', 'punct', 1), ('de', 'Kážyjte', 'vocative', -3), ('!', 'Kážyjte', 'punct', -4)],
    }

    test_stanza.wl_test_stanza(lang = 'qpm', **expected)

if __name__ == '__main__':
    test_stanza_qpm()
from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_san():
    """Test Stanza's Sanskrit (san) pipeline against known-good literal outputs."""
    # The language-specific and universal POS tagsets coincide for this model,
    # so the same list is passed for both expectations below.  The tags here
    # record the model's actual (partly inaccurate) output.
    results_pos_tag = [('संस्कृतम्', 'NOUN'), ('जगतः', 'PRON'), ('एकतमा', 'NOUN'), ('अतिप्राचीना', 'NOUN'), ('समृद्धा', 'NOUN'), ('शास्त्रीया', 'NOUN'), ('च', 'ADJ'), ('भाषासु', 'NOUN'), ('वर्तते', 'NOUN'), ('।', 'NOUN')]

    # The '\u200c' (zero-width non-joiner) escape is part of the source text
    # and must be preserved exactly for the comparison to pass.
    test_stanza.wl_test_stanza(
        lang = 'san',
        results_sentence_tokenize = ['संस्कृतम् जगतः एकतमा', 'अतिप्राचीना समृद्धा शास्त्रीया', 'च भाषासु वर्तते। संस्कृतम् भारतस्य जगत: वा भाषासु एकतमा\u200c प्राचीनतमा। भारती, सुरभारती, अमरभारती, अमरवाणी, सुरवाणी, गीर्वाणवाणी, गीर्वाणी, देववाणी, देवभाषा, संस्कृतावाक्, दैवीवाक्, इत्यादिभिः नामभिः एतद्भाषा प्रसिद्धा', '।'],
        results_word_tokenize = ['संस्कृतम्', 'जगतः', 'एकतमा', 'अतिप्राचीना', 'समृद्धा', 'शास्त्रीया', 'च', 'भाषासु', 'वर्तते', '।'],
        results_pos_tag = results_pos_tag,
        results_pos_tag_universal = results_pos_tag,
        results_lemmatize = ['संस्कृतम्', 'जगतः', 'एकतमा', 'अतिप्राचीना', 'समृद्धा', 'शास्त्रीया', 'च', 'भाषासु', 'वर्तते', '।'],
        results_dependency_parse = [('संस्कृतम्', 'संस्कृतम्', 'root', 0), ('जगतः', 'एकतमा', 'nmod', 1), ('एकतमा', 'संस्कृतम्', 'vocative', -2), ('अतिप्राचीना', 'शास्त्रीया', 'nsubj', 2), ('समृद्धा', 'शास्त्रीया', 'nsubj', 1), ('शास्त्रीया', 'शास्त्रीया', 'root', 0), ('च', 'च', 'root', 0), ('भाषासु', 'च', 'nsubj', -1), ('वर्तते', 'च', 'conj', -2), ('।', '।', 'root', 0)]
    )

if __name__ == '__main__':
    test_stanza_san()
from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_sme():
    """Test Stanza's Northern Sámi (sme) pipeline against known-good literal outputs."""
    test_stanza.wl_test_stanza(
        lang = 'sme',
        results_sentence_tokenize = ['Davvisámegiella gullá sámegielaid oarjesámegielaid davvejovkui ovttas julev- ja bihtánsámegielain.', 'Eará oarjesámegielat leat ubmisámegiella ja lullisámegiella.'],
        results_word_tokenize = ['Davvisámegiella', 'gullá', 'sámegielaid', 'oarjesámegielaid', 'davvejovkui', 'ovttas', 'julev-', 'ja', 'bihtánsámegielain', '.'],
        results_pos_tag = [('Davvisámegiella', 'N'), ('gullá', 'V'), ('sámegielaid', 'N'), ('oarjesámegielaid', 'N'), ('davvejovkui', 'N'), ('ovttas', 'Adv'), ('julev-', 'N'), ('ja', 'CC'), ('bihtánsámegielain', 'N'), ('.', 'CLB')],
        results_pos_tag_universal = [('Davvisámegiella', 'NOUN'), ('gullá', 'VERB'), ('sámegielaid', 'NOUN'), ('oarjesámegielaid', 'NOUN'), ('davvejovkui', 'NOUN'), ('ovttas', 'ADV'), ('julev-', 'NOUN'), ('ja', 'CCONJ'), ('bihtánsámegielain', 'NOUN'), ('.', 'PUNCT')],
        results_lemmatize = ['davvisámegiella', 'gullat', 'sámegiella', 'oarjesámegiella', 'davvejoavku', 'ovttas', 'julle', 'ja', 'bihtánsámegiella', '.'],
        results_dependency_parse = [('Davvisámegiella', 'gullá', 'nsubj', 1), ('gullá', 'gullá', 'root', 0), ('sámegielaid', 'gullá', 'obj', -1), ('oarjesámegielaid', 'davvejovkui', 'nmod:poss', 1), ('davvejovkui', 'gullá', 'obl', -3), ('ovttas', 'gullá', 'advmod', -4), ('julev-', 'gullá', 'obl', -5), ('ja', 'julev-', 'cc', -1), ('bihtánsámegielain', 'julev-', 'conj', -2), ('.', 'gullá', 'punct', -8)]
    )

if __name__ == '__main__':
    test_stanza_sme()
----------------------------------------------------------------------
# Tests: NLP - Stanza - Telugu
# Copyright (C) 2018-2025 Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_tel():
    # Language-specific and universal tagsets coincide for Telugu, so the
    # same expected list is passed for both POS-tagging checks.
    results_pos_tag = [('తెలుగు', 'PROPN'), ('అనేది', 'PRON'), ('ద్రావిడ', 'PROPN'), ('భాషల', 'NOUN'), ('కుటుంబానికి', 'NOUN'), ('చెందిన', 'VERB'), ('భాష', 'NOUN'), ('.', 'PUNCT')]

    # Run the shared Stanza test driver for Telugu.
    test_stanza.wl_test_stanza(
        lang = 'tel',
        results_sentence_tokenize = ['తెలుగు అనేది ద్రావిడ భాషల కుటుంబానికి చెందిన భాష.', 'దీనిని మాట్లాడే ప్రజలు ప్రధానంగా ఆంధ్ర, తెలంగాణాలో ఉన్నారు.', 'ఇది ఆ రాష్ట్రాలలో అధికార భాష.', 'భారతదేశంలో ఒకటి', 'కంటే ఎక్కువ రాష్ట్రాల్లో ప్రాథమిక అధికారిక భాషా హోదా కలిగిన కొద్ది భాషలలో హిందీ, బెంగాలీలతో పాటు ఇది కూడా ఉంది.', '[5][6] పుదుచ్చేరిలోని యానం జిల్లాలో తెలుగు అధికారిక భాష.', 'ఒడిశా, కర్ణాటక, తమిళనాడు, కేరళ, పంజాబ్, ఛత్తీస్\u200cగఢ్, మహారాష్ట్ర, అండమాన్ నికోబార్ దీవులలో గుర్తింపబడిన అల్పసంఖ్యాక భాష.', 'దేశ ప్రభుత్వం భారతదేశ ప్రాచీన భాషగా గుర్తించిన ఆరు భాషలలో ఇది ఒకటి.', '[7][8]'],
        results_word_tokenize = ['తెలుగు', 'అనేది', 'ద్రావిడ', 'భాషల', 'కుటుంబానికి', 'చెందిన', 'భాష', '.'],
        results_pos_tag = results_pos_tag,
        results_pos_tag_universal = results_pos_tag,
        results_dependency_parse = [('తెలుగు', 'అనేది', 'compound', 1), ('అనేది', 'చెందిన', 'nsubj', 4), ('ద్రావిడ', 'అనేది', 'nmod', -1), ('భాషల', 'చెందిన', 'obl', 2), ('కుటుంబానికి', 'చెందిన', 'obl', 1), ('చెందిన', 'భాష', 'acl', 1), ('భాష', 'భాష', 'root', 0), ('.', 'భాష', 'punct', -1)]
    )

if __name__ == '__main__':
    test_stanza_tel()
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_zho_cn.py:
--------------------------------------------------------------------------------
# ----------------------------------------------------------------------
# Tests: NLP - Stanza - Chinese (Simplified)
# Copyright (C) 2018-2025 Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

from tests.tests_nlp.tests_stanza import test_stanza

def test_stanza_zho_cn():
    # For Simplified Chinese the lemmatizer is expected to return the tokens
    # unchanged, so the same list serves as expected output for both tasks.
    results_word_tokenize = ['汉', '语', '又', '称', '华', '语', '[', '6][7', ']', ',', '是', '来', '自', '汉', '民族', '的', '语言', '[', '8][7]', '[9', ']', '。']

    # Run the shared Stanza test driver for Chinese (Simplified).
    # NOTE(review): the keyword 'results_sentiment_analayze' (sic) presumably
    # matches the parameter name in test_stanza.wl_test_stanza — confirm
    # against the driver before renaming it here.
    test_stanza.wl_test_stanza(
        lang = 'zho_cn',
        results_sentence_tokenize = ['汉语又称华语[6][7],是来自汉民族的语言[8][7][9]。', '汉语是汉藏语系中最大的一支语族,若把整个汉语族视为单一语言,则汉语为世界上母语使用者人数最多的语言,目前全世界有五分之一人口将其作为母语或第二语言。'],
        results_word_tokenize = results_word_tokenize,
        results_pos_tag = [('汉', 'NNP'), ('语', 'SFN'), ('又', 'RB'), ('称', 'VV'), ('华', 'NNP'), ('语', 'SFN'), ('[', '('), ('6][7', 'CD'), (']', ')'), (',', ','), ('是', 'VC'), ('来', 'VV'), ('自', 'VV'), ('汉', 'NNP'), ('民族', 'NN'), ('的', 'DEC'), ('语言', 'NN'), ('[', '('), ('8][7]', 'CD'), ('[9', 'CD'), (']', ')'), ('。', '.')],
        results_pos_tag_universal = [('汉', 'PROPN'), ('语', 'PART'), ('又', 'SCONJ'), ('称', 'VERB'), ('华', 'PROPN'), ('语', 'PART'), ('[', 'PUNCT'), ('6][7', 'NUM'), (']', 'PUNCT'), (',', 'PUNCT'), ('是', 'AUX'), ('来', 'VERB'), ('自', 'VERB'), ('汉', 'PROPN'), ('民族', 'NOUN'), ('的', 'SCONJ'), ('语言', 'NOUN'), ('[', 'PUNCT'), ('8][7]', 'NUM'), ('[9', 'NUM'), (']', 'PUNCT'), ('。', 'PUNCT')],
        results_lemmatize = results_word_tokenize,
        results_dependency_parse = [('汉', '语', 'compound', 1), ('语', '语言', 'nsubj', 15), ('又', '称', 'mark', 1), ('称', '语言', 'acl', 13), ('华', '语', 'compound', 1), ('语', '称', 'obj', -2), ('[', '6][7', 'punct', 1), ('6][7', '语', 'appos', -2), (']', '6][7', 'punct', -1), (',', '称', 'punct', -6), ('是', '语言', 'cop', 6), ('来', '语言', 'acl:relcl', 5), ('自', '来', 'mark', -1), ('汉', '民族', 'nmod', 1), ('民族', '来', 'obj', -3), ('的', '来', 'mark:rel', -4), ('语言', '语言', 'root', 0), ('[', '[9', 'punct', 2), ('8][7]', '[9', 'nummod', 1), ('[9', '语言', 'appos', -3), (']', '[9', 'punct', -1), ('。', '语言', 'punct', -5)],
        results_sentiment_analayze = [0]
    )

if __name__ == '__main__':
    test_stanza_zho_cn()
--------------------------------------------------------------------------------
/tests/tests_results/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_results/__init__.py
--------------------------------------------------------------------------------
/tests/tests_results/test_results_search.py:
--------------------------------------------------------------------------------
# ----------------------------------------------------------------------
# Tests: Results - Search in results
# Copyright (C) 2018-2025 Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | # ---------------------------------------------------------------------- 18 | 19 | from tests import wl_test_init 20 | from wordless.wl_results import wl_results_search 21 | 22 | main = wl_test_init.Wl_Test_Main() 23 | 24 | def test_wl_dialog_results_search(): 25 | table = wl_test_init.Wl_Test_Table(main, tab = 'dependency_parser') 26 | table.settings['file_area']['files_open'] = [{'selected': True, 'lang': 'test'}] 27 | 28 | dialog_results_search = wl_results_search.Wl_Dialog_Results_Search( 29 | main, 30 | table = table 31 | ) 32 | 33 | dialog_results_search.load_settings(defaults = True) 34 | dialog_results_search.load_settings(defaults = False) 35 | 36 | dialog_results_search.line_edit_search_term.setText('') 37 | dialog_results_search.search_settings_changed() 38 | dialog_results_search.line_edit_search_term.setText('test') 39 | dialog_results_search.search_settings_changed() 40 | 41 | dialog_results_search.table_item_changed() 42 | 43 | dialog_results_search.find_next() 44 | dialog_results_search.find_prev() 45 | dialog_results_search.find_all() 46 | dialog_results_search.update_gui('') 47 | dialog_results_search.clr_highlights() 48 | dialog_results_search.clr_history() 49 | 50 | if __name__ == '__main__': 51 | test_wl_dialog_results_search() 52 | -------------------------------------------------------------------------------- /tests/tests_settings/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_settings/__init__.py -------------------------------------------------------------------------------- /tests/tests_settings/test_settings.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Settings - Settings 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: 
you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # ---------------------------------------------------------------------- 18 | 19 | from tests import wl_test_init 20 | from wordless.wl_settings import wl_settings 21 | 22 | main = wl_test_init.Wl_Test_Main() 23 | 24 | def test_wl_settings(): 25 | settings = wl_settings.Wl_Settings(main) 26 | settings.open() 27 | settings.selection_changed(None, None) 28 | settings.load_settings() 29 | settings.load_settings(defaults = True) 30 | settings.validate_settings() 31 | settings.save_settings() 32 | settings.apply_settings() 33 | 34 | def test_wl_settings_node(): 35 | settings_node = wl_settings.Wl_Settings_Node(main) 36 | settings_node.validate_settings() 37 | settings_node.apply_settings() 38 | 39 | if __name__ == '__main__': 40 | test_wl_settings() 41 | test_wl_settings_node() 42 | -------------------------------------------------------------------------------- /tests/tests_settings/test_settings_default.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Settings - Default settings 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the 
License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # ---------------------------------------------------------------------- 18 | 19 | from tests import wl_test_init 20 | from wordless.wl_settings import wl_settings_default 21 | 22 | main = wl_test_init.Wl_Test_Main() 23 | 24 | def test_settings_default(): 25 | assert wl_settings_default.init_settings_default(main) 26 | 27 | # Check for invalid conversion of universal POS tags into content/function words 28 | for mappings in main.settings_default['pos_tagging']['tagsets']['mapping_settings'].values(): 29 | for mapping in mappings.values(): 30 | assert all(len(pos_mapping) == 5 for pos_mapping in mapping) 31 | 32 | if __name__ == '__main__': 33 | test_settings_default() 34 | -------------------------------------------------------------------------------- /tests/tests_settings/test_settings_dependency_parsing.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Settings - Dependency Parsing 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # ---------------------------------------------------------------------- 18 | 19 | from tests import wl_test_init 20 | from wordless.wl_settings import wl_settings_dependency_parsing 21 | 22 | main = wl_test_init.Wl_Test_Main(switch_lang_utils = 'fast') 23 | 24 | def test_wl_settings_dependency_parsing(): 25 | settings_dependency_parsing = wl_settings_dependency_parsing.Wl_Settings_Dependency_Parsing(main) 26 | settings_dependency_parsing.load_settings() 27 | settings_dependency_parsing.load_settings(defaults = True) 28 | settings_dependency_parsing.apply_settings() 29 | 30 | settings_dependency_parsing.preview_changed() 31 | settings_dependency_parsing.update_gui('test') 32 | settings_dependency_parsing.update_gui_err() 33 | 34 | def test_wl_dialog_preview_settings(): 35 | dialog_preview_settings = wl_settings_dependency_parsing.Wl_Dialog_Preview_Settings(main) 36 | dialog_preview_settings.open() 37 | dialog_preview_settings.load_settings() 38 | dialog_preview_settings.save_settings() 39 | 40 | if __name__ == '__main__': 41 | test_wl_settings_dependency_parsing() 42 | test_wl_dialog_preview_settings() 43 | -------------------------------------------------------------------------------- /tests/tests_settings/test_settings_figs.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Settings - Figures 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 
9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # ---------------------------------------------------------------------- 18 | 19 | from tests import wl_test_init 20 | from wordless.wl_settings import wl_settings_figs 21 | 22 | main = wl_test_init.Wl_Test_Main() 23 | 24 | def test_wl_settings_figs_line_charts(): 25 | settings_figs_line_charts = wl_settings_figs.Wl_Settings_Figs_Line_Charts(main) 26 | settings_figs_line_charts.change_fonts() 27 | settings_figs_line_charts.load_settings() 28 | settings_figs_line_charts.load_settings(defaults = True) 29 | settings_figs_line_charts.apply_settings() 30 | 31 | def test_wl_settings_figs_word_clouds(): 32 | settings_figs_word_clouds = wl_settings_figs.Wl_Settings_Figs_Word_Clouds(main) 33 | settings_figs_word_clouds.font_settings_changed() 34 | settings_figs_word_clouds.load_settings() 35 | settings_figs_word_clouds.load_settings(defaults = True) 36 | settings_figs_word_clouds.validate_settings() 37 | settings_figs_word_clouds.apply_settings() 38 | 39 | def test_wl_settings_figs_network_graphs(): 40 | settings_figs_network_graphs = wl_settings_figs.Wl_Settings_Figs_Network_Graphs(main) 41 | settings_figs_network_graphs.load_settings() 42 | settings_figs_network_graphs.load_settings(defaults = True) 43 | settings_figs_network_graphs.apply_settings() 44 | 45 | if __name__ == '__main__': 46 | test_wl_settings_figs_line_charts() 47 | test_wl_settings_figs_word_clouds() 48 | test_wl_settings_figs_network_graphs() 49 | -------------------------------------------------------------------------------- /tests/tests_settings/test_settings_files.py: 
-------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Settings - Files 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # ---------------------------------------------------------------------- 18 | 19 | from tests import wl_test_init 20 | from wordless.wl_settings import wl_settings_files 21 | 22 | main = wl_test_init.Wl_Test_Main() 23 | 24 | def test_wl_settings_files(): 25 | settings_files = wl_settings_files.Wl_Settings_Files(main) 26 | settings_files.load_settings() 27 | settings_files.load_settings(defaults = True) 28 | settings_files.apply_settings() 29 | 30 | def test_wl_settings_files_tags(): 31 | settings_files_tags = wl_settings_files.Wl_Settings_Files_Tags(main) 32 | settings_files_tags.load_settings() 33 | settings_files_tags.load_settings(defaults = True) 34 | settings_files_tags.apply_settings() 35 | 36 | def test_wl_table_tags(): 37 | table_tags = wl_settings_files.Wl_Table_Tags( 38 | main, 39 | settings_tags = 'header_tag_settings', 40 | defaults_row = ['Nonembedded', 'Header', '', ''] 41 | ) 42 | 43 | table_tags.item_changed() 44 | table_tags._add_row() 45 | table_tags.reset_table() 46 | table_tags.get_tags() 47 | 48 | def test_wl_table_tags_header(): 49 | table_tags_header = 
wl_settings_files.Wl_Table_Tags_Header(main) 50 | table_tags_header.item_changed(table_tags_header.model().item(0, 0)) 51 | 52 | def test_wl_table_tags_body(): 53 | table_tags_body = wl_settings_files.Wl_Table_Tags_Body(main) 54 | table_tags_body.item_changed(table_tags_body.model().item(0, 0)) 55 | 56 | def test_wl_table_tags_xml(): 57 | table_tags_xml = wl_settings_files.Wl_Table_Tags_Xml(main) 58 | table_tags_xml.item_changed(table_tags_xml.model().item(0, 0)) 59 | 60 | if __name__ == '__main__': 61 | test_wl_settings_files() 62 | test_wl_settings_files_tags() 63 | 64 | test_wl_table_tags() 65 | test_wl_table_tags_header() 66 | test_wl_table_tags_body() 67 | test_wl_table_tags_xml() 68 | -------------------------------------------------------------------------------- /tests/tests_settings/test_settings_general.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Settings - General 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # ---------------------------------------------------------------------- 18 | 19 | from tests import wl_test_init 20 | from wordless.wl_settings import wl_settings_general 21 | 22 | main = wl_test_init.Wl_Test_Main() 23 | 24 | def test_wl_settings_general(): 25 | settings_general = wl_settings_general.Wl_Settings_General(main) 26 | settings_general.proxy_settings_changed() 27 | settings_general.load_settings() 28 | settings_general.load_settings(defaults = True) 29 | settings_general.apply_settings() 30 | 31 | def test_wl_settings_general_imp(): 32 | settings_general_imp = wl_settings_general.Wl_Settings_General_Imp(main) 33 | settings_general_imp.detect_encodings_changed() 34 | settings_general_imp.check_path('files') 35 | 36 | settings_general_imp.load_settings() 37 | settings_general_imp.load_settings(defaults = True) 38 | settings_general_imp.validate_settings() 39 | settings_general_imp.apply_settings() 40 | 41 | def test_wl_settings_general_exp(): 42 | settings_general_exp = wl_settings_general.Wl_Settings_General_Exp(main) 43 | settings_general_exp.tables_default_type_changed() 44 | settings_general_exp.check_path('tables') 45 | 46 | settings_general_exp.load_settings() 47 | settings_general_exp.load_settings(defaults = True) 48 | settings_general_exp.validate_settings() 49 | settings_general_exp.apply_settings() 50 | 51 | if __name__ == '__main__': 52 | test_wl_settings_general() 53 | test_wl_settings_general_imp() 54 | test_wl_settings_general_exp() 55 | -------------------------------------------------------------------------------- /tests/tests_settings/test_settings_lemmatization.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Settings - Lemmatization 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public 
License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # ---------------------------------------------------------------------- 18 | 19 | from tests import wl_test_init 20 | from wordless.wl_settings import wl_settings_lemmatization 21 | 22 | main = wl_test_init.Wl_Test_Main(switch_lang_utils = 'fast') 23 | 24 | def test_wl_settings_lemmatization(): 25 | settings_lemmatization = wl_settings_lemmatization.Wl_Settings_Lemmatization(main) 26 | settings_lemmatization.load_settings() 27 | settings_lemmatization.load_settings(defaults = True) 28 | settings_lemmatization.apply_settings() 29 | 30 | settings_lemmatization.preview_changed() 31 | settings_lemmatization.update_gui('test') 32 | settings_lemmatization.update_gui_err() 33 | 34 | if __name__ == '__main__': 35 | test_wl_settings_lemmatization() 36 | -------------------------------------------------------------------------------- /tests/tests_settings/test_settings_sentence_tokenization.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Settings - Sentence Tokenization 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 
9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # ---------------------------------------------------------------------- 18 | 19 | from tests import wl_test_init 20 | from wordless.wl_settings import wl_settings_sentence_tokenization 21 | 22 | main = wl_test_init.Wl_Test_Main(switch_lang_utils = 'fast') 23 | 24 | def test_wl_settings_sentence_tokenization(): 25 | settings_sentence_tokenization = wl_settings_sentence_tokenization.Wl_Settings_Sentence_Tokenization(main) 26 | settings_sentence_tokenization.load_settings() 27 | settings_sentence_tokenization.load_settings(defaults = True) 28 | settings_sentence_tokenization.apply_settings() 29 | 30 | settings_sentence_tokenization.preview_changed() 31 | settings_sentence_tokenization.update_gui('test') 32 | settings_sentence_tokenization.update_gui_err() 33 | 34 | if __name__ == '__main__': 35 | test_wl_settings_sentence_tokenization() 36 | -------------------------------------------------------------------------------- /tests/tests_settings/test_settings_sentiment_analysis.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Settings - Sentiment Analysis 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 
9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # ---------------------------------------------------------------------- 18 | 19 | from tests import wl_test_init 20 | from wordless.wl_settings import wl_settings_sentiment_analysis 21 | 22 | main = wl_test_init.Wl_Test_Main(switch_lang_utils = 'fast') 23 | 24 | def test_wl_settings_sentiment_analysis(): 25 | settings_sentiment_analysis = wl_settings_sentiment_analysis.Wl_Settings_Sentiment_Analysis(main) 26 | settings_sentiment_analysis.load_settings() 27 | settings_sentiment_analysis.load_settings(defaults = True) 28 | settings_sentiment_analysis.apply_settings() 29 | 30 | settings_sentiment_analysis.preview_changed() 31 | settings_sentiment_analysis.update_gui(0.123456789) 32 | settings_sentiment_analysis.update_gui_err() 33 | 34 | if __name__ == '__main__': 35 | test_wl_settings_sentiment_analysis() 36 | -------------------------------------------------------------------------------- /tests/tests_settings/test_settings_stop_word_lists.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Settings - Stop Word Lists 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 
9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # ---------------------------------------------------------------------- 18 | 19 | from tests import wl_test_init 20 | from wordless.wl_settings import wl_settings_stop_word_lists 21 | 22 | main = wl_test_init.Wl_Test_Main() 23 | 24 | def test_wl_settings_stop_word_lists(): 25 | settings_stop_word_lists = wl_settings_stop_word_lists.Wl_Settings_Stop_Word_Lists(main) 26 | settings_stop_word_lists.load_settings() 27 | settings_stop_word_lists.load_settings(defaults = True) 28 | settings_stop_word_lists.apply_settings() 29 | 30 | settings_stop_word_lists.stop_word_list_changed(settings_stop_word_lists.table_stop_word_lists.model().item(0, 0)) 31 | settings_stop_word_lists.preview_settings_changed() 32 | settings_stop_word_lists.preview_results_changed() 33 | 34 | if __name__ == '__main__': 35 | test_wl_settings_stop_word_lists() 36 | -------------------------------------------------------------------------------- /tests/tests_settings/test_settings_syl_tokenization.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Settings - Syllable Tokenization 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 
9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # ---------------------------------------------------------------------- 18 | 19 | from tests import wl_test_init 20 | from wordless.wl_settings import wl_settings_syl_tokenization 21 | 22 | main = wl_test_init.Wl_Test_Main(switch_lang_utils = 'fast') 23 | 24 | def test_wl_settings_syl_tokenization(): 25 | settings_syl_tokenization = wl_settings_syl_tokenization.Wl_Settings_Syl_Tokenization(main) 26 | settings_syl_tokenization.load_settings() 27 | settings_syl_tokenization.load_settings(defaults = True) 28 | settings_syl_tokenization.apply_settings() 29 | 30 | settings_syl_tokenization.preview_changed() 31 | settings_syl_tokenization.update_gui('test') 32 | settings_syl_tokenization.update_gui_err() 33 | 34 | if __name__ == '__main__': 35 | test_wl_settings_syl_tokenization() 36 | -------------------------------------------------------------------------------- /tests/tests_settings/test_settings_tables.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Settings - Tables 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 
# ----------------------------------------------------------------------
# Tests: Settings - Tables
# Copyright (C) 2018-2025 Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

from tests import wl_test_init
from wordless.wl_settings import wl_settings_tables

main = wl_test_init.Wl_Test_Main()

def _check_settings_page(settings_page):
    # Every settings page must survive a load / reset-to-defaults / apply cycle
    settings_page.load_settings()
    settings_page.load_settings(defaults = True)
    settings_page.apply_settings()

def test_wl_settings_tables():
    """Smoke test for the general "Tables" settings page."""
    _check_settings_page(wl_settings_tables.Wl_Settings_Tables(main))

def test_wl_settings_tables_concordancer():
    """Smoke test for the "Tables - Concordancer" settings page."""
    _check_settings_page(wl_settings_tables.Wl_Settings_Tables_Concordancer(main))

def test_wl_settings_tables_parallel_concordancer():
    """Smoke test for the "Tables - Parallel Concordancer" settings page."""
    _check_settings_page(wl_settings_tables.Wl_Settings_Tables_Parallel_Concordancer(main))

def test_wl_settings_tables_dependency_parser():
    """Smoke test for the "Tables - Dependency Parser" settings page."""
    _check_settings_page(wl_settings_tables.Wl_Settings_Tables_Dependency_Parser(main))

if __name__ == '__main__':
    test_wl_settings_tables()
    test_wl_settings_tables_concordancer()
    test_wl_settings_tables_parallel_concordancer()
    test_wl_settings_tables_dependency_parser()
17 | # ---------------------------------------------------------------------- 18 | 19 | from tests import wl_test_init 20 | from wordless.wl_settings import wl_settings_word_tokenization 21 | 22 | main = wl_test_init.Wl_Test_Main(switch_lang_utils = 'fast') 23 | 24 | def test_wl_settings_word_tokenization(): 25 | settings_word_tokenization = wl_settings_word_tokenization.Wl_Settings_Word_Tokenization(main) 26 | settings_word_tokenization.load_settings() 27 | settings_word_tokenization.load_settings(defaults = True) 28 | settings_word_tokenization.apply_settings() 29 | 30 | settings_word_tokenization.preview_changed() 31 | settings_word_tokenization.update_gui('test') 32 | settings_word_tokenization.update_gui_err() 33 | 34 | if __name__ == '__main__': 35 | test_wl_settings_word_tokenization() 36 | -------------------------------------------------------------------------------- /tests/tests_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_utils/__init__.py -------------------------------------------------------------------------------- /tests/tests_utils/test_paths.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Utilities - Paths 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
# ----------------------------------------------------------------------
# Tests: Utilities - Paths
# Copyright (C) 2018-2025 Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

import os
import sys

from wordless.wl_utils import wl_paths, wl_misc

def test_get_normalized_path():
    """Relative paths are rewritten during normalization; '/' stays truthy."""
    for path in ('.', 'a', 'a/b/c'):
        assert wl_paths.get_normalized_path(path) != path

    assert wl_paths.get_normalized_path('/')

def test_get_normalized_dir():
    """Directory normalization behaves like path normalization."""
    for path in ('.', 'a', 'a/b/c'):
        assert wl_paths.get_normalized_dir(path) != path

    assert wl_paths.get_normalized_dir('/')

def test_get_path_file():
    """Components are joined with the OS separator and '..' is resolved."""
    sep = os.path.sep

    assert wl_paths.get_path_file('')
    assert wl_paths.get_path_file('a', 'b', 'c').endswith(sep.join(['a', 'b', 'c']))
    assert wl_paths.get_path_file('a', '..', 'b').endswith('b')

    # Simulate running inside a PyInstaller bundle
    sys._MEIPASS = 'test'

    assert wl_paths.get_path_file('a', internal = True).endswith(sep.join(['test', 'a']))
    assert wl_paths.get_path_file('a', internal = False).endswith('a')

    # Pretend to be on macOS — check_os presumably returns
    # (is_windows, is_macos, is_linux); confirm against wl_misc
    check_os_old = wl_misc.check_os
    wl_misc.check_os = lambda: (False, True, False)

    assert wl_paths.get_path_file('a', internal = False).endswith(sep.join(['MacOS', 'a']))

    # Restore the real environment so later tests are unaffected
    wl_misc.check_os = check_os_old
    del sys._MEIPASS

def test_get_path_data():
    """Data files resolve into the "data" directory."""
    assert wl_paths.get_path_data('a').endswith(os.path.sep.join(['data', 'a']))

def test_get_path_img():
    """Image files resolve into the "imgs" directory."""
    assert wl_paths.get_path_img('a').endswith(os.path.sep.join(['imgs', 'a']))

if __name__ == '__main__':
    test_get_normalized_path()
    test_get_normalized_dir()

    test_get_path_file()
    test_get_path_data()
    test_get_path_img()
-------------------------------------------------------------------------------- /tests/tests_utils/test_threading.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Utilities - Threading 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # ---------------------------------------------------------------------- 18 | 19 | from tests import wl_test_init 20 | from wordless.wl_dialogs import wl_dialogs_misc 21 | from wordless.wl_utils import wl_threading 22 | 23 | main = wl_test_init.Wl_Test_Main() 24 | 25 | def test_wl_worker(): 26 | dialog_progress = wl_dialogs_misc.Wl_Dialog_Progress(main, 'test') 27 | wl_threading.Wl_Worker(main, dialog_progress, lambda: None) 28 | 29 | def test_wl_worker_no_progress(): 30 | wl_threading.Wl_Worker_No_Progress(main, lambda: None) 31 | 32 | def test_wl_worker_no_callback(): 33 | dialog_progress = wl_dialogs_misc.Wl_Dialog_Progress(main, 'test') 34 | wl_threading.Wl_Worker_No_Callback(main, dialog_progress) 35 | 36 | def test_wl_thread(): 37 | dialog_progress = wl_dialogs_misc.Wl_Dialog_Progress(main, 'test') 38 | worker = wl_threading.Wl_Worker(main, dialog_progress, lambda: None) 39 | worker.run = lambda: None 40 | 41 | wl_threading.Wl_Thread(worker) 42 | 43 | def test_wl_thread_no_progress(): 44 | worker = wl_threading.Wl_Worker_No_Progress(main, lambda: None) 45 | worker.run = lambda: None 46 | 47 | wl_threading.Wl_Thread_No_Progress(worker) 48 | 49 | if __name__ == '__main__': 50 | test_wl_worker() 51 | test_wl_worker_no_progress() 52 | test_wl_worker_no_callback() 53 | 54 | test_wl_thread() 55 | test_wl_thread_no_progress() 56 | -------------------------------------------------------------------------------- /tests/tests_widgets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_widgets/__init__.py -------------------------------------------------------------------------------- /tests/tests_widgets/test_buttons.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Widgets - Buttons 3 | # 
# ----------------------------------------------------------------------
# Tests: Widgets - Buttons
# Copyright (C) 2018-2025 Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

from PyQt5 import QtWidgets

from tests import wl_test_init
from wordless.wl_widgets import wl_buttons

main = wl_test_init.Wl_Test_Main()

def test_wl_button():
    """A plain button can be constructed."""
    wl_buttons.Wl_Button('test', main)

def test_wl_button_browse():
    """A browse button can be wired up against a line edit."""
    line_edit = QtWidgets.QLineEdit()

    wl_buttons.Wl_Button_Browse(main, 'test', line_edit, 'test', ['test'])

def test_wl_button_color():
    """Color buttons expose color access and the transparency checkbox."""
    btn_color = wl_buttons.Wl_Button_Color(main)
    btn_color.get_color()
    btn_color.set_color('test')

    # The transparency checkbox must be toggleable in both directions
    _, checkbox_transparent = wl_buttons.wl_button_color(main, allow_transparent = True)
    checkbox_transparent.setChecked(True)
    checkbox_transparent.setChecked(False)

    # The factory must also work with transparency disabled
    wl_buttons.wl_button_color(main, allow_transparent = False)

def test_wl_button_restore_default_vals():
    """A restore-defaults button can be constructed."""
    wl_buttons.Wl_Button_Restore_Default_Vals(main, 'test')

if __name__ == '__main__':
    test_wl_button()
    test_wl_button_browse()
    test_wl_button_color()
    test_wl_button_restore_default_vals()
-------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Widgets - Item delegates 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # ---------------------------------------------------------------------- 18 | 19 | from PyQt5 import QtWidgets 20 | 21 | from tests import wl_test_init 22 | from wordless.wl_widgets import wl_item_delegates 23 | 24 | main = wl_test_init.Wl_Test_Main() 25 | 26 | def test_wl_item_delegate_uneditable(): 27 | item_delegates = wl_item_delegates.Wl_Item_Delegate_Uneditable() 28 | item_delegates.createEditor(main, '', '') 29 | 30 | def test_wl_item_delegate(): 31 | item_delegate = wl_item_delegates.Wl_Item_Delegate(main, QtWidgets.QComboBox) 32 | item_delegate.createEditor(main, 'test', 'test') 33 | item_delegate.set_enabled(True) 34 | 35 | item_delegate = wl_item_delegates.Wl_Item_Delegate(main) 36 | item_delegate.createEditor(main, 'test', 'test') 37 | 38 | def test_wl_item_delegate_combo_box(): 39 | index_editable = wl_test_init.wl_test_index(0, 0) 40 | index_uneditable = wl_test_init.wl_test_index(0, 1) 41 | 42 | item_delegate_combo_box = wl_item_delegates.Wl_Item_Delegate_Combo_Box(main, row = 0, col = 0) 43 | item_delegate_combo_box.createEditor(main, 'test', index_editable) 44 
| assert item_delegate_combo_box.createEditor(main, 'test', index_uneditable) is None 45 | assert item_delegate_combo_box.is_editable(index_editable) 46 | assert not item_delegate_combo_box.is_editable(index_uneditable) 47 | 48 | def test_wl_item_delegate_combo_box_custom(): 49 | item_delegate_combo_box_custom = wl_item_delegates.Wl_Item_Delegate_Combo_Box_Custom(main, QtWidgets.QComboBox, row = 0, col = 0) 50 | item_delegate_combo_box_custom.createEditor(main, 'test', wl_test_init.wl_test_index(0, 0)) 51 | item_delegate_combo_box_custom.createEditor(main, 'test', wl_test_init.wl_test_index(0, 1)) 52 | 53 | if __name__ == '__main__': 54 | test_wl_item_delegate_uneditable() 55 | test_wl_item_delegate() 56 | test_wl_item_delegate_combo_box() 57 | test_wl_item_delegate_combo_box_custom() 58 | -------------------------------------------------------------------------------- /tests/tests_widgets/test_labels.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Widgets - Labels 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # ---------------------------------------------------------------------- 18 | 19 | from tests import wl_test_init 20 | from wordless.wl_widgets import wl_labels 21 | 22 | main = wl_test_init.Wl_Test_Main() 23 | 24 | def test_wl_label(): 25 | wl_labels.Wl_Label('test', main) 26 | 27 | def test_wl_label_hint(): 28 | wl_labels.Wl_Label_Hint('test', main) 29 | 30 | def test_wl_label_html(): 31 | wl_labels.Wl_Label_Html('test', main) 32 | 33 | def test_wl_label_html_centered(): 34 | wl_labels.Wl_Label_Html_Centered('test', main) 35 | 36 | def test_wl_label_dialog(): 37 | label = wl_labels.Wl_Label_Dialog('test', main) 38 | label.set_text('test') 39 | 40 | def test_wl_label_dialog_no_wrap(): 41 | wl_labels.Wl_Label_Dialog_No_Wrap('test', main) 42 | 43 | if __name__ == '__main__': 44 | test_wl_label() 45 | test_wl_label_hint() 46 | test_wl_label_html() 47 | test_wl_label_html_centered() 48 | test_wl_label_dialog() 49 | test_wl_label_dialog_no_wrap() 50 | -------------------------------------------------------------------------------- /tests/tests_widgets/test_layouts.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Tests: Widgets - Layouts 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # ---------------------------------------------------------------------- 18 | 19 | from PyQt5 import QtCore 20 | from PyQt5 import QtWidgets 21 | 22 | from tests import wl_test_init 23 | from wordless.wl_widgets import wl_layouts 24 | 25 | main = wl_test_init.Wl_Test_Main() 26 | 27 | def test_wl_layout(): 28 | wl_layouts.Wl_Layout() 29 | 30 | def test_wl_wrapper(): 31 | wrapper = wl_layouts.Wl_Wrapper(main) 32 | wrapper.load_settings() 33 | 34 | def test_wl_splitter(): 35 | wl_layouts.Wl_Splitter(QtCore.Qt.Vertical, main) 36 | 37 | def test_wl_scroll_area(): 38 | wl_layouts.Wl_Scroll_Area(main) 39 | 40 | def test_wl_stacked_widget_resizable(): 41 | stacked_widget = wl_layouts.Wl_Stacked_Widget_Resizable(main) 42 | stacked_widget.addWidget(QtWidgets.QLabel()) 43 | stacked_widget.current_changed(0) 44 | 45 | def test_wl_separator(): 46 | wl_layouts.Wl_Separator(main, orientation = 'hor') 47 | wl_layouts.Wl_Separator(main, orientation = 'vert') 48 | 49 | if __name__ == '__main__': 50 | test_wl_layout() 51 | test_wl_wrapper() 52 | test_wl_splitter() 53 | test_wl_scroll_area() 54 | test_wl_stacked_widget_resizable() 55 | test_wl_separator() 56 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/utils/__init__.py -------------------------------------------------------------------------------- /utils/data_luong_nguyen_dinh_freq_syls_easy_1000.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Utilities: Data - Extract the 1000 most common syllables from all easy documents of the corpus of Vietnamese text readability dataset on literature domain 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: 
# ----------------------------------------------------------------------
# Utilities: Data - Extract the 1000 most common syllables from all easy
# documents of the corpus of Vietnamese text readability dataset on
# literature domain
# Copyright (C) 2018-2025 Ye Lei (叶磊)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

import collections
import glob

def get_freq_syls(syls, num_syls = 1000):
    """
    Return up to num_syls (syllable, frequency) pairs, most frequent
    first, skipping tokens with no alphanumeric character (i.e.
    punctuation marks).

    Parameters:
        syls: iterable of syllable tokens (may contain duplicates)
        num_syls: maximum number of pairs to return (default: 1000)
    """
    freq_syls = []

    # most_common() sorts by descending frequency with a stable order for
    # ties, matching sorted(items, key = freq, reverse = True)
    for syl, freq in collections.Counter(syls).most_common():
        # Keep a token only if it contains at least one alphanumeric character
        if any(char.isalnum() for char in syl):
            freq_syls.append((syl, freq))

            if len(freq_syls) >= num_syls:
                break

    return freq_syls

def main():
    """Read all easy corpus files and write the 1000 most common syllables."""
    syls = []

    # The corpus of Vietnamese text readability dataset on literature domain:
    # https://github.com/anvinhluong/Vietnamese-text-readability/blob/master/Vietnamese%20Text%20Readability%20Corpus.zip
    for file in glob.glob('Vietnamese Text Readability Corpus/easy_*.txt'):
        print(f'Processing file {file}...')

        with open(file, 'r', encoding = 'utf_8') as f:
            syls.extend(f.read().split())

    # Get the 1000 most frequent syllables (excluding punctuation marks)
    with open('data/luong_nguyen_dinh_freq_syls_easy_1000.txt', 'w', encoding = 'utf_8') as f:
        for syl, _ in get_freq_syls(syls):
            f.write(syl + '\n')

if __name__ == '__main__':
    main()
Compile Python from source 4 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 5 | # 6 | # This program is free software: you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License as published by 8 | # the Free Software Foundation, either version 3 of the License, or 9 | # (at your option) any later version. 10 | # 11 | # This program is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU General Public License 17 | # along with this program. If not, see . 18 | # ---------------------------------------------------------------------- 19 | 20 | # Install build dependencies for Python 21 | # Reference: https://devguide.python.org/getting-started/setup-building/#linux 22 | if ! grep -Fxq "deb-src http://cn.archive.ubuntu.com/ubuntu/ bionic main" "/etc/apt/sources.list"; then 23 | sudo sh -c "echo 'deb-src http://cn.archive.ubuntu.com/ubuntu/ bionic main' >> /etc/apt/sources.list" 24 | fi 25 | 26 | sudo apt-get update 27 | sudo apt-get -y build-dep python3 28 | sudo apt-get -y install build-essential gdb lcov pkg-config libbz2-dev libffi-dev libgdbm-dev libgdbm-compat-dev liblzma-dev libncurses5-dev libreadline6-dev libsqlite3-dev libssl-dev lzma lzma-dev tk-dev uuid-dev zlib1g-dev 29 | 30 | # Compile Python from source 31 | PY_VER=3.11.9 32 | PY_PACKAGE="Python-$PY_VER.tgz" 33 | PY_FOLDER="Python-$PY_VER" 34 | 35 | if [ ! -d $PY_FOLDER ]; then 36 | if [ ! -f $PY_PACKAGE ]; then 37 | wget "https://www.python.org/ftp/python/$PY_VER/$PY_PACKAGE" 38 | fi 39 | 40 | tar -xf $PY_PACKAGE 41 | fi 42 | 43 | cd $PY_FOLDER 44 | # PyInstaller requires "--enable-shared" 45 | ./configure --enable-optimizations --with-lto --enable-shared 46 | make -s -j 47 | sudo make altinstall 48 | cd .. 
49 | 50 | # Fix error while loading shared libraries 51 | sudo ldconfig 52 | 53 | # Install 3rd-party libraries 54 | python3.11 -m pip install --upgrade pip setuptools 55 | pip3.11 install -r requirements_dev.txt 56 | pip3.11 cache purge 57 | 58 | # Fix libxcb-xinerama.so 59 | sudo apt-get install libxcb-xinerama0 60 | 61 | # Clean files 62 | sudo rm $PY_PACKAGE 63 | sudo rm -r $PY_FOLDER 64 | 65 | sudo apt-get -y autoremove 66 | sudo apt-get clean 67 | -------------------------------------------------------------------------------- /utils/linux_create_shortcut.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Utilities: Linux - Create shortcut 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # ---------------------------------------------------------------------- 18 | 19 | import os 20 | import subprocess 21 | 22 | from wordless.wl_utils import wl_misc 23 | 24 | wl_ver = wl_misc.get_wl_ver() 25 | 26 | path_wl = os.path.split(globals()['__file__'])[0] 27 | path_exec = os.path.join(os.path.split(path_wl)[0], 'Wordless') 28 | path_icon = os.path.join(path_wl, 'imgs', 'wl_icon.ico') 29 | path_desktop = os.path.expanduser('~/.local/share/applications/Wordless.desktop') 30 | 31 | os.makedirs(os.path.expanduser('~/.local/share/applications'), exist_ok = True) 32 | 33 | with open(path_desktop, 'w', encoding = 'utf_8') as f: 34 | f.write(f''' 35 | [Desktop Entry] 36 | Type=Application 37 | Name=Wordless 38 | Version={wl_ver} 39 | Encoding=UTF-8 40 | Path={path_wl} 41 | Exec={path_exec} 42 | Icon={path_icon} 43 | Terminal=false 44 | ''') 45 | 46 | # Allow excuting file as program 47 | subprocess.run(('chmod', '+x', path_desktop), check = True) 48 | -------------------------------------------------------------------------------- /utils/wl_download_ci.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Utilities: Download - CI 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # ---------------------------------------------------------------------- 18 | 19 | import nltk 20 | import spacy 21 | import stanza 22 | 23 | # Download NLTK data 24 | nltk.download('averaged_perceptron_tagger_eng') 25 | nltk.download('averaged_perceptron_tagger_rus') 26 | nltk.download('perluniprops') 27 | nltk.download('punkt_tab') 28 | nltk.download('stopwords') 29 | nltk.download('wordnet') 30 | nltk.download('words') 31 | 32 | # Download spaCy's and Stanza's models 33 | spacy.cli.download('en_core_web_trf') 34 | stanza.download('en', processors = ['tokenize', 'pos', 'lemma', 'depparse', 'sentiment']) 35 | -------------------------------------------------------------------------------- /utils/wl_trs_generate_ts_files.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Utilities: Translations - Generate TS files 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # ---------------------------------------------------------------------- 18 | 19 | import pathlib 20 | import re 21 | import subprocess 22 | 23 | files = [] 24 | 25 | for file in pathlib.Path('wordless').rglob('*.py'): 26 | files.append(str(file)) 27 | 28 | # Use "_tr" as a shortcut of QCoreApplication.translate 29 | subprocess.run(('pylupdate5' ,'-verbose' ,'-translate-function', '_tr', *files, '-ts', 'trs/zho_cn.ts'), check = True) 30 | 31 | # Fix HTML entities 32 | with open(r'trs/zho_cn.ts', 'r', encoding = 'utf_8') as f: 33 | contents = f.read() 34 | 35 | # Replace "&xxxx;" with "&xxxx;" 36 | contents = re.sub(r'&([a-z]{2,5});', r'&\1;', contents) 37 | # Escape non-breaking spaces 38 | contents = contents.replace(r' ', r'&nbsp') 39 | 40 | with open(r'trs/zho_cn.ts', 'w', encoding = 'utf_8') as f: 41 | f.write(contents) 42 | -------------------------------------------------------------------------------- /utils/wl_trs_utils.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Utilities: Translations - Utilities 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # ---------------------------------------------------------------------- 18 | 19 | import glob 20 | import subprocess 21 | 22 | import bs4 23 | 24 | # Fix format of ts files 25 | def fix_ts_format(ts_file): 26 | with open(ts_file, 'r', encoding = 'utf_8') as f: 27 | contents = f.read() 28 | 29 | with open(ts_file, 'w', encoding = 'utf_8') as f: 30 | contents = contents.replace('\n', '\n') 32 | 33 | f.write(contents) 34 | 35 | def del_obsolete_trans(ts_file): 36 | with open(ts_file, 'r', encoding = 'utf_8') as f: 37 | soup = bs4.BeautifulSoup(f.read(), features = 'lxml') 38 | 39 | for element_context in soup.select('context'): 40 | for element_message in element_context.select('message'): 41 | element_tr = element_message.select_one('translation') 42 | 43 | # Remove obsolete translations 44 | if 'type' in element_tr.attrs and element_tr['type'] == 'obsolete': 45 | element_message.decompose() 46 | 47 | # Remove empty contexts 48 | for element_context in soup.select('context'): 49 | if not element_context.select('message'): 50 | element_context.decompose() 51 | 52 | with open(ts_file, 'w', encoding = 'utf_8') as f: 53 | f.write(str(soup)) 54 | 55 | fix_ts_format(ts_file) 56 | 57 | def release_trs(): 58 | for ts_file in glob.glob('trs/*.ts'): 59 | subprocess.run(('lrelease', ts_file), check = True) 60 | 61 | if __name__ == '__main__': 62 | release_trs() 63 | -------------------------------------------------------------------------------- /utils/wl_trs_zho_tw.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Utilities: Translations - Chinese (Traditional) 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any 
later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # ---------------------------------------------------------------------- 18 | 19 | import bs4 20 | import opencc 21 | 22 | from utils import wl_trs_utils 23 | 24 | with open('trs/zho_cn.ts', 'r', encoding = 'utf_8') as f: 25 | trs_zho_cn = f.read() 26 | soup = bs4.BeautifulSoup(trs_zho_cn, features = 'lxml') 27 | 28 | # Convert Unix line endings to Windows ones 29 | with open('trs/zho_cn.ts', 'w', encoding = 'utf_8') as f: 30 | f.write(trs_zho_cn) 31 | 32 | cc = opencc.OpenCC('s2twp') 33 | 34 | # Change language 35 | soup.ts['language'] = 'zh_TW' 36 | # Translate Simplified Chinese into Traditional Chinese 37 | for element_context in soup.select('context'): 38 | for element_message in element_context.select('message'): 39 | element_src = element_message.select_one('source') 40 | element_trans = element_message.select_one('translation') 41 | 42 | # Language-specific files 43 | if element_src.text == 'doc/trs/zho_cn/ACKS.md': 44 | element_trans.string = 'doc/trs/zho_tw/ACKS.md' 45 | else: 46 | element_trans.string = cc.convert(element_trans.text) 47 | 48 | with open('trs/zho_tw.ts', 'w', encoding = 'utf_8') as f: 49 | f.write(str(soup)) 50 | 51 | # Release 52 | wl_trs_utils.fix_ts_format('trs/zho_tw.ts') 53 | wl_trs_utils.release_trs() 54 | -------------------------------------------------------------------------------- /wordless/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/wordless/__init__.py 
-------------------------------------------------------------------------------- /wordless/wl_checks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/wordless/wl_checks/__init__.py -------------------------------------------------------------------------------- /wordless/wl_checks/wl_checks_misc.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Wordless: Checks - Miscellaneous 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
import os
import pathlib

def check_custom_settings(settings_custom, settings_default):
    """Return True if the custom settings have exactly the same nested
    key structure, in the same order, as the default settings.

    Used to detect settings whose layout no longer matches the current
    defaults. Values are not compared, only keys.
    """
    def get_keys(settings, keys):
        # Depth-first traversal: record each key, then recurse into
        # nested dicts, so the flattened key sequence encodes structure
        for key, value in settings.items():
            keys.append(key)

            if isinstance(value, dict):
                get_keys(value, keys)

        return keys

    keys_custom = get_keys(settings_custom, [])
    keys_default = get_keys(settings_default, [])

    # == on two lists is already a bool; no bool() wrapper needed
    return keys_custom == keys_default

def check_dir(dir_name):
    """Create the directory (and any missing parents) if it does not
    already exist, then return its name unchanged."""
    # exist_ok = True makes a separate existence check redundant and
    # also guards against a check-then-create race
    pathlib.Path(dir_name).mkdir(parents = True, exist_ok = True)

    return dir_name

def check_new_name(new_name, names, separator = None):
    """Return a variant of new_name that does not collide with names.

    If new_name is free, it is returned as is; otherwise a numeric
    suffix is appended - ' (i)' by default, or '{separator}{i}' when a
    separator is given - with i starting at 2 and increasing until the
    name is free.
    """
    names = set(names)

    # Guard clause: the common case needs no suffix
    if new_name not in names:
        return new_name

    i = 2

    while True:
        if separator is None:
            new_name_valid = f'{new_name} ({i})'
        else:
            new_name_valid = f'{new_name}{separator}{i}'

        if new_name_valid not in names:
            return new_name_valid

        i += 1

def check_new_path(new_path):
    """Return a file path that does not collide with an existing file,
    appending ' (i)' before the extension if needed, and create an empty
    placeholder file at the returned path to reserve it."""
    new_path_valid = new_path

    # isfile() implies exists(), so one call suffices
    if os.path.isfile(new_path):
        # Hoisted out of the loop: the stem and extension never change
        path_head, ext = os.path.splitext(new_path)
        i = 2

        while os.path.isfile(new_path_valid := f'{path_head} ({i}){ext}'):
            i += 1

    # Placeholder for the new path
    with open(new_path_valid, 'wb') as _:
        pass

    return new_path_valid
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/wordless/wl_dialogs/__init__.py -------------------------------------------------------------------------------- /wordless/wl_figs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/wordless/wl_figs/__init__.py -------------------------------------------------------------------------------- /wordless/wl_measures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/wordless/wl_measures/__init__.py -------------------------------------------------------------------------------- /wordless/wl_measures/wl_measures_misc.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Wordless: Measures - Miscellaneous 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
import numpy

def modes(inputs):
    """Return all modes (most frequent values) of *inputs* as a list.

    Returns an empty list for empty input. When several values tie for
    the highest frequency, all of them are returned, in sorted order
    (numpy.unique returns its values sorted).
    """
    inputs = numpy.array(inputs)

    if inputs.size == 0:
        return []

    unique, unique_counts = numpy.unique(inputs, return_counts = True)

    # Boolean mask selects every value tied for the maximal frequency,
    # replacing the former manual zip/append loop
    return list(unique[unique_counts == unique_counts.max()])
# Universal POS Tags: https://universaldependencies.org/fr/pos/
# Each row: [original tag, universal POS tag, description, examples]
tagset_mapping = [
    ['ADJ', 'ADJ', 'Adjective', 'grand/grande/grands/grandes, vieux/vieille/vieilles'],
    ['ADP', 'ADP', 'Adposition', 'pour, de, à, dans'],
    ['ADV', 'ADV', 'Adverb', 'très (joli), (fondues) ensemble'],
    ['AUX', 'AUX', 'Auxiliary', 'être, avoir, faire'],
    ['CONJ', 'CONJ', 'Coordinating/subordinating conjunction', 'See CCONJ and SCONJ'],
    ['CCONJ', 'CCONJ', 'Coordinating conjunction', 'mais, ou, et, or, ni, car'],
    ['SCONJ', 'SCONJ', 'Subordinating conjunction', 'quand\nMultiword subordinating conjunction: (parce) que, (afin) que, (avant) que)'],
    ['DET', 'DET', 'Determiner', 'Articles (a closed class indicating definiteness, specificity or givenness): le, la, les\nPossessive determiners: mon, ton, son, ma, ta, sa, mes, tes, ses, notre, votre, leur, nos, vos, leurs\nDemonstrative determiners: (J’ai vu) ce (vélo hier.), cet, cette\nInterrogative determiners: quel, Quelle (couleur aimez-vous?)\nRelative determiners: quel, (Je me demande) quelle (couleur vous aimez.)\nQuantity/quantifier determiners: aucun'],
    ['INTJ', 'INTJ', 'Interjection', 'bref, bon, enfin'],
    ['NOUN', 'NOUN', 'Noun', 'fille, chat, arbre, air, beauté'],
    ['PROPN', 'PROPN', 'Proper noun', 'Pierre, ONU, Mexique'],
    ['NUM', 'NUM', 'Numeral', 'quatre, 4, IV'],
    ['PART', 'PART', 'Particle', 'Negation particle: ne'],
    ['PRON', 'PRON', 'Pronoun', 'Personal pronouns: je, tu, il\nDemonstrative pronouns: ceux\nReflexive pronouns: me, se\nInterrogative/relative pronouns: qui, que'],
    ['VERB', 'VERB', 'Verb', '(je) vois, (à) lire, (en) marchant'],

    # Non-word tokens
    ['PUNCT', 'PUNCT', 'Punctuation', 'Period: .\nComma: ,\nParentheses: ()'],
    ['SYM', 'SYM', 'Symbol', '$, %, §, ©\n+, −, ×, ÷, =, <, >\n:), ♥‿♥, 😝\njohn.doe@universal.org, http://universaldependencies.org/, 1-800-COMPANY'],
    ['X', 'X', 'Other', 'etc']
]
# Reference: https://www2.nict.go.jp/astrec-att/member/mutiyama/ALT/Khmer-annotation-guideline.pdf
# Each row: [ALT tag, universal POS tag, description, examples]
tagset_mapping = [
    ['n', 'NOUN', 'General nouns, can be subjects or objects of tokens tagged by v', ''],
    ['v', 'VERB', 'General verbs, can take tokens tagged by n as arguments', ''],
    ['a', 'ADJ', 'General adjectives, can directly describe or modify tokens tagged by n', ''],
    ['o', 'PART', 'Other modifications or complements for tokens or larger syntactic parts', ''],

    # Non-word tokens
    ['1', 'NUM', 'General numbers', ''],
    ['.', 'PUNCT', 'General punctuation marks', ''],
    ['+', 'X', 'A catch-all category, for tokens with weak syntactic roles', '']
]
# References:
# MeCab: https://docs.google.com/spreadsheets/u/0/d/1-9blXKjtjeKZqsf4NzHeYJCrr49-nXeRF6D80udfcwY/edit?usp=sharing
# spaCy: https://github.com/explosion/spaCy/blob/2ce9a220dbd30d3a79c2a232230204a102fb3f1d/spacy/lang/ko/tag_map.py
# Each row: [MeCab-ko tag, universal POS tag, description (in Korean, as
# given by the reference table), examples]
tagset_mapping = [
    ['NNG', 'NOUN', '일반 명사', ''],
    ['NNP', 'PROPN', '고유 명사', ''],
    ['NNB', 'NOUN', '의존 명사', ''],
    ['NNBC', 'NOUN', '단위를 나타내는 명사', ''],
    ['NR', 'NUM', '수사', ''],
    ['NP', 'PRON', '대명사', ''],

    ['VV', 'VERB', '동사', ''],
    ['VA', 'ADJ', '형용사', ''],
    ['VX', 'AUX', '보조 용언', ''],
    ['VCP', 'ADP', '긍정 지정사', ''],
    ['VCN', 'ADJ', '부정 지정사', ''],

    ['MM', 'DET', '관형사', ''],
    ['MAG', 'ADV', '일반 부사', ''],
    ['MAJ', 'CONJ', '접속 부사', ''],

    ['IC', 'INTJ', '감탄사', ''],

    ['JKS', 'ADP', '주격 조사', ''],
    ['JKC', 'ADP', '보격 조사', ''],
    ['JKG', 'ADP', '관형격 조사', ''],
    ['JKO', 'ADP', '목적격 조사', ''],
    ['JKB', 'ADP', '부사격 조사', ''],
    ['JKV', 'ADP', '호격 조사', ''],
    ['JKQ', 'ADP', '인용격 조사', ''],
    ['JX', 'ADP', '보조사', ''],
    ['JC', 'CONJ', '접속 조사', ''],

    ['EP', 'X', '선어말 어미', ''],
    ['EF', 'X', '종결 어미', ''],
    ['EC', 'X', '연결 어미', ''],
    ['ETN', 'X', '명사형 전성 어미', ''],
    ['ETM', 'X', '관형형 전성 어미', ''],

    ['XPN', 'PART', '체언 접두사', ''],

    ['XSN', 'X', '명사 파생 접미사 ', ''],
    ['XSV', 'X', '동사 파생 접미사', ''],
    ['XSA', 'X', '형용사 파생 접미사', ''],

    ['XR', 'X', '어근', ''],

    ['SF', 'PUNCT', '마침표, 물음표, 느낌표', ''],
    ['SE', 'PUNCT', '줄임표', '…'],
    ['SSO', 'PUNCT', '여는 괄호', '( ['],
    ['SSC', 'PUNCT', '닫는 괄호', ') ]'],
    ['SC', 'PUNCT', '구분자', ', · / :'],
    ['SY', 'SYM', '', ''],

    ['SL', 'X', '외국어', ''],
    ['SH', 'X', '한자', ''],
    ['SN', 'NUM', '숫자', '']
]
# Reference: https://github.com/FoVNull/SeqLabeling/blob/main/reference/Lao_POS.tsv
# Each row: [Lao POS tag, universal POS tag, description (in Chinese, as
# given by the reference table), examples]
tagset_mapping = [
    ['N', 'NOUN', '名词', ''],
    ['TTL', 'NOUN', '称号名词', ''],
    ['PRN', 'PROPN', '专有名词', ''],

    ['NTR', 'PRON', '疑问代词', ''],
    ['DMN', 'PRON', '指示代词', ''],
    ['PRS', 'PRON', '人称代词', ''],
    ['REL', 'PRON', '关系代词', ''],

    ['V', 'VERB', '动词', ''],

    ['PRA', 'AUX', '前置助动词', ''],
    ['PVA', 'AUX', '后置助动词', ''],

    ['ADJ', 'ADJ', '形容词', ''],
    ['ADV', 'ADV', '副词', ''],

    ['DBQ', 'DET', '数词前限定词', ''],
    ['DAQ', 'DET', '数词后限定词', ''],
    ['IBQ', 'DET', '数词前不定限定词', ''],
    ['IAQ', 'DET', '数词后不定限定词', ''],
    ['DAN', 'DET', '名词后限定词', ''],
    ['IAC', 'DET', '名词后不定限定词', ''],

    ['CNM', 'NUM', '基数词', ''],
    ['ONM', 'ADJ', '序数词', ''],

    ['COJ', 'CONJ', '连词', ''],
    ['PRE', 'ADP', '介词', ''],

    ['CLF', 'PART', '量词', ''],
    ['FIX', 'PART', '前置词', ''],
    ['NEG', 'PART', '否定词', ''],

    ['INT', 'INTJ', '语气词', ''],
    ['PUNCT', 'PUNCT', '标点符号', '']
]
# Reference: https://github.com/FoVNull/SeqLabeling/blob/main/reference/Lao_POS.tsv
# Each row: [Lao POS tag, universal POS tag, description (in Chinese, as
# given by the reference table), examples]
tagset_mapping = [
    ['N', 'NOUN', '名词', ''],
    ['TTL', 'NOUN', '称号名词', ''],
    ['PRN', 'PROPN', '专有名词', ''],

    ['NTR', 'PRON', '疑问代词', ''],
    ['DMN', 'PRON', '指示代词', ''],
    ['PRS', 'PRON', '人称代词', ''],
    ['REL', 'PRON', '关系代词', ''],

    ['V', 'VERB', '动词', ''],

    ['PRA', 'AUX', '前置助动词', ''],
    ['PVA', 'AUX', '后置助动词', ''],

    ['ADJ', 'ADJ', '形容词', ''],
    ['ADV', 'ADV', '副词', ''],

    ['DBQ', 'DET', '数词前限定词', ''],
    ['DAQ', 'DET', '数词后限定词', ''],
    ['IBQ', 'DET', '数词前不定限定词', ''],
    ['IAQ', 'DET', '数词后不定限定词', ''],
    ['DAN', 'DET', '名词后限定词', ''],
    ['IAC', 'DET', '名词后不定限定词', ''],

    ['CNM', 'NUM', '基数词', ''],
    ['ONM', 'ADJ', '序数词', ''],

    ['COJ', 'CONJ', '连词', ''],
    ['PRE', 'ADP', '介词', ''],

    ['CLF', 'PART', '量词', ''],
    ['FIX', 'PART', '前置词', ''],
    ['NEG', 'PART', '否定词', ''],

    ['INT', 'INTJ', '语气词', ''],
    ['PUNCT', 'PUNCT', '标点符号', '']
]
# Universal POS Tags: https://universaldependencies.org/no/pos/
# Each row: [original tag, universal POS tag, description, examples]
tagset_mapping = [
    ['ADJ', 'ADJ', 'Adjective', 'stor, gammel, grønn'],
    ['ADP', 'ADP', 'Adposition', 'i, på, utenfor'],
    ['ADV', 'ADV', 'Adverb', '(Han kom) nettopp, Derfor (kom han), nesten (ferdig)'],
    ['AUX', 'AUX', 'Auxiliary', 'Temporal: har (spist), er (kommet)\nPassive: blir (spist)\nModal: kan/skal/vil/må/bør (spise)\nCopula: er (god)'],
    ['CONJ', 'CONJ', 'Coordinating/subordinating conjunction', 'See CCONJ and SCONJ'],
    ['CCONJ', 'CCONJ', 'Coordinating conjunction', 'og, eller, men'],
    ['SCONJ', 'SCONJ', 'Subordinating conjunction', 'Complementizers: at, om\nAdverbial clause introducers: når, siden, fordi'],
    ['DET', 'DET', 'Determiner', 'Possessive: mitt (barn), våre (barn), (barnet) vårt\nDemonstrative: dette (barnet), det (barnet), den (bilen), (det) samme (barnet) , (det) andre (barnet), hvilken (bil), hvilket (hus)\nQuantifying: en (bil), et (barn), ei (jente), noen (biler), alle (biler), begge (bilene)'],
    ['INTJ', 'INTJ', 'Interjection', 'ja, nei, hei, hallo, heisan, å, ok, piip'],
    ['NOUN', 'NOUN', 'Noun', 'jente, katt, tre, luft, skjønnhet'],
    ['PROPN', 'PROPN', 'Proper noun', 'Kari, Ola\nOslo, Bergen'],
    ['NUM', 'NUM', 'Numeral', '0, 1, 2, 3, 4, 5, 2014, 1000000, 3.14159265359\ntre, femtito, fire-fem, tusen'],
    ['PART', 'PART', 'Particle', '(Han liker) ikke å (spise is)'],
    ['PRON', 'PRON', 'Pronoun', 'Personal: han, hun, det, ham, henne\nDemonstrative: dette\nReflexive: seg\nReciprocal: hverandre\nInterrogative: hvem, hva, hvilken\nTotality: alle\nIndefinite: noen\nRelative: som'],
    ['VERB', 'VERB', 'Verb', 'løpe, løper, løp, (har) løpt\nspise, spiser, spiste, (har) spist'],

    # Non-word tokens
    ['PUNCT', 'PUNCT', 'Punctuation', 'Period: .\nComma: ,\nParentheses: ()'],
    ['SYM', 'SYM', 'Symbol', '/, * *, *'],
    ['X', 'X', 'Other', '[English] (And then he just) xfgh pdl jklw']
]
# Reference: https://pymorphy2.readthedocs.io/en/latest/user/grammemes.html
# Each row: [OpenCorpora grammeme, universal POS tag, description, examples]
tagset_mapping = [
    ['NOUN', 'NOUN', 'Noun', 'хомяк'],
    ['ADJF', 'ADJ', 'Adjective (full)', 'хороший'],
    ['ADJS', 'ADJ', 'Adjective (short)', 'хорош'],
    ['COMP', 'ADJ', 'Comparative', 'лучше, получше, выше'],
    ['VERB', 'VERB', 'Verb (personal form)', 'говорю, говорит, говорил'],
    ['INFN', 'VERB', 'Verb (infinitive)', 'говорить, сказать'],
    ['PRTF', 'VERB', 'Participle (full)', 'прочитавший, прочитанная'],
    ['PRTS', 'VERB', 'Participle (short)', 'прочитана'],
    ['GRND', 'VERB', 'Verbal adverb', 'прочитав, рассказывая'],
    ['NUMR', 'NUM', 'Numeral', 'три, пятьдесят'],
    ['ADVB', 'ADV', 'Adverb', 'круто'],
    ['NPRO', 'PRON', 'Pronoun-noun', 'он'],
    ['PRED', 'PART', 'Predicative', 'некогда'],
    ['PREP', 'ADP', 'Preposition', 'в'],
    ['CONJ', 'CONJ', 'Conjunction', 'и'],
    ['PRCL', 'PART', 'Particle', 'бы, же, лишь'],
    ['INTJ', 'INTJ', 'Interjection', 'ой'],

    # Non-word tokens (descriptions in Russian, as given by the reference)
    ['LATN', 'X', 'Токен состоит из латинских букв', 'foo-bar, Maßstab'],
    ['NUMB', 'NUM', 'Число', '204, 3.14'],
    ['ROMN', 'X', 'Римское число', 'XI'],

    ['PNCT', 'PUNCT', 'Пунктуация', ', ! ? …'],
    ['UNKN', 'SYM/X', 'Токен не удалось разобрать', '']
]
# References:
# https://github.com/PyThaiNLP/pythainlp/blob/dev/docs/api/tag.rst#pythainlptag
# https://github.com/PyThaiNLP/pythainlp/blob/dev/pythainlp/tag/blackboard.py
# https://bitbucket.org/kaamanita/blackboard-treebank/src/master/Blackboard-Treebank.pdf
# Each row: [Blackboard Treebank tag, universal POS tag, description, examples]
tagset_mapping = [
    ['AJ', 'ADJ', 'Adjective: Attribute, modifier, or description of a noun', 'ใหม่, พิเศษ , ก่อน, มาก, สูง'],
    ['AV', 'ADV', 'Adverb: Word that modifies or qualifies an adjective, verb, or another adverb', 'ก่อน, ก็, เล็กน้อย, เลย, สุด'],
    ['AX', 'AUX', 'Auxiliary: Tense, aspect, mood, and voice', 'เป็น, ใช่, คือ, คล้าย'],
    ['CC', 'CCONJ', 'Connector: Conjunction and relative pronoun', 'แต่, และ, หรือ'],
    ['CL', 'NOUN', 'Classifier: Class or measurement unit to which a noun or an action belongs', 'กำมือ, พวก, สนาม, กีฬา, บัญชี'],
    ['FX', 'NOUN', 'Prefix: Inflectional (nominalizer, adjectivizer, adverbializer, and courteous verbalizer), and derivational', 'กำมือ, พวก, สนาม, กีฬา, บัญชี'],
    ['IJ', 'INTJ', 'Interjection: Exclamation word', 'อุ้ย, โอ้ย'],
    # Description fixed: was truncated as 'Word of negatio'
    ['NG', 'PART', 'Negator: Word of negation', ''],
    ['NN', 'NOUN', 'Noun: Person, place, thing, abstract concept, and proper name', 'กำมือ, พวก, สนาม, กีฬา, บัญชี'],
    ['NU', 'NUM', 'Number: Quantity for counting and calculation', '5,000, 103.7, 2004, หนึ่ง, ร้อย'],
    ['PA', 'PART', 'Particle: Politeness, intention, belief, question', 'มา ขึ้น ไม่ ได้ เข้า'],
    ['PR', 'PRON', 'Pronoun: Word used to refer to an element in the discourse', 'เรา, เขา, ตัวเอง, ใคร, เธอ'],
    ['PS', 'ADP', 'Preposition: Location, comparison, instrument, exemplification', 'แม้, ว่า, เมื่อ, ของ, สำหรับ'],
    ['PU', 'PUNCT', 'Punctuation: Punctuation mark', '''(, ), ", ', :'''],
    ['VV', 'VERB', 'Verb: Action, state, occurrence, and word that forms the predicate part', 'เปิด, ให้, ใช้, เผชิญ, อ่าน'],
    ['XX', 'X', 'Others: Unknown category', 'xfgh, pdl, jklw']
]
# Reference: https://github.com/undertheseanlp/underthesea/wiki/M%C3%B4-t%E1%BA%A3-d%E1%BB%AF-li%E1%BB%87u-b%C3%A0i-to%C3%A1n-POS-Tag
# Each row: [Underthesea tag, universal POS tag, description (in
# Vietnamese, as given by the reference table; empty where the
# reference provides none), examples]
tagset_mapping = [
    ['A', 'ADJ', 'Tính từ', 'nhiều, hơn, khác, gần, lớn'],
    ['Ab', 'ADJ', 'Tính từ mượn', 'sexy, Peace, đờmi'],
    ['B', 'X', 'Từ mượn', 'karaoke, nilông, fax, oxy'],
    ['C', 'CCONJ', 'Liên từ', 'thì, nhưng, như, mà'],
    ['Cc', 'SCONJ', 'Liên từ đẳng lập', 'và, hay, hoặc, cùng'],
    ['E', 'ADP', 'Giới từ', 'của, trong, với, ở, cho'],
    ['Fw', 'X', 'Từ nước ngoài', 'Eleocharis, karaoke, Internationa'],
    ['FW', 'X', 'Từ nước ngoài', 'photo, knock-out, chat'],
    ['I', 'INTJ', 'Thán từ', 'ơi, ạ, Ôi, à, Vâng'],
    ['L', 'DET', 'Định từ', 'những, các, mấy, mọi, một số'],
    ['M', 'NUM', 'Số từ', 'một, hai, ba, Một, triệu, 1'],
    ['N', 'NOUN', 'Danh từ', 'người, khi, nhà, năm, ngày'],
    ['Nb', 'NOUN', 'Danh từ mượn', 'tivi, két, casino, golf, bar'],
    ['Nc', 'NOUN', 'Danh từ chỉ loại', 'con, cái, chiếc, ngôi'],
    ['Ne', 'NOUN', '', 'bọn, bộ, đoàn, tụi'],
    ['Ni', 'NOUN', 'Danh từ kí hiệu', 'A5, 1A, A4, B, A, 2032TS'],
    ['Np', 'PROPN', 'Danh từ riêng', ''],
    ['NNP', 'NOUN', '', 'VN, Nguyễn, Văn'],
    ['Ns', 'NOUN', '', 'ông, anh, người, chị'],
    ['Nu', 'NOUN', 'Danh từ đơn vị', 'đồng, m, tuổi, ha'],
    ['Ny', 'NOUN', 'Danh từ viết tắt', 'VN, TP, UBND, SV, ĐL'],
    ['P', 'PRON', 'Đại từ', 'này, tôi, đó, mình, đây'],
    ['R', 'X', 'Phó từ (Trạng từ)', 'không, đã, cũng, lại'],
    ['S', 'X', '', 'phó, trưởng, nguyên, Phó'],
    ['T', 'PART', 'Trợ từ', 'cả, ngay, chính, đến'],
    ['V', 'VERB', 'Động từ', 'có, là, được, đi, làm'],
    ['X', 'X', 'Không phân loại', 'như vậy, làm sao, nhất là'],
    ['Y', 'NOUN', '', ''],
    ['Z', 'X', 'Yếu tố cấu tạo từ', 'phó, viên, bất, siêu, tái, tổng'],

    ['CH', 'PUNCT', 'Dấu câu', ', . " ... “ ”'],
]
# Reference: https://github.com/Esukhia/botok/blob/master/botok/vars.py
# Each row: [Botok tag, universal POS tag, description, examples]
tagset_mapping = [
    ['ADJ', 'ADJ', 'Adjectives', ''],
    ['ADP', 'ADP', 'Adposition', ''],
    ['ADV', 'ADV', 'Adverb', ''],
    ['AUX', 'AUX', 'Auxiliary', ''],
    ['CONJ', 'CONJ', 'Conjunction', ''],
    ['CCONJ', 'CCONJ', 'Coordinating conjunction', ''],
    ['SCONJ', 'SCONJ', 'Subordinating conjunction', ''],
    ['DET', 'DET', 'Determiner', ''],
    ['INTJ', 'INTJ', 'Interjection', ''],
    ['NOUN', 'NOUN', 'Noun', ''],
    ['PROPN', 'PROPN', 'Proper noun', ''],
    ['PART', 'PART', 'Particle', ''],
    ['PRON', 'PRON', 'Pronoun', ''],
    ['VERB', 'VERB', 'Verb', ''],
    ['NO_POS', 'X', 'No part-of-speech', ''],
    ['NON_WORD', 'X', 'Non-word', ''],

    # Languages
    ['BO', 'X', 'Tibetan language', ''],
    ['LATIN', 'X', 'Latin languages', ''],
    ['CJK', 'X', 'CJK languages', ''],
    ['OTHER', 'X', 'Other languages', ''],

    # Tibetan Textual Content
    ['TEXT', 'X', 'Tibetan textual content', ''],

    # Tibetan Non-textual Content
    ['NUM', 'NUM', 'Numeral', ''],
    ['NON_NUM', 'X', 'Non-numeral', ''],
    ['PUNCT', 'PUNCT', 'Punctuation', ''],
    ['NON_PUNCT', 'X', 'Non-punctuation', ''],
    ['SYM', 'SYM', 'Symbol', ''],
    ['NON_SYM', 'X', 'Non-symbol', ''],
    ['SPACE', 'X', 'Space', ''],
    ['NON_SPACE', 'X', 'Non-space', '']
]
class Wl_Exc(Exception):
    """Root of the Wordless exception hierarchy."""

class Wl_Exc_Word_Cloud(Wl_Exc):
    """Base class for errors raised during word cloud generation."""

class Wl_Exc_Word_Cloud_Font(Wl_Exc_Word_Cloud):
    """Base class for word cloud font errors."""

class Wl_Exc_Word_Cloud_Font_Nonexistent(Wl_Exc_Word_Cloud_Font):
    """The selected word cloud font file does not exist."""

class Wl_Exc_Word_Cloud_Font_Is_Dir(Wl_Exc_Word_Cloud_Font):
    """The selected word cloud font path is a directory, not a file."""

class Wl_Exc_Word_Cloud_Font_Unsupported(Wl_Exc_Word_Cloud_Font):
    """The selected word cloud font file is of an unsupported format."""

class Wl_Exc_Word_Cloud_Mask(Wl_Exc_Word_Cloud):
    """Base class for word cloud mask image errors."""

class Wl_Exc_Word_Cloud_Mask_Nonexistent(Wl_Exc_Word_Cloud_Mask):
    """The selected word cloud mask image does not exist."""

class Wl_Exc_Word_Cloud_Mask_Is_Dir(Wl_Exc_Word_Cloud_Mask):
    """The selected word cloud mask path is a directory, not a file."""

class Wl_Exc_Word_Cloud_Mask_Unsupported(Wl_Exc_Word_Cloud_Mask):
    """The selected word cloud mask image is of an unsupported format."""
def get_normalized_path(path):
    """Resolve symlinks in *path*, then return the normalized result."""
    return os.path.normpath(os.path.realpath(path))

def get_normalized_dir(path):
    """Return the normalized directory that contains *path*."""
    return os.path.dirname(get_normalized_path(path))

def get_path_file(*paths, internal = True):
    """Join *paths* into a normalized file path.

    When running from a PyInstaller bundle (sys._MEIPASS is set), the path
    is anchored inside the bundle when *internal* is True, otherwise next
    to the executable (platform-dependent layout); when not frozen, the
    path components are joined as given.
    """
    # sys._MEIPASS only exists inside a PyInstaller bundle
    if not getattr(sys, '_MEIPASS', False):
        path = os.path.join(*paths)
    elif internal:
        path = os.path.join(sys._MEIPASS, *paths)
    else:
        is_windows, is_macos, is_linux = wl_misc.check_os()

        if is_windows or is_linux:
            path = os.path.join(sys._MEIPASS, '..', *paths)
        elif is_macos:
            # macOS app bundles place external files under Contents/MacOS
            path = os.path.join(sys._MEIPASS, '..', 'MacOS', *paths)

    return get_normalized_path(path)

def get_path_data(*paths):
    """Return the normalized path of a file under the data directory."""
    return get_path_file('data', *paths)

def get_path_img(*paths):
    """Return the normalized path of a file under the imgs directory."""
    return get_path_file('imgs', *paths)
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/wordless/wl_widgets/__init__.py -------------------------------------------------------------------------------- /wordless/wl_widgets/wl_labels.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Wordless: Widgets - Labels 3 | # Copyright (C) 2018-2025 Ye Lei (叶磊) 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
from PyQt5 import QtCore
from PyQt5 import QtWidgets

from wordless.wl_utils import wl_misc

class Wl_Label(QtWidgets.QLabel):
    """Base label widget for Wordless.

    Stores the result of wl_misc.find_wl_main(parent) on self.main —
    presumably the main application window located via the parent chain;
    verify against wl_misc.
    """

    def __init__(self, text, parent):
        super().__init__(text, parent)

        self.main = wl_misc.find_wl_main(parent)

class Wl_Label_Hint(Wl_Label):
    """Label rendered in muted gray (#777), used for hint text."""

    def __init__(self, text, parent):
        super().__init__(text, parent)

        self.setStyleSheet('''
            color: #777;
        ''')

class Wl_Label_Html(Wl_Label):
    """Label that renders its text as rich text (HTML) with clickable external links."""

    def __init__(self, html, parent):
        super().__init__(html, parent)

        # Justified horizontally, centered vertically
        self.setAlignment(QtCore.Qt.AlignJustify | QtCore.Qt.AlignVCenter)
        self.setTextFormat(QtCore.Qt.RichText)
        self.setOpenExternalLinks(True)

class Wl_Label_Html_Centered(Wl_Label_Html):
    """HTML label centered both horizontally and vertically."""

    def __init__(self, html, parent):
        super().__init__(html, parent)

        # Overrides the justify/v-center alignment set by Wl_Label_Html
        self.setAlignment(QtCore.Qt.AlignCenter)

# Shared rich-text style prefix for dialog labels.
# NOTE(review): the contents of this string (original lines 53-58) appear to
# have been stripped during text extraction — likely an inline <style> block.
# Restore the original contents from version control before relying on this.
STYLES_DIALOG = '''
'''

class Wl_Label_Dialog(Wl_Label_Html):
    """HTML label for dialogs: prefixes the text with STYLES_DIALOG and word-wraps by default.

    NOTE(review): any HTML markup wrapping {STYLES_DIALOG}/{text} in the
    f-strings below may also have been stripped during extraction — verify
    against version control.
    """

    def __init__(self, text, parent, word_wrap = True):
        super().__init__(
            f'''
                {STYLES_DIALOG}
                {text}
            ''',
            parent
        )

        self.setWordWrap(word_wrap)

    def set_text(self, text):
        # Re-applies the shared dialog styles when the text is replaced
        super().setText(f'''
            {STYLES_DIALOG}
            {text}
        ''')

class Wl_Label_Dialog_No_Wrap(Wl_Label_Dialog):
    """Dialog label with word wrapping disabled."""

    def __init__(self, text, parent):
        super().__init__(text, parent)

        self.setWordWrap(False)