3 |
FACTSHEET WHAT IS AIDS?
4 | AIDS (Acquired Immune Deficiency Syndrome)is a condition caused by a virus called HIV (Human Immuno Deficiency Virus).
5 | This virus affects the body's defence system so that it cannot fight infection.
6 |
--------------------------------------------------------------------------------
/tests/files/wl_file_area/misc/vie_tokenized.txt:
--------------------------------------------------------------------------------
1 | Tiếng Việt , cũng gọi_là tiếng Việt_Nam [ 9_] hay Việt_ngữ là ngôn_ngữ của người Việt và là ngôn_ngữ chính_thức tại Việt_Nam . Đây là tiếng_mẹ_đẻ của khoảng 85 % dân_cư Việt_Nam cùng với hơn 4 triệu người Việt_kiều . Tiếng Việt còn là ngôn_ngữ thứ hai của các dân_tộc_thiểu_số tại Việt_Nam và là ngôn_ngữ dân_tộc_thiểu_số được công_nhận tại Cộng_hòa_Séc .
2 |
3 | _test test_ _
4 |
--------------------------------------------------------------------------------
/tests/files/wl_file_area/tags/tokenized_tagged.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | This is the first sentence . This_TAG3RunningToken_TAG3 is the second sentence .
4 |
5 |
6 | This is the third sentence .
7 |
8 |
9 |
--------------------------------------------------------------------------------
/tests/files/wl_file_area/tags/tokenized_untagged.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | This is the first sentence . This is the second sentence .
4 |
5 |
6 | This is the third sentence .
7 |
8 |
9 |
--------------------------------------------------------------------------------
/tests/files/wl_file_area/tags/untokenized_tagged.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | This is the first sentence. This_TAG3 is the second sentence.
4 |
5 |
6 | This is the third sentence.
7 |
8 |
9 |
--------------------------------------------------------------------------------
/tests/files/wl_file_area/tags/untokenized_untagged.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | This is the first sentence. This is the second sentence.
4 |
5 |
6 | This is the third sentence.
7 |
8 |
9 |
--------------------------------------------------------------------------------
/tests/files/wl_file_area/unicode_decode_error/unicode_decode_error (utf_16).csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/files/wl_file_area/unicode_decode_error/unicode_decode_error (utf_16).csv
--------------------------------------------------------------------------------
/tests/files/wl_file_area/unicode_decode_error/unicode_decode_error (utf_16).html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/files/wl_file_area/unicode_decode_error/unicode_decode_error (utf_16).html
--------------------------------------------------------------------------------
/tests/files/wl_file_area/unicode_decode_error/unicode_decode_error (utf_16).txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/files/wl_file_area/unicode_decode_error/unicode_decode_error (utf_16).txt
--------------------------------------------------------------------------------
/tests/files/wl_file_area/unicode_decode_error/unicode_decode_error (utf_16).xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/files/wl_file_area/unicode_decode_error/unicode_decode_error (utf_16).xml
--------------------------------------------------------------------------------
/tests/test_concordancer_parallel.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Work Area - Parallel Concordancer
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | import glob
20 |
21 | from tests import wl_test_init
22 | from wordless import wl_concordancer_parallel
23 | from wordless.wl_dialogs import wl_dialogs_misc
24 |
25 | def test_concordancer_parallel():
26 | main = wl_test_init.Wl_Test_Main(switch_lang_utils = 'fast')
27 |
28 | settings = main.settings_custom['concordancer_parallel']
29 |
30 | settings['search_settings']['multi_search_mode'] = True
31 | settings['search_settings']['search_terms'] = wl_test_init.SEARCH_TERMS
32 |
33 | for i in range(2):
34 | match i:
35 | case 0:
36 | wl_test_init.select_test_files(main, no_files = [0, 1, 2])
37 | case 1:
38 | wl_test_init.select_test_files(
39 | main,
40 | no_files = list(range(1, 3 + len(glob.glob('tests/files/file_area/misc/*.txt'))))
41 | )
42 |
43 | print(f"Files: {' | '.join(wl_test_init.get_test_file_names(main))}")
44 |
45 | wl_concordancer_parallel.Wl_Worker_Concordancer_Parallel_Table(
46 | main,
47 | dialog_progress = wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(main),
48 | update_gui = update_gui
49 | ).run()
50 |
def update_gui(err_msg, concordance_lines):
    """Validate the concordance lines produced by the worker.

    Each concordance line is a pair: (parallel unit no., total number of
    parallel units) followed by a sequence of parallel units, each of which
    is itself a pair.
    """
    print(err_msg)

    assert not err_msg
    assert concordance_lines

    for line in concordance_lines:
        assert len(line) == 2

        unit_no, num_units = line[0]

        # Parallel unit numbers are 1-based
        assert unit_no >= 1
        assert num_units >= 1

        # Every parallel unit is a pair
        for unit in line[1]:
            assert len(unit) == 2
# Allow running this test module directly as a script
if __name__ == '__main__':
    test_concordancer_parallel()
71 |
--------------------------------------------------------------------------------
/tests/tests_checks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_checks/__init__.py
--------------------------------------------------------------------------------
/tests/tests_checks/test_checks_files.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Checks - Files
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests import wl_test_init
20 | from wordless.wl_checks import wl_checks_files
21 | from wordless.wl_utils import wl_paths
22 |
def get_normalized_file_path(file_name):
    """Return the normalized path of a file-checks test fixture."""
    file_path = f'tests/files/wl_checks/wl_checks_files/{file_name}'

    return wl_paths.get_normalized_path(file_path)
25 |
# Shared test main with one file (dup.txt) already opened in the file area,
# so duplicate checks can be tested against already-opened files
main = wl_test_init.Wl_Test_Main()
main.settings_custom['file_area']['files_open'] = [
    {
        'path_orig': get_normalized_file_path('dup.txt')
    }
]

# Files with unsupported extensions
FILE_PATHS_UNSUPPORTED = [
    get_normalized_file_path('unsupported.unsupported')
]
# Files with no content
FILE_PATHS_EMPTY = [
    get_normalized_file_path('empty_txt.txt'),
    get_normalized_file_path('empty_docx.docx')
]
# dup.txt duplicates the file already opened above; dup.xml is listed twice
FILE_PATHS_DUP = [
    get_normalized_file_path('dup.txt'),
    get_normalized_file_path('dup.xml'),
    get_normalized_file_path('dup.xml')
]
45 |
def test_check_file_paths_unsupported():
    """Only the unsupported paths should be reported."""
    file_paths = ['supported.txt'] + FILE_PATHS_UNSUPPORTED

    _, files_unsupported = wl_checks_files.check_file_paths_unsupported(main, file_paths)

    assert files_unsupported == FILE_PATHS_UNSUPPORTED

def test_check_file_paths_empty():
    """Only the empty files should be reported."""
    file_paths = [FILE_PATHS_DUP[0]] + FILE_PATHS_EMPTY

    _, files_empty = wl_checks_files.check_file_paths_empty(main, file_paths)

    assert files_empty == FILE_PATHS_EMPTY

def test_check_file_paths_duplicate():
    """dup.txt duplicates an already-opened file and dup.xml is listed
    twice, so the first two paths should be reported as duplicates."""
    _, files_dup = wl_checks_files.check_file_paths_dup(main, FILE_PATHS_DUP)

    assert files_dup == FILE_PATHS_DUP[:2]

def test_check_err_file_area():
    """An empty error message passes the check; a non-empty one fails it."""
    assert wl_checks_files.check_err_file_area(main, '')
    assert not wl_checks_files.check_err_file_area(main, 'test')
64 |
# Allow running this test module directly as a script
if __name__ == '__main__':
    test_check_file_paths_unsupported()
    test_check_file_paths_empty()
    test_check_file_paths_duplicate()

    test_check_err_file_area()
71 |
--------------------------------------------------------------------------------
/tests/tests_checks/test_checks_misc.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Checks - Miscellaneous
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | import os
20 | import shutil
21 |
22 | from wordless.wl_checks import wl_checks_misc
23 |
def test_check_custom_settings():
    """A settings dict validates against defaults with the same structure,
    but not against an empty defaults dict."""
    settings = {
        'key_1': 'val_2',
        'key_2': {
            'key_3': 'val_3',
            'key_4': 'val_4'
        }
    }

    # Same object passed as both custom and default settings
    assert wl_checks_misc.check_custom_settings(settings, settings)
    assert not wl_checks_misc.check_custom_settings(settings, {})
35 |
def test_check_dir():
    """check_dir should create the directory when it is missing."""
    # Start from a clean state
    if os.path.exists('temp'):
        shutil.rmtree('temp')

    wl_checks_misc.check_dir('temp')
    assert os.path.exists('temp')

    # Clean up
    os.rmdir('temp')
45 |
def test_check_new_name():
    """check_new_name should fill the first gap in the numbered names,
    honoring the requested separator."""
    existing_names = ['new_name', 'new_name (2)', 'new_name (4)']

    assert wl_checks_misc.check_new_name('new_name', existing_names) == 'new_name (3)'
    assert wl_checks_misc.check_new_name('new_name', existing_names, separator = '/') == 'new_name/2'
52 |
def test_check_new_path():
    """check_new_path should fill the first gap in the numbered file names.

    The temporary directory is removed in a finally block so a failing
    assertion does not leave stale files behind for other tests (the
    original cleanup was skipped whenever the assertion raised).
    """
    if os.path.exists('temp'):
        shutil.rmtree('temp')

    os.mkdir('temp')

    try:
        for file_name in ('temp', 'temp (2)', 'temp (4)'):
            with open(f'temp/{file_name}.temp', 'w', encoding = 'utf_8'):
                pass

        assert wl_checks_misc.check_new_path('temp/temp.temp') == 'temp/temp (3).temp'
    finally:
        shutil.rmtree('temp')
66 |
# Allow running this test module directly as a script
if __name__ == '__main__':
    test_check_custom_settings()
    test_check_dir()
    test_check_new_name()
    test_check_new_path()
72 |
--------------------------------------------------------------------------------
/tests/tests_dialogs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_dialogs/__init__.py
--------------------------------------------------------------------------------
/tests/tests_dialogs/test_dialogs_errs.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Dialogs - Errors
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests import wl_test_init
20 | from wordless.wl_dialogs import wl_dialogs_errs
21 |
main = wl_test_init.Wl_Test_Main()

def test_wl_dialog_err():
    # Smoke test: construct the dialog and open it
    dialog = wl_dialogs_errs.Wl_Dialog_Err(main, title = 'test')
    dialog.open()

def test_wl_dialog_err_files():
    # Smoke test: construct the dialog and open it
    dialog = wl_dialogs_errs.Wl_Dialog_Err_Files(main, title = 'test')
    dialog.open()

def test_wl_dialog_err_info_copy():
    # Smoke test: construct the dialog and open it
    dialog = wl_dialogs_errs.Wl_Dialog_Err_Info_Copy(main, title = 'test')
    dialog.open()

def test_wl_dialog_err_fatal():
    # Smoke test: construct the dialog and open it
    dialog = wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg = 'test')
    dialog.open()

def test_wl_dialog_err_download_model():
    # Smoke test: construct the dialog and open it
    dialog = wl_dialogs_errs.Wl_Dialog_Err_Download_Model(main, err_msg = 'test')
    dialog.open()

if __name__ == '__main__':
    test_wl_dialog_err()
    test_wl_dialog_err_files()

    test_wl_dialog_err_info_copy()
    test_wl_dialog_err_fatal()
    test_wl_dialog_err_download_model()
46 |
--------------------------------------------------------------------------------
/tests/tests_dialogs/test_dialogs_misc.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Dialogs - Miscellaneous
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests import wl_test_init
20 | from wordless.wl_dialogs import wl_dialogs_misc
21 |
main = wl_test_init.Wl_Test_Main()

def test_wl_dialog_progress():
    # Construct a progress dialog, open it, then exercise its update methods
    dialog = wl_dialogs_misc.Wl_Dialog_Progress(main, text = 'test')

    dialog.open()
    dialog.update_elapsed_time()
    dialog.update_progress('test')

def test_wl_dialog_progress_process_data():
    # Smoke test: construct the dialog and open it
    dialog = wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(main)
    dialog.open()

def test_wl_dialog_progress_download_model():
    # Smoke test: construct the dialog and open it
    dialog = wl_dialogs_misc.Wl_Dialog_Progress_Download_Model(main)
    dialog.open()

def test_wl_dialog_restart_required():
    # Smoke test: construct the dialog and open it
    dialog = wl_dialogs_misc.Wl_Dialog_Restart_Required(main)
    dialog.open()

if __name__ == '__main__':
    test_wl_dialog_progress()
    test_wl_dialog_progress_process_data()
    test_wl_dialog_progress_download_model()
    test_wl_dialog_restart_required()
44 |
--------------------------------------------------------------------------------
/tests/tests_figs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_figs/__init__.py
--------------------------------------------------------------------------------
/tests/tests_file_area/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_file_area/__init__.py
--------------------------------------------------------------------------------
/tests/tests_measures/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_measures/__init__.py
--------------------------------------------------------------------------------
/tests/tests_measures/test_measures_bayes_factor.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Measures - Bayes factor
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | import numpy
20 |
21 | from tests import wl_test_init
22 | from wordless.wl_measures import wl_measures_bayes_factor
23 |
main = wl_test_init.Wl_Test_Main()

def test_bayes_factor_log_likelihood_ratio_test():
    # All-zero frequency inputs should yield all-zero Bayes factors
    zeros = numpy.array([0, 0])

    numpy.testing.assert_array_equal(
        wl_measures_bayes_factor.bayes_factor_log_likelihood_ratio_test(
            main,
            zeros.copy(),
            zeros.copy(),
            zeros.copy(),
            zeros.copy()
        ),
        zeros
    )

def test_bayes_factor_students_t_test_2_sample():
    # All-zero 2x5 frequency inputs should yield all-zero Bayes factors
    freqs = numpy.array([[0] * 5, [0] * 5])

    numpy.testing.assert_array_equal(
        wl_measures_bayes_factor.bayes_factor_students_t_test_2_sample(
            main,
            freqs.copy(),
            freqs.copy(),
        ),
        numpy.array([0, 0])
    )

if __name__ == '__main__':
    test_bayes_factor_log_likelihood_ratio_test()
    test_bayes_factor_students_t_test_2_sample()
51 |
--------------------------------------------------------------------------------
/tests/tests_measures/test_measures_misc.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Measures - Miscellaneous
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests import wl_test_init
20 | from wordless.wl_measures import wl_measures_misc
21 |
main = wl_test_init.Wl_Test_Main()

def test_modes():
    # Values tied for the highest frequency are all returned
    assert wl_measures_misc.modes([1, 3, 3, 3, 2, 2, 1, 2, 5, 4]) == [2, 3]
    # A single repeated value has exactly one mode
    assert wl_measures_misc.modes([0] * 10) == [0]
    # When all values are distinct, every value is a mode
    assert wl_measures_misc.modes(list(range(10))) == list(range(10))
    # Empty input yields no modes
    assert not wl_measures_misc.modes([])

if __name__ == '__main__':
    test_modes()
34 |
--------------------------------------------------------------------------------
/tests/tests_nlp/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_nlp/__init__.py
--------------------------------------------------------------------------------
/tests/tests_nlp/test_stop_word_lists.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stop word lists
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | import pytest
20 |
21 | from tests import wl_test_init
22 | from wordless.wl_nlp import wl_stop_word_lists
23 |
main = wl_test_init.Wl_Test_Main()

# Collect every (language, stop word list) pair for parametrization
test_stop_word_lists = [
    (lang, stop_word_list)
    for lang, stop_word_lists in main.settings_global['stop_word_lists'].items()
    for stop_word_list in stop_word_lists
]
31 |
@pytest.mark.parametrize('lang, stop_word_list', test_stop_word_lists)
def test_get_stop_word_list(lang, stop_word_list):
    """Every built-in stop word list should be non-empty with no blank
    entries; the custom list should start out empty."""
    stop_words = wl_stop_word_lists.wl_get_stop_word_list(main, lang, stop_word_list = stop_word_list)

    print(f'Number of stop words ({lang} / {stop_word_list}): {len(stop_words)}')

    if stop_word_list != 'custom':
        # Non-empty list with no blank/whitespace-only stop words
        assert stop_words
        assert all(stop_word.strip() for stop_word in stop_words)
    else:
        # The custom list should be empty by default
        assert stop_words == set()
45 |
def test_filter_stop_words():
    """Stop words should be filtered out of the items, with case
    sensitivity controlled by the stop word list settings."""
    assert wl_stop_word_lists.wl_filter_stop_words(main, items = ['a', 'aa'], lang = 'eng_us') == ['aa']
    assert wl_stop_word_lists.wl_filter_stop_words(main, items = [], lang = 'eng_us') == []

    settings = main.settings_custom['stop_word_lists']['stop_word_list_settings']

    # Case-insensitive filtering removes both "A" and "a"
    settings['case_sensitive'] = False
    assert wl_stop_word_lists.wl_filter_stop_words(main, items = ['A', 'a'], lang = 'eng_us') == []

    # Case-sensitive filtering keeps the uppercase variant
    settings['case_sensitive'] = True
    assert wl_stop_word_lists.wl_filter_stop_words(main, items = ['A', 'a'], lang = 'eng_us') == ['A']
54 |
def test_stop_word_lists_misc():
    # Other languages: 'test' is not one of the configured language codes —
    # presumably this exercises the fallback handling in
    # wl_get_stop_word_list; confirm against wl_stop_word_lists
    wl_stop_word_lists.wl_get_stop_word_list(main, lang = 'test')
58 |
# Allow running this test module directly as a script
if __name__ == '__main__':
    for lang, stop_word_list in test_stop_word_lists:
        test_get_stop_word_list(lang, stop_word_list)

    test_filter_stop_words()
    test_stop_word_lists_misc()
65 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_spacy/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_nlp/tests_spacy/__init__.py
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_spacy/test_spacy_dan.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - spaCy - Danish
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_spacy import test_spacy
20 |
def test_spacy_dan():
    """Test the spaCy Danish pipeline: sentence tokenization, word
    tokenization, POS tagging, lemmatization, and dependency parsing.
    """
    # The same expected tags are used for both fine-grained and universal
    # POS tagging
    results_pos_tag = [('Dansk', 'NOUN'), ('er', 'AUX'), ('et', 'DET'), ('østnordisk', 'ADJ'), ('sprog', 'NOUN'), ('indenfor', 'ADP'), ('den', 'DET'), ('germanske', 'ADJ'), ('gren', 'NOUN'), ('af', 'ADP'), ('den', 'DET'), ('indoeuropæiske', 'ADJ'), ('sprogfamilie', 'NOUN'), ('.', 'PUNCT')]

    test_spacy.wl_test_spacy(
        lang = 'dan',
        results_sentence_tokenize_trf = ['Dansk er et østnordisk sprog indenfor den germanske gren af den indoeuropæiske sprogfamilie.', 'Det danske sprog tales af ca. seks millioner mennesker, hovedsageligt i Danmark, men også i Sydslesvig, på Færøerne og Grønland.[1]'],
        results_word_tokenize = ['Dansk', 'er', 'et', 'østnordisk', 'sprog', 'indenfor', 'den', 'germanske', 'gren', 'af', 'den', 'indoeuropæiske', 'sprogfamilie', '.'],
        results_pos_tag = results_pos_tag,
        results_pos_tag_universal = results_pos_tag,
        results_lemmatize = ['dansk', 'være', 'en', 'østnordisk', 'sprog', 'indenfor', 'den', 'germansk', 'gren', 'af', 'den', 'indoeuropæisk', 'sprogfamilie', '.'],
        results_dependency_parse = [('Dansk', 'sprog', 'nsubj', 4), ('er', 'sprog', 'cop', 3), ('et', 'sprog', 'det', 2), ('østnordisk', 'sprog', 'amod', 1), ('sprog', 'sprog', 'ROOT', 0), ('indenfor', 'gren', 'case', 3), ('den', 'gren', 'det', 2), ('germanske', 'gren', 'amod', 1), ('gren', 'sprog', 'nmod', -4), ('af', 'sprogfamilie', 'case', 3), ('den', 'sprogfamilie', 'det', 2), ('indoeuropæiske', 'sprogfamilie', 'amod', 1), ('sprogfamilie', 'gren', 'nmod', -4), ('.', 'sprog', 'punct', -9)]
    )

# Allow running this test module directly as a script
if __name__ == '__main__':
    test_spacy_dan()
36 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_spacy/test_spacy_ell.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - spaCy - Greek (Modern)
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_spacy import test_spacy
20 |
def test_spacy_ell():
    """Test the spaCy Greek (Modern) pipeline: sentence tokenization, word
    tokenization, POS tagging, lemmatization, and dependency parsing.
    """
    # The same expected tags are used for both fine-grained and universal
    # POS tagging
    results_pos_tag = [('Η', 'DET'), ('ελληνική', 'ADJ'), ('γλώσσα', 'NOUN'), ('ανήκει', 'VERB'), ('στην', 'ADP'), ('ινδοευρωπαϊκή', 'ADJ'), ('οικογένεια[9', 'NOUN'), (']', 'NOUN'), ('secεπίσης', 'X'), ('στο', 'ADP'), ('βαλκανικό', 'ADJ'), ('γλωσσικό', 'ADJ'), ('δεσμό', 'NOUN'), ('.', 'PUNCT')]

    # Sentence boundaries differ between the transformer and the lg models,
    # so separate expected results are passed for each
    test_spacy.wl_test_spacy(
        lang = 'ell',
        results_sentence_tokenize_trf = ['Η ελληνική γλώσσα ανήκει στην ινδοευρωπαϊκή οικογένεια[9] secεπίσης στο βαλκανικό γλωσσικό δεσμό.', 'ελληνική γλώσσα', ', έχουμε γραπτά κείμενα ήδη από τον 15ο αιώνα π.', 'Χ..'],
        results_sentence_tokenize_lg = ['Η ελληνική γλώσσα ανήκει στην ινδοευρωπαϊκή οικογένεια[9] secεπίσης στο βαλκανικό γλωσσικό δεσμό.', 'ελληνική γλώσσα, έχουμε γραπτά κείμενα ήδη από τον 15ο αιώνα π.', 'Χ..'],
        results_word_tokenize = ['Η', 'ελληνική', 'γλώσσα', 'ανήκει', 'στην', 'ινδοευρωπαϊκή', 'οικογένεια[9', ']', 'secεπίσης', 'στο', 'βαλκανικό', 'γλωσσικό', 'δεσμό', '.'],
        results_pos_tag = results_pos_tag,
        results_pos_tag_universal = results_pos_tag,
        results_lemmatize = ['ο', 'ελληνικός', 'γλώσσα', 'ανήκω', 'σε ο', 'ινδοευρωπαϊκός', 'οικογένεια[9', ']', 'secεπίσης', 'σε ο', 'βαλκανικός', 'γλωσσικός', 'δεσμός', '.'],
        results_dependency_parse = [('Η', 'γλώσσα', 'det', 2), ('ελληνική', 'γλώσσα', 'amod', 1), ('γλώσσα', 'ανήκει', 'nsubj', 1), ('ανήκει', 'ανήκει', 'ROOT', 0), ('στην', 'οικογένεια[9', 'case', 2), ('ινδοευρωπαϊκή', 'οικογένεια[9', 'amod', 1), ('οικογένεια[9', 'ανήκει', 'obl', -3), (']', 'ανήκει', 'obl', -4), ('secεπίσης', ']', 'nmod', -1), ('στο', 'δεσμό', 'case', 3), ('βαλκανικό', 'δεσμό', 'amod', 2), ('γλωσσικό', 'δεσμό', 'amod', 1), ('δεσμό', ']', 'nmod', -5), ('.', 'ανήκει', 'punct', -10)]
    )

# Allow running this test module directly as a script
if __name__ == '__main__':
    test_spacy_ell()
37 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_spacy/test_spacy_ita.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - spaCy - Italian
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_spacy import test_spacy
20 |
def test_spacy_ita():
    """Test spaCy's Italian pipeline via the shared wl_test_spacy harness.

    Checks sentence tokenization (trf and lg models), word tokenization,
    fine-grained and universal POS tagging, lemmatization, and dependency
    parsing against fixed expected outputs for a sample Italian text.
    """
    # Expected sentence splits, shared by the trf and lg model variants below.
    results_sentence_tokenize = ["L'italiano è una lingua romanza parlata principalmente in Italia.", "Per ragioni storiche e geografiche, l'italiano è la lingua romanza meno divergente dal latino (complessivamente a pari merito, anche se in parametri diversi, con la lingua sarda).[2][3][4][5]"]

    test_spacy.wl_test_spacy(
        lang = 'ita',
        results_sentence_tokenize_trf = results_sentence_tokenize,
        results_sentence_tokenize_lg = results_sentence_tokenize,
        results_word_tokenize = ["L'", 'italiano', 'è', 'una', 'lingua', 'romanza', 'parlata', 'principalmente', 'in', 'Italia', '.'],
        results_pos_tag = [("L'", 'RD'), ('italiano', 'S'), ('è', 'V'), ('una', 'RI'), ('lingua', 'S'), ('romanza', 'A'), ('parlata', 'V'), ('principalmente', 'B'), ('in', 'E'), ('Italia', 'SP'), ('.', 'FS')],
        results_pos_tag_universal = [("L'", 'DET'), ('italiano', 'NOUN'), ('è', 'AUX'), ('una', 'DET'), ('lingua', 'NOUN'), ('romanza', 'ADJ'), ('parlata', 'VERB'), ('principalmente', 'ADV'), ('in', 'ADP'), ('Italia', 'PROPN'), ('.', 'PUNCT')],
        results_lemmatize = ['il', 'italiano', 'essere', 'uno', 'lingua', 'romanza', 'parlare', 'principalmente', 'in', 'Italia', '.'],
        # Entries look like (token, head token, relation, offset to head) — TODO confirm against the harness
        results_dependency_parse = [("L'", 'italiano', 'det', 1), ('italiano', 'lingua', 'nsubj', 3), ('è', 'lingua', 'cop', 2), ('una', 'lingua', 'det', 1), ('lingua', 'lingua', 'ROOT', 0), ('romanza', 'lingua', 'amod', -1), ('parlata', 'lingua', 'amod', -2), ('principalmente', 'parlata', 'advmod', -1), ('in', 'Italia', 'case', 1), ('Italia', 'parlata', 'obl', -3), ('.', 'lingua', 'punct', -6)]
    )

if __name__ == '__main__':
    test_spacy_ita()
37 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_spacy/test_spacy_kor.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - spaCy - Korean
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_spacy import test_spacy
20 |
def test_spacy_kor():
    """Test spaCy's Korean pipeline via the shared wl_test_spacy harness.

    Checks sentence tokenization (trf and lg models), word tokenization,
    fine-grained and universal POS tagging, lemmatization, and dependency
    parsing against fixed expected outputs for a sample Korean text.
    """
    # Expected sentence splits, shared by the trf and lg model variants below.
    results_sentence_tokenize = ['한국어(韓國語), 조선어(朝鮮語)는 대한민국과 조선민주주의인민공화국의 공용어이다.', '둘은 표기나 문법, 동사 어미나 표현에서 약간의 차이가 있다.']

    test_spacy.wl_test_spacy(
        lang = 'kor',
        results_sentence_tokenize_trf = results_sentence_tokenize,
        results_sentence_tokenize_lg = results_sentence_tokenize,
        results_word_tokenize = ['한국어', '(', '韓國語', ')', ',', '조선어', '(', '朝鮮語', ')', '는', '대한민국과', '조선민주주의인민공화국의', '공용어이다', '.'],
        results_pos_tag = [('한국어', 'nq'), ('(', 'sl'), ('韓國語', 'nq'), (')', 'sr'), (',', 'sp'), ('조선어', 'nq'), ('(', 'sl'), ('朝鮮語', 'nq'), (')', 'sr'), ('는', 'jxt'), ('대한민국과', 'nq+jcj'), ('조선민주주의인민공화국의', 'nq+ncn+jcm'), ('공용어이다', 'ncn+jp+ef'), ('.', 'sf')],
        results_pos_tag_universal = [('한국어', 'PROPN'), ('(', 'PUNCT'), ('韓國語', 'PROPN'), (')', 'PUNCT'), (',', 'PUNCT'), ('조선어', 'PROPN'), ('(', 'PUNCT'), ('朝鮮語', 'PROPN'), (')', 'PUNCT'), ('는', 'ADP'), ('대한민국과', 'CCONJ'), ('조선민주주의인민공화국의', 'PROPN'), ('공용어이다', 'VERB'), ('.', 'PUNCT')],
        # Lemmas use '+' to join morphemes within a token (e.g. noun + particle)
        results_lemmatize = ['한국어', '(', '韓國語', ')', ',', '조선어', '(', '朝鮮語', ')', '는', '대한민국+과', '조선민주주의인민공+화국+의', '공용어+이+다', '.'],
        # Entries look like (token, head token, relation, offset to head) — TODO confirm against the harness
        results_dependency_parse = [('한국어', '공용어이다', 'advmod', 12), ('(', '韓國語', 'punct', 1), ('韓國語', '한국어', 'appos', -2), (')', '韓國語', 'punct', -1), (',', '한국어', 'punct', -4), ('조선어', '한국어', 'flat', -5), ('(', '朝鮮語', 'punct', 1), ('朝鮮語', '조선어', 'appos', -2), (')', '朝鮮語', 'punct', -1), ('는', '조선어', 'case', -4), ('대한민국과', '공용어이다', 'nmod', 2), ('조선민주주의인민공화국의', '대한민국과', 'conj', -1), ('공용어이다', '공용어이다', 'ROOT', 0), ('.', '공용어이다', 'punct', -1)]
    )

if __name__ == '__main__':
    test_spacy_kor()
37 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_spacy/test_spacy_nob.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - spaCy - Norwegian (Bokmål)
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_spacy import test_spacy
20 |
def test_spacy_nob():
    """Test spaCy's Norwegian (Bokmål) pipeline via the shared wl_test_spacy harness.

    Checks sentence tokenization (trf and lg models), word tokenization,
    POS tagging, lemmatization, and dependency parsing against fixed
    expected outputs for a sample Bokmål text.
    """
    # Expected sentence splits, shared by the trf and lg model variants below.
    results_sentence_tokenize = ['Bokmål er en av to offisielle målformer av norsk skriftspråk, hvorav den andre er nynorsk.', 'I skrift har 87,3 % bokmål som hovedmål i skolen.[3]']
    # Fine-grained and universal tagsets are expected to coincide here
    # (the same list is passed for both below) — presumably the nb model
    # emits UD tags in both fields; verify against the model's tag map.
    results_pos_tag = [('Bokmål', 'PROPN'), ('er', 'AUX'), ('en', 'DET'), ('av', 'ADP'), ('to', 'NUM'), ('offisielle', 'ADJ'), ('målformer', 'NOUN'), ('av', 'ADP'), ('norsk', 'ADJ'), ('skriftspråk', 'NOUN'), (',', 'PUNCT'), ('hvorav', 'ADV'), ('den', 'DET'), ('andre', 'DET'), ('er', 'AUX'), ('nynorsk', 'ADJ'), ('.', 'PUNCT')]

    test_spacy.wl_test_spacy(
        lang = 'nob',
        results_sentence_tokenize_trf = results_sentence_tokenize,
        results_sentence_tokenize_lg = results_sentence_tokenize,
        results_word_tokenize = ['Bokmål', 'er', 'en', 'av', 'to', 'offisielle', 'målformer', 'av', 'norsk', 'skriftspråk', ',', 'hvorav', 'den', 'andre', 'er', 'nynorsk', '.'],
        results_pos_tag = results_pos_tag,
        results_pos_tag_universal = results_pos_tag,
        results_lemmatize = ['bokmål', 'være', 'en', 'av', 'to', 'offisiell', 'målforme', 'av', 'norsk', 'skriftspråk', '$,', 'hvorav', 'den', 'annen', 'være', 'nynorsk', '$.'],
        # Entries look like (token, head token, relation, offset to head) — TODO confirm against the harness
        results_dependency_parse = [('Bokmål', 'en', 'nsubj', 2), ('er', 'en', 'cop', 1), ('en', 'en', 'ROOT', 0), ('av', 'målformer', 'case', 3), ('to', 'målformer', 'nummod', 2), ('offisielle', 'målformer', 'amod', 1), ('målformer', 'en', 'nmod', -4), ('av', 'skriftspråk', 'case', 2), ('norsk', 'skriftspråk', 'amod', 1), ('skriftspråk', 'målformer', 'nmod', -3), (',', 'nynorsk', 'punct', 5), ('hvorav', 'nynorsk', 'advmod', 4), ('den', 'andre', 'det', 1), ('andre', 'nynorsk', 'nsubj', 2), ('er', 'nynorsk', 'cop', 1), ('nynorsk', 'målformer', 'amod', -9), ('.', 'en', 'punct', -14)]
    )

if __name__ == '__main__':
    test_spacy_nob()
38 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_nlp/tests_stanza/__init__.py
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_ara.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - Arabic
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_ara():
    """Test Stanza's Arabic pipeline via the shared wl_test_stanza harness.

    Checks sentence tokenization, word tokenization, fine-grained and
    universal POS tagging, lemmatization, and dependency parsing against
    fixed expected outputs for a sample Arabic text.
    """
    test_stanza.wl_test_stanza(
        lang = 'ara',
        results_sentence_tokenize = ['تحتوي اللغة العربية 28 حرفاً مكتوباً. ويرى بعضُ اللغويين أنه يجب إضافة حرف الهمزة إلى حروف العربية، ليصبحَ عدد الحروف 29. تُكتب العربية من اليمين إلى اليسار - ومثلها اللغة الفارسية والعبرية على عكس كثير من اللغات العالمية - ومن أعلى الصفحة إلى أسفلها.'],
        results_word_tokenize = ['تحتوي', 'اللغة', 'العربية', '28', 'حرفاً', 'مكتوباً', '.'],
        results_pos_tag = [('تحتوي', 'VIIA-3FS--'), ('اللغة', 'N------S1D'), ('العربية', 'A-----FS1D'), ('28', 'Q---------'), ('حرفاً', 'N------S4I'), ('مكتوباً', 'A-----MS4I'), ('.', 'G---------')],
        results_pos_tag_universal = [('تحتوي', 'VERB'), ('اللغة', 'NOUN'), ('العربية', 'ADJ'), ('28', 'NUM'), ('حرفاً', 'NOUN'), ('مكتوباً', 'ADJ'), ('.', 'PUNCT')],
        results_lemmatize = ['اِحتَوَى', 'لُغَة', 'عَرَبِيّ', '28', 'حَرف', 'مُكتَوِب', '.'],
        # Entries look like (token, head token, relation, offset to head) — TODO confirm against the harness
        results_dependency_parse = [('تحتوي', 'تحتوي', 'root', 0), ('اللغة', 'تحتوي', 'nsubj', -1), ('العربية', 'اللغة', 'amod', -1), ('28', 'تحتوي', 'obj', -3), ('حرفاً', '28', 'nmod', -1), ('مكتوباً', 'حرفاً', 'amod', -1), ('.', 'تحتوي', 'punct', -6)]
    )

if __name__ == '__main__':
    test_stanza_ara()
34 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_chu.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - Church Slavonic (Old)
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_chu():
    """Test Stanza's Old Church Slavonic pipeline via the shared wl_test_stanza harness.

    Checks sentence tokenization, word tokenization, fine-grained and
    universal POS tagging, lemmatization, and dependency parsing against
    fixed expected outputs for a sample Old Church Slavonic text.
    """
    test_stanza.wl_test_stanza(
        lang = 'chu',
        results_sentence_tokenize = ['ВЪ И҃ В҃ ДЬНЬ КЛꙆМЕНТА', 'Бъ҃ ꙇже нъи лѣта огрѧдѫцѣ блаженаго климента мѫченіка твоего ꙇ папежа чьстьѭ веселішꙇ подазь мілостівъі да егоже чьсть чьстімъ сілоѭ ѹбо мѫчениѣ его наслѣдѹемъ г҃мь'],
        results_word_tokenize = ['ВЪ', 'И҃', 'В҃', 'ДЬНЬ', 'КЛꙆМЕНТА'],
        results_pos_tag = [('ВЪ', 'R-'), ('И҃', 'Nb'), ('В҃', 'R-'), ('ДЬНЬ', 'Nb'), ('КЛꙆМЕНТА', 'Ne')],
        results_pos_tag_universal = [('ВЪ', 'ADP'), ('И҃', 'NOUN'), ('В҃', 'ADP'), ('ДЬНЬ', 'NOUN'), ('КЛꙆМЕНТА', 'PROPN')],
        results_lemmatize = ['въ', 'изоусъ', 'въ', 'дьнь', 'кламенъ'],
        # Entries look like (token, head token, relation, offset to head) — TODO confirm against the harness
        results_dependency_parse = [('ВЪ', 'И҃', 'case', 1), ('И҃', 'И҃', 'root', 0), ('В҃', 'ДЬНЬ', 'case', 1), ('ДЬНЬ', 'И҃', 'orphan', -2), ('КЛꙆМЕНТА', 'ДЬНЬ', 'nmod', -1)]
    )

if __name__ == '__main__':
    test_stanza_chu()
34 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_cop.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - Coptic
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_cop():
    """Test Stanza's Coptic pipeline via the shared wl_test_stanza harness.

    Checks sentence tokenization, word tokenization, fine-grained and
    universal POS tagging, lemmatization, and dependency parsing against
    fixed expected outputs for a sample Coptic text.
    """
    test_stanza.wl_test_stanza(
        lang = 'cop',
        results_sentence_tokenize = ['ϭⲟⲗ ·', 'ⲛⲉⲛⲧⲁⲩⲕⲗⲏⲣⲟⲛⲟⲙⲉⲓ ⲉⲛⲉϩ ⲛⲧⲙⲛⲧⲣⲣⲟ ⲙⲡⲛⲟⲩⲧⲉ ·'],
        results_word_tokenize = ['ϭⲟⲗ', '·'],
        results_pos_tag = [('ϭⲟⲗ', 'VIMP'), ('·', 'PUNCT')],
        results_pos_tag_universal = [('ϭⲟⲗ', 'VERB'), ('·', 'PUNCT')],
        results_lemmatize = ['ϭⲟⲗ', '·'],
        # Entries look like (token, head token, relation, offset to head) — TODO confirm against the harness
        results_dependency_parse = [('ϭⲟⲗ', 'ϭⲟⲗ', 'root', 0), ('·', 'ϭⲟⲗ', 'punct', -1)]
    )

if __name__ == '__main__':
    test_stanza_cop()
34 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_dan.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - Danish
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_dan():
    """Test Stanza's Danish pipeline via the shared wl_test_stanza harness.

    Checks sentence tokenization, word tokenization, POS tagging,
    lemmatization, and dependency parsing against fixed expected outputs
    for a sample Danish text.
    """
    # Fine-grained and universal tagsets are expected to coincide here
    # (the same list is passed for both below) — presumably the Danish
    # model emits UD tags in both fields; verify against the model.
    results_pos_tag = [('Dansk', 'ADJ'), ('er', 'AUX'), ('et', 'DET'), ('østnordisk', 'ADJ'), ('sprog', 'NOUN'), ('indenfor', 'ADP'), ('den', 'DET'), ('germanske', 'ADJ'), ('gren', 'NOUN'), ('af', 'ADP'), ('den', 'DET'), ('indoeuropæiske', 'ADJ'), ('sprogfamilie', 'NOUN'), ('.', 'PUNCT')]

    test_stanza.wl_test_stanza(
        lang = 'dan',
        results_sentence_tokenize = ['Dansk er et østnordisk sprog indenfor den germanske gren af den indoeuropæiske sprogfamilie.', 'Det danske sprog tales af ca. seks millioner mennesker, hovedsageligt i Danmark, men også i Sydslesvig, på Færøerne og Grønland.[1]', 'Dansk er tæt beslægtet med norsk, svensk og islandsk, og sproghistorisk har dansk været stærkt påvirket af plattysk.'],
        results_word_tokenize = ['Dansk', 'er', 'et', 'østnordisk', 'sprog', 'indenfor', 'den', 'germanske', 'gren', 'af', 'den', 'indoeuropæiske', 'sprogfamilie', '.'],
        results_pos_tag = results_pos_tag,
        results_pos_tag_universal = results_pos_tag,
        results_lemmatize = ['dansk', 'være', 'en', 'østnordisk', 'sprog', 'indenfor', 'den', 'germansk', 'gren', 'af', 'den', 'indoeuropæisk', 'sprogfamilie', '.'],
        # Entries look like (token, head token, relation, offset to head) — TODO confirm against the harness
        results_dependency_parse = [('Dansk', 'sprog', 'nsubj', 4), ('er', 'sprog', 'cop', 3), ('et', 'sprog', 'det', 2), ('østnordisk', 'sprog', 'amod', 1), ('sprog', 'sprog', 'root', 0), ('indenfor', 'gren', 'case', 3), ('den', 'gren', 'det', 2), ('germanske', 'gren', 'amod', 1), ('gren', 'sprog', 'nmod', -4), ('af', 'sprogfamilie', 'case', 3), ('den', 'sprogfamilie', 'det', 2), ('indoeuropæiske', 'sprogfamilie', 'amod', 1), ('sprogfamilie', 'gren', 'nmod', -4), ('.', 'sprog', 'punct', -9)]
    )

if __name__ == '__main__':
    test_stanza_dan()
36 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_eus.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - Basque
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_eus():
    """Test Stanza's Basque pipeline via the shared wl_test_stanza harness.

    Checks sentence tokenization, word tokenization, POS tagging,
    lemmatization, and dependency parsing against fixed expected outputs
    for a sample Basque text.
    """
    # Fine-grained and universal tagsets are expected to coincide here
    # (the same list is passed for both below).
    results_pos_tag = [('Euskara', 'NOUN'), ('Euskal', 'PROPN'), ('Herriko', 'NOUN'), ('hizkuntza', 'NOUN'), ('da', 'AUX'), ('.', 'PUNCT'), ('[8]', 'PUNCT')]

    test_stanza.wl_test_stanza(
        lang = 'eus',
        results_sentence_tokenize = ['Euskara Euskal Herriko hizkuntza da.', '[8] Hizkuntza bakartua da, ez baitzaio ahaidetasunik aurkitu.', 'Morfologiari dagokionez, hizkuntza eranskari eta ergatiboa da.', 'Euskaraz mintzo direnei euskaldun deritze.', 'Gaur egun, Euskal Herrian bertan ere hizkuntza gutxitua da, lurralde horretan gaztelania eta frantsesa nagusitu baitira.'],
        results_word_tokenize = ['Euskara', 'Euskal', 'Herriko', 'hizkuntza', 'da', '.', '[8]'],
        results_pos_tag = results_pos_tag,
        results_pos_tag_universal = results_pos_tag,
        results_lemmatize = ['euskara', 'Euskal', 'herri', 'hizkuntza', 'izan', '.', '[8]'],
        # Entries look like (token, head token, relation, offset to head) — TODO confirm against the harness
        results_dependency_parse = [('Euskara', 'hizkuntza', 'nsubj', 3), ('Euskal', 'Herriko', 'compound', 1), ('Herriko', 'hizkuntza', 'nmod', 1), ('hizkuntza', 'hizkuntza', 'root', 0), ('da', 'hizkuntza', 'cop', -1), ('.', 'hizkuntza', 'punct', -2), ('[8]', '[8]', 'root', 0)]
    )

if __name__ == '__main__':
    test_stanza_eus()
36 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_fao.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - Faroese
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_fao():
    """Test Stanza's Faroese pipeline via the shared wl_test_stanza harness.

    Checks sentence tokenization, word tokenization, fine-grained and
    universal POS tagging, and dependency parsing against fixed expected
    outputs for a sample Faroese text.

    Note: unlike sibling tests, no ``results_lemmatize`` is supplied —
    lemmatization is not checked for Faroese here (presumably the model
    lacks a lemmatizer; verify against the harness/model).
    """
    test_stanza.wl_test_stanza(
        lang = 'fao',
        results_sentence_tokenize = ['Føroyskt er høvuðsmálið í Føroyum.', 'Føroyskt er almenna málið í Føroyum, og tað er tjóðarmál føroyinga.', 'Harafturat verður nógv føroyskt tosað í Danmark og Íslandi.', 'Í Føroyum tosa 48.', '000 fólk føroyskt, í Danmark umleið 25.', '000 og í Íslandi umleið 5.000, so samlaða talið av fólkum, ið duga føroyskt liggur um 75-80.', '000.', 'Føroyskt er tí í altjóða høpi eitt lítið mál.', 'Føroyskt mál hevur fýra føll og trý kyn, og grammatiski málbygningurin líkist ógvuliga nógv íslendskum, meðan orðatilfarið og í summum lutum úttalan líkist norska landsmálinum.'],
        results_word_tokenize = ['Føroyskt', 'er', 'høvuðsmálið', 'í', 'Føroyum', '.'],
        results_pos_tag = [('Føroyskt', 'ADJ-N'), ('er', 'BEPI'), ('høvuðsmálið', 'N-N'), ('í', 'P'), ('Føroyum', 'N-D'), ('.', '.')],
        results_pos_tag_universal = [('Føroyskt', 'ADJ'), ('er', 'AUX'), ('høvuðsmálið', 'NOUN'), ('í', 'ADP'), ('Føroyum', 'NOUN'), ('.', 'PUNCT')],
        # Entries look like (token, head token, relation, offset to head) — TODO confirm against the harness
        results_dependency_parse = [('Føroyskt', 'Føroyskt', 'root', 0), ('er', 'Føroyskt', 'cop', -1), ('høvuðsmálið', 'Føroyskt', 'nsubj', -2), ('í', 'Føroyum', 'case', 1), ('Føroyum', 'Føroyskt', 'obl', -4), ('.', 'Føroyum', 'punct', -1)]
    )

if __name__ == '__main__':
    test_stanza_fao()
33 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_fro.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - French (Old)
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_fro():
    """Test Stanza's Old French pipeline via the shared wl_test_stanza harness.

    Checks sentence tokenization, word tokenization, fine-grained and
    universal POS tagging, lemmatization, and dependency parsing against
    fixed expected outputs for a sample Old French text.
    """
    test_stanza.wl_test_stanza(
        lang = 'fro',
        results_sentence_tokenize = ["Si l'orrat Carles, ki est as porz passant. Je vos plevis, ja returnerunt Franc."],
        results_word_tokenize = ['Si', "l'", 'orrat', 'Carles', ',', 'ki', 'est', 'as', 'porz', 'passant', '.'],
        results_pos_tag = [('Si', 'ADVgen'), ("l'", 'PROper'), ('orrat', 'VERcjg'), ('Carles', 'NOMpro'), (',', 'PONfbl'), ('ki', 'PROrel'), ('est', 'VERcjg'), ('as', 'PRE.DETdef'), ('porz', 'NOMcom'), ('passant', 'VERppa'), ('.', 'PONfrt')],
        results_pos_tag_universal = [('Si', 'ADV'), ("l'", 'PRON'), ('orrat', 'VERB'), ('Carles', 'PROPN'), (',', 'PUNCT'), ('ki', 'PRON'), ('est', 'AUX'), ('as', 'ADP'), ('porz', 'NOUN'), ('passant', 'VERB'), ('.', 'PUNCT')],
        results_lemmatize = ['si', "l'", 'orrat', 'Carles', ',', 'ki', 'est', 'as', 'porz', 'passant', '.'],
        # Entries look like (token, head token, relation, offset to head) — TODO confirm against the harness
        results_dependency_parse = [('Si', 'orrat', 'advmod', 2), ("l'", 'orrat', 'obj', 1), ('orrat', 'orrat', 'root', 0), ('Carles', 'orrat', 'nsubj', -1), (',', 'Carles', 'punct', -1), ('ki', 'passant', 'nsubj', 4), ('est', 'passant', 'aux', 3), ('as', 'porz', 'case:det', 1), ('porz', 'passant', 'obl', 1), ('passant', 'Carles', 'acl:relcl', -6), ('.', 'orrat', 'punct', -8)]
    )

if __name__ == '__main__':
    test_stanza_fro()
34 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_gla.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - Scottish Gaelic
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_gla():
    """Test Stanza's Scottish Gaelic pipeline via the shared wl_test_stanza harness.

    Checks sentence tokenization, word tokenization, fine-grained and
    universal POS tagging, lemmatization, and dependency parsing against
    fixed expected outputs for a sample Scottish Gaelic text.
    """
    test_stanza.wl_test_stanza(
        lang = 'gla',
        results_sentence_tokenize = ["'S i cànan dùthchasach na h-Alba a th' anns a' Ghàidhlig.", "'S i ball den teaghlach de chànanan Ceilteach dhen mheur Ghoidhealach a tha anns a' Ghàidhlig.", 'Tha Goidhealach a\' gabhail a-steach na cànanan Gàidhealach gu lèir; Gàidhlig na h-Èireann, Gàidhlig Mhanainn, agus Gàidhlig agus gu dearbh chan eil anns an fhacal "Goidhealach" ach seann fhacal a tha a\' ciallachadh "Gàidhealach".'],
        results_word_tokenize = ["'S", 'i', 'cànan', 'dùthchasach', 'na', 'h-Alba', 'a', "th'", 'anns', "a'", 'Ghàidhlig', '.'],
        results_pos_tag = [("'S", 'Wp-i'), ('i', 'Pp3sf'), ('cànan', 'Ncsmn'), ('dùthchasach', 'Aq-smn'), ('na', 'Tdsfg'), ('h-Alba', 'Nt'), ('a', 'Q-r'), ("th'", 'V-p'), ('anns', 'Sp'), ("a'", 'Tdsf'), ('Ghàidhlig', 'Ncsfd'), ('.', 'Fe')],
        results_pos_tag_universal = [("'S", 'AUX'), ('i', 'PRON'), ('cànan', 'NOUN'), ('dùthchasach', 'ADJ'), ('na', 'DET'), ('h-Alba', 'PROPN'), ('a', 'PART'), ("th'", 'VERB'), ('anns', 'ADP'), ("a'", 'DET'), ('Ghàidhlig', 'NOUN'), ('.', 'PUNCT')],
        results_lemmatize = ['is', 'i', 'cànan', 'dùthchasach', 'an', 'Alba', 'a', 'bi', 'an', 'an', 'gàidhlig', '.'],
        # Entries look like (token, head token, relation, offset to head) — TODO confirm against the harness
        results_dependency_parse = [("'S", 'cànan', 'cop', 2), ('i', "'S", 'fixed', -1), ('cànan', 'cànan', 'root', 0), ('dùthchasach', 'cànan', 'amod', -1), ('na', 'h-Alba', 'det', 1), ('h-Alba', 'cànan', 'nmod', -3), ('a', "th'", 'nsubj', 1), ("th'", 'cànan', 'csubj:cleft', -5), ('anns', 'Ghàidhlig', 'case', 2), ("a'", 'Ghàidhlig', 'det', 1), ('Ghàidhlig', "th'", 'xcomp:pred', -3), ('.', 'cànan', 'punct', -9)]
    )

if __name__ == '__main__':
    test_stanza_gla()
34 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_glv.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - Manx
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_glv():
    """Test Stanza's Manx pipeline via the shared wl_test_stanza harness.

    Checks sentence tokenization, word tokenization, POS tagging,
    lemmatization, and dependency parsing against fixed expected outputs
    for a sample Manx text.
    """
    # Fine-grained and universal tagsets are expected to coincide here
    # (the same list is passed for both below).
    results_pos_tag = [('She', 'AUX'), ('Gaelg', 'PROPN'), ('(graït', 'NOUN'), (':', 'PUNCT'), ('/gɪlg/', 'NOUN'), (')', 'PUNCT'), ('çhengey', 'NOUN'), ('Ghaelagh', 'PROPN'), ('Vannin', 'PROPN'), ('.', 'PUNCT')]

    test_stanza.wl_test_stanza(
        lang = 'glv',
        results_sentence_tokenize = ['She Gaelg (graït: /gɪlg/) çhengey Ghaelagh Vannin.', "Haink y Ghaelg woish Shenn-Yernish, as t'ee cosoylagh rish Yernish as Gaelg ny h-Albey."],
        results_word_tokenize = ['She', 'Gaelg', '(graït', ':', '/gɪlg/', ')', 'çhengey', 'Ghaelagh', 'Vannin', '.'],
        results_pos_tag = results_pos_tag,
        results_pos_tag_universal = results_pos_tag,
        results_lemmatize = ['she', 'Gaelg', 'ben', ':', '/gɪlg/', ')', 'çhengey', 'Gaelagh', 'Mannin', '.'],
        # Entries look like (token, head token, relation, offset to head) — TODO confirm against the harness
        results_dependency_parse = [('She', 'Gaelg', 'cop', 1), ('Gaelg', 'Gaelg', 'root', 0), ('(graït', 'Gaelg', 'nmod', -1), (':', '/gɪlg/', 'punct', 1), ('/gɪlg/', 'Gaelg', 'appos', -3), (')', '/gɪlg/', 'punct', -1), ('çhengey', 'Gaelg', 'parataxis', -5), ('Ghaelagh', 'çhengey', 'nmod', -1), ('Vannin', 'Ghaelagh', 'nmod', -1), ('.', 'Gaelg', 'punct', -8)]
    )

if __name__ == '__main__':
    test_stanza_glv()
36 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_kat.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - Georgian
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_kat():
    """Test Stanza's Georgian pipeline via the shared wl_test_stanza harness.

    Checks sentence tokenization, word tokenization, fine-grained and
    universal POS tagging, lemmatization, and dependency parsing against
    fixed expected outputs for a sample Georgian text.
    """
    test_stanza.wl_test_stanza(
        lang = 'kat',
        results_sentence_tokenize = ['ქართული ენა — ქართველურ ენათა ოჯახის ენა.', 'ქართველების მშობლიური ენა, საქართველოს სახელმწიფო ენა (აფხაზეთის ავტონომიურ რესპუბლიკაში, მასთან ერთად სახელმწიფო ენად აღიარებულია აფხაზური ენა).', 'ქართულ ენაზე 5 მილიონზე მეტი ადამიანი ლაპარაკობს.'],
        results_word_tokenize = ['ქართული', 'ენა', '—', 'ქართველურ', 'ენათა', 'ოჯახის', 'ენა', '.'],
        results_pos_tag = [('ქართული', 'Adj'), ('ენა', 'Noun'), ('—', 'F'), ('ქართველურ', 'Adj'), ('ენათა', 'Noun'), ('ოჯახის', 'Noun'), ('ენა', 'Noun'), ('.', 'F')],
        results_pos_tag_universal = [('ქართული', 'ADJ'), ('ენა', 'NOUN'), ('—', 'PUNCT'), ('ქართველურ', 'ADJ'), ('ენათა', 'NOUN'), ('ოჯახის', 'NOUN'), ('ენა', 'NOUN'), ('.', 'PUNCT')],
        results_lemmatize = ['ქართული', 'ენა', '—', 'ქართველური', 'ენა', 'ოჯახი', 'ენა', '.'],
        # Entries look like (token, head token, relation, offset to head) — TODO confirm against the harness
        results_dependency_parse = [('ქართული', 'ენა', 'amod', 1), ('ენა', 'ენა', 'nsubj', 5), ('—', 'ენა', 'punct', 4), ('ქართველურ', 'ენათა', 'amod', 1), ('ენათა', 'ოჯახის', 'nmod', 1), ('ოჯახის', 'ენა', 'nmod', 1), ('ენა', 'ენა', 'root', 0), ('.', 'ენა', 'punct', -1)]
    )

if __name__ == '__main__':
    test_stanza_kat()
34 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_kor.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - Korean
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_kor():
    """Verify Stanza pipelines for Korean against reference outputs.

    Covers sentence/word tokenization, language-specific and universal POS
    tagging, lemmatization, and dependency parsing.
    """
    test_stanza.wl_test_stanza(
        lang='kor',
        results_sentence_tokenize=[
            '세계 여러 지역에 한민족 인구가 거주하게 되면서 전 세계 각지에서 한국어가 사용 되고 있다.',
            '2016년 1월 초 기준으로 한국어 사용 인구는 약 8,000만 명으로 추산된다.',
            '[1]'
        ],
        results_word_tokenize=['세계', '여러', '지역에', '한민족', '인구가', '거주하게', '되면서', '전', '세계', '각지에서', '한국어가', '사용', '되고', '있다', '.'],
        results_pos_tag=[('세계', 'ncn'), ('여러', 'mma'), ('지역에', 'ncn+jca'), ('한민족', 'ncn'), ('인구가', 'ncn+jcs'), ('거주하게', 'ncpa+xsv+ecx'), ('되면서', 'px+ecc'), ('전', 'mma'), ('세계', 'ncn'), ('각지에서', 'ncn+jca'), ('한국어가', 'nq+jcs'), ('사용', 'ncpa'), ('되고', 'pvg+ecx'), ('있다', 'px+ef'), ('.', 'sf')],
        results_pos_tag_universal=[('세계', 'NOUN'), ('여러', 'ADJ'), ('지역에', 'ADV'), ('한민족', 'NOUN'), ('인구가', 'NOUN'), ('거주하게', 'VERB'), ('되면서', 'CCONJ'), ('전', 'ADJ'), ('세계', 'NOUN'), ('각지에서', 'ADV'), ('한국어가', 'PROPN'), ('사용', 'NOUN'), ('되고', 'VERB'), ('있다', 'AUX'), ('.', 'PUNCT')],
        results_lemmatize=['세계', '여러', '지역+에', '한민족', '인구+가', '거주+하+게', '되+면서', '전', '세계', '각지+에서', '한국어+가', '사용', '되+고', '있', '.'],
        results_dependency_parse=[('세계', '지역에', 'compound', 2), ('여러', '지역에', 'amod', 1), ('지역에', '거주하게', 'obl', 3), ('한민족', '인구가', 'compound', 1), ('인구가', '거주하게', 'nsubj', 1), ('거주하게', '거주하게', 'root', 0), ('되면서', '거주하게', 'cc', -1), ('전', '세계', 'amod', 1), ('세계', '각지에서', 'compound', 1), ('각지에서', '되고', 'advcl', 3), ('한국어가', '되고', 'nsubj', 2), ('사용', '되고', 'dep', 1), ('되고', '거주하게', 'conj', -7), ('있다', '되고', 'aux', -1), ('.', '있다', 'punct', -1)]
    )

if __name__ == '__main__':
    test_stanza_kor()
34 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_kpv.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - Komi (Zyrian)
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_kpv():
    """Verify Stanza pipelines for Komi (Zyrian) against reference outputs.

    Covers sentence/word tokenization, language-specific and universal POS
    tagging, lemmatization, and dependency parsing.
    """
    test_stanza.wl_test_stanza(
        lang='kpv',
        results_sentence_tokenize=[
            'Коми кыв — финн-йӧгра кывъясысь ӧти, коми войтырлӧн чужан кыв.',
            'Коми кывйын кызь гӧгӧр сёрнисикас да кык гижӧда кыв: зырян коми да перым коми.',
            'Коми кыв — Коми Республикаын каналан кыв (кыдзи и роч кыв).',
            'Комиӧн сёрнитӧны Коми Республикаса вужвойтыр — комияс (зыряна, матӧ 156 сюрс морт).',
            'Лунвылынджык, Перым Коми кытшын, перым комияслӧн (пермякъяслӧн, матӧ 63 сюрс морт) сӧвмӧ ас гижӧд кыв.',
            'Комиясыд и сэні вужвойтыр.'
        ],
        results_word_tokenize=['Коми', 'кыв', '—', 'финн-йӧгра', 'кывъясысь', 'ӧти', ',', 'коми', 'войтырлӧн', 'чужан', 'кыв', '.'],
        results_pos_tag=[('Коми', 'N'), ('кыв', 'N'), ('—', 'PUNCT'), ('финн-йӧгра', 'Adv'), ('кывъясысь', 'N'), ('ӧти', 'Num'), (',', 'CLB'), ('коми', 'N'), ('войтырлӧн', 'N'), ('чужан', 'V'), ('кыв', 'N'), ('.', 'CLB')],
        results_pos_tag_universal=[('Коми', 'NOUN'), ('кыв', 'NOUN'), ('—', 'PUNCT'), ('финн-йӧгра', 'ADV'), ('кывъясысь', 'NOUN'), ('ӧти', 'NUM'), (',', 'PUNCT'), ('коми', 'NOUN'), ('войтырлӧн', 'NOUN'), ('чужан', 'VERB'), ('кыв', 'NOUN'), ('.', 'PUNCT')],
        results_lemmatize=['коми', 'кыв', '—', 'финн-йӧгра', 'кыв', 'ӧти', ',', 'коми', 'войтыр', 'чужан', 'кыв', '.'],
        results_dependency_parse=[('Коми', 'кыв', 'obl', 1), ('кыв', 'кыв', 'root', 0), ('—', 'кывъясысь', 'punct', 2), ('финн-йӧгра', 'кывъясысь', 'advmod', 1), ('кывъясысь', 'кыв', 'appos', -3), ('ӧти', 'кывъясысь', 'nummod', -1), (',', 'кыв', 'punct', 4), ('коми', 'войтырлӧн', 'nmod', 1), ('войтырлӧн', 'чужан', 'obl:lmod', 1), ('чужан', 'кыв', 'acl', 1), ('кыв', 'кыв', 'nsubj:cop', -9), ('.', 'кыв', 'punct', -10)]
    )

if __name__ == '__main__':
    test_stanza_kpv()
34 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_mar.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - Marathi
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_mar():
    """Verify Stanza pipelines for Marathi against reference outputs.

    The language-specific and universal tagsets coincide for this model, so
    one list serves both POS-tagging expectations.
    """
    pos_tags = [('मराठी', 'ADJ'), ('भाषा', 'NOUN'), ('ही', 'PART'), ('इंडो', 'ADJ'), ('-', 'PUNCT'), ('युरोपीय', 'ADJ'), ('भाषाकुळातील', 'NOUN'), ('एक', 'DET'), ('भाषा', 'NOUN'), ('आहे', 'AUX'), ('.', 'PUNCT')]

    test_stanza.wl_test_stanza(
        lang='mar',
        results_sentence_tokenize=[
            'मराठी भाषा ही इंडो-युरोपीय भाषाकुळातील एक भाषा आहे.',
            'मराठी ही भारताच्या २२ अधिकृत भाषांपैकी एक आहे.',
            'मराठी महाराष्ट्र राज्याची अधिकृत तर गोवा राज्याची सहअधिकृत भाषा आहे.',
            '२०११ च्या जनगणनेनुसार, भारतात मराठी भाषकांची एकूण लोकसंख्या सुमारे १४ कोटी आहे.',
            'मराठी मातृभाषा असणाऱ्या लोकांच्या संख्येनुसार मराठी ही जगातील दहावी व भारतातील तिसरी भाषा आहे.',
            'मराठी भाषा भारताच्या प्राचीन भाषांपैकी एक असून महाराष्ट्री प्राकृतचे आधुनिक रूप आहे.',
            'मराठीचे वय सुमारे २४०० वर्ष आहे.',
            'महाराष्ट्र हे मराठी भाषिकांचे राज्य म्हणून मराठी भाषेला वेगळे महत्त्व प्राप्त झालेले आहे.',
            'आजतागायत मराठी भाषेतून अनेक श्रेष्ठ साहित्यकृती निर्माण झालेल्या आहेत आणि त्यात सातत्यपूर्ण रीतीने भर पडत आहे.',
            'गोवा, गुजरात सारख्या राज्यातही मराठी भाषा काही प्रमाणात बोलली जाते.',
            'गोव्यात मराठीला समृद्ध असा इतिहास आहे.',
            '[१]'
        ],
        results_word_tokenize=['मराठी', 'भाषा', 'ही', 'इंडो', '-', 'युरोपीय', 'भाषाकुळातील', 'एक', 'भाषा', 'आहे', '.'],
        results_pos_tag=pos_tags,
        results_pos_tag_universal=pos_tags,
        results_lemmatize=['मराठी', 'भाष', 'ही', 'इंडो', '-', 'युरोपीय', 'भाषाकुळळत', 'एक', 'भाष', 'असणे', '.'],
        results_dependency_parse=[('मराठी', 'भाषा', 'amod', 1), ('भाषा', 'भाषा', 'obl', 7), ('ही', 'भाषा', 'discourse', -1), ('इंडो', 'भाषाकुळातील', 'amod', 3), ('-', 'इंडो', 'punct', -1), ('युरोपीय', 'भाषाकुळातील', 'amod', 1), ('भाषाकुळातील', 'भाषा', 'obl', 2), ('एक', 'भाषा', 'det', 1), ('भाषा', 'भाषा', 'root', 0), ('आहे', 'भाषा', 'cop', -1), ('.', 'भाषा', 'punct', -2)],
        # NOTE(review): keyword spelling "analayze" presumably matches the
        # wl_test_stanza() signature — verify against the callee before renaming.
        results_sentiment_analayze=[0]
    )

if __name__ == '__main__':
    test_stanza_mar()
37 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_mya.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - Burmese
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_mya():
    """Verify Stanza pipelines for Burmese against reference outputs.

    Only sentence and word tokenization are checked for this language.
    """
    test_stanza.wl_test_stanza(
        lang='mya',
        results_sentence_tokenize=[
            'မြန်မာဘာသာစကား (အင်္ဂလိပ်: Myanmar Language)သည် မြန်မာနိုင်ငံ၏ ရုံးသုံး ဘာသာစကားဖြစ်သည်။',
            'ဗမာလူမျိုးနှင့် ဗမာနွယ်ဝင်(ဓနု၊ အင်းသား၊ တောင်ရိုးနှင့် ယော)တို့၏ ဇာတိစကားဖြစ်သည်။',
            'ဗမာလူမျိုးတို့သည် တိဘက်-ဗမာနွယ် ဘာသာစကားများ (Tibeto-Burman Languages) ပြောဆိုသည့် လူမျိုးနွယ်စုကြီးမှ အကြီးဆုံးသော လူမျိုးဖြစ်သည်။',
            'လူဦးရေ ၃၈သန်းကျော်ခန့်သည် မြန်မာဘာသာစကားကို မိခင်ဘာသာစကား အနေဖြင့် သုံး၍ မြန်မာတိုင်းရင်သားများသည် ဒုတိယဘာသာစကား အနေဖြင့် သုံးသည်။'
        ],
        results_word_tokenize=['မြန်မာ', 'ဘာသာ', 'စကား', '(', 'အင်္ဂလိပ်', ':', 'Myanmar', 'Language)', 'သည်', 'မြန်မာ', 'နိုင်ငံ', '၏', 'ရုံးသုံး', 'ဘာသာ', 'စကား', 'ဖြစ်', 'သည်', '။']
    )

if __name__ == '__main__':
    test_stanza_mya()
30 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_orv.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - Russian (Old)
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_orv():
    """Verify Stanza pipelines for Old Russian against reference outputs.

    Covers sentence/word tokenization, language-specific and universal POS
    tagging, lemmatization, and dependency parsing.
    """
    test_stanza.wl_test_stanza(
        lang='orv',
        results_sentence_tokenize=[
            'шаибатъ же ѿ бедерѧ г҃ мсци',
            'а ѿ дабылѧ до шаибата в҃ мсца',
            'моремъ итьти'
        ],
        results_word_tokenize=['шаибатъ', 'же', 'ѿ', 'бедерѧ', 'г҃', 'мсци'],
        results_pos_tag=[('шаибатъ', 'Ne'), ('же', 'Df'), ('ѿ', 'R-'), ('бедерѧ', 'Ne'), ('г҃', 'Ma'), ('мсци', 'Nb')],
        results_pos_tag_universal=[('шаибатъ', 'PROPN'), ('же', 'ADV'), ('ѿ', 'ADP'), ('бедерѧ', 'PROPN'), ('г҃', 'NUM'), ('мсци', 'NOUN')],
        results_lemmatize=['шаибатъ', 'же', 'отъ', 'бедерь', 'трие', 'мѣсяць'],
        results_dependency_parse=[('шаибатъ', 'шаибатъ', 'root', 0), ('же', 'шаибатъ', 'discourse', -1), ('ѿ', 'бедерѧ', 'case', 1), ('бедерѧ', 'шаибатъ', 'nmod', -3), ('г҃', 'мсци', 'nummod', 1), ('мсци', 'шаибатъ', 'orphan', -5)]
    )

if __name__ == '__main__':
    test_stanza_orv()
34 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_pcm.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - Nigerian Pidgin
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_pcm():
    """Verify Stanza pipelines for Nigerian Pidgin against reference outputs.

    The language-specific and universal tagsets coincide for this model, so
    one list serves both POS-tagging expectations.
    """
    pos_tags = [('Naijá', 'PROPN'), ('na', 'AUX'), ('pijin,', 'VERB'), ('a', 'DET'), ('langwej', 'NOUN'), ('for', 'ADP'), ('oda', 'ADJ'), ('langwej.', 'NOUN')]

    test_stanza.wl_test_stanza(
        lang='pcm',
        results_sentence_tokenize=['Naijá na pijin, a langwej for oda langwej. Naijá for Inglish an wey Afrikan langwej.'],
        results_word_tokenize=['Naijá', 'na', 'pijin,', 'a', 'langwej', 'for', 'oda', 'langwej.'],
        results_pos_tag=pos_tags,
        results_pos_tag_universal=pos_tags,
        results_lemmatize=['Naijá', 'na', 'pijin,', 'a', 'langwej', 'for', 'oder', 'langwej.'],
        results_dependency_parse=[('Naijá', 'pijin,', 'nsubj', 2), ('na', 'pijin,', 'cop', 1), ('pijin,', 'pijin,', 'root', 0), ('a', 'langwej', 'det', 1), ('langwej', 'pijin,', 'obj', -2), ('for', 'oda', 'case', 1), ('oda', 'pijin,', 'obl:arg', -4), ('langwej.', 'pijin,', 'dep', -5)]
    )

if __name__ == '__main__':
    test_stanza_pcm()
36 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_qpm.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - Pomak
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_qpm():
    """Verify Stanza pipelines for Pomak against reference outputs.

    The language-specific and universal tagsets coincide for this model, so
    one list serves both POS-tagging expectations.
    """
    pos_tags = [('Kážyjte', 'VERB'), ('nǽko', 'DET'), (',', 'PUNCT'), ('de', 'PART'), ('!', 'PUNCT')]

    test_stanza.wl_test_stanza(
        lang='qpm',
        results_sentence_tokenize=['Kážyjte nǽko, de! Še go preskókneme!'],
        results_word_tokenize=['Kážyjte', 'nǽko', ',', 'de', '!'],
        results_pos_tag=pos_tags,
        results_pos_tag_universal=pos_tags,
        results_lemmatize=['kážom', 'nǽko', ',', 'de', '!'],
        results_dependency_parse=[('Kážyjte', 'Kážyjte', 'root', 0), ('nǽko', 'Kážyjte', 'det', -1), (',', 'de', 'punct', 1), ('de', 'Kážyjte', 'vocative', -3), ('!', 'Kážyjte', 'punct', -4)]
    )

if __name__ == '__main__':
    test_stanza_qpm()
36 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_san.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - Sanskrit
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_san():
    """Verify Stanza pipelines for Sanskrit against reference outputs.

    The language-specific and universal tagsets coincide for this model, so
    one list serves both POS-tagging expectations.
    """
    pos_tags = [('संस्कृतम्', 'NOUN'), ('जगतः', 'PRON'), ('एकतमा', 'NOUN'), ('अतिप्राचीना', 'NOUN'), ('समृद्धा', 'NOUN'), ('शास्त्रीया', 'NOUN'), ('च', 'ADJ'), ('भाषासु', 'NOUN'), ('वर्तते', 'NOUN'), ('।', 'NOUN')]

    test_stanza.wl_test_stanza(
        lang='san',
        results_sentence_tokenize=[
            'संस्कृतम् जगतः एकतमा',
            'अतिप्राचीना समृद्धा शास्त्रीया',
            'च भाषासु वर्तते। संस्कृतम् भारतस्य जगत: वा भाषासु एकतमा\u200c प्राचीनतमा। भारती, सुरभारती, अमरभारती, अमरवाणी, सुरवाणी, गीर्वाणवाणी, गीर्वाणी, देववाणी, देवभाषा, संस्कृतावाक्, दैवीवाक्, इत्यादिभिः नामभिः एतद्भाषा प्रसिद्धा',
            '।'
        ],
        results_word_tokenize=['संस्कृतम्', 'जगतः', 'एकतमा', 'अतिप्राचीना', 'समृद्धा', 'शास्त्रीया', 'च', 'भाषासु', 'वर्तते', '।'],
        results_pos_tag=pos_tags,
        results_pos_tag_universal=pos_tags,
        results_lemmatize=['संस्कृतम्', 'जगतः', 'एकतमा', 'अतिप्राचीना', 'समृद्धा', 'शास्त्रीया', 'च', 'भाषासु', 'वर्तते', '।'],
        results_dependency_parse=[('संस्कृतम्', 'संस्कृतम्', 'root', 0), ('जगतः', 'एकतमा', 'nmod', 1), ('एकतमा', 'संस्कृतम्', 'vocative', -2), ('अतिप्राचीना', 'शास्त्रीया', 'nsubj', 2), ('समृद्धा', 'शास्त्रीया', 'nsubj', 1), ('शास्त्रीया', 'शास्त्रीया', 'root', 0), ('च', 'च', 'root', 0), ('भाषासु', 'च', 'nsubj', -1), ('वर्तते', 'च', 'conj', -2), ('।', '।', 'root', 0)]
    )

if __name__ == '__main__':
    test_stanza_san()
36 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_sme.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - Sámi (Northern)
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_sme():
    """Verify Stanza pipelines for Northern Sámi against reference outputs.

    Covers sentence/word tokenization, language-specific and universal POS
    tagging, lemmatization, and dependency parsing.
    """
    test_stanza.wl_test_stanza(
        lang='sme',
        results_sentence_tokenize=[
            'Davvisámegiella gullá sámegielaid oarjesámegielaid davvejovkui ovttas julev- ja bihtánsámegielain.',
            'Eará oarjesámegielat leat ubmisámegiella ja lullisámegiella.'
        ],
        results_word_tokenize=['Davvisámegiella', 'gullá', 'sámegielaid', 'oarjesámegielaid', 'davvejovkui', 'ovttas', 'julev-', 'ja', 'bihtánsámegielain', '.'],
        results_pos_tag=[('Davvisámegiella', 'N'), ('gullá', 'V'), ('sámegielaid', 'N'), ('oarjesámegielaid', 'N'), ('davvejovkui', 'N'), ('ovttas', 'Adv'), ('julev-', 'N'), ('ja', 'CC'), ('bihtánsámegielain', 'N'), ('.', 'CLB')],
        results_pos_tag_universal=[('Davvisámegiella', 'NOUN'), ('gullá', 'VERB'), ('sámegielaid', 'NOUN'), ('oarjesámegielaid', 'NOUN'), ('davvejovkui', 'NOUN'), ('ovttas', 'ADV'), ('julev-', 'NOUN'), ('ja', 'CCONJ'), ('bihtánsámegielain', 'NOUN'), ('.', 'PUNCT')],
        results_lemmatize=['davvisámegiella', 'gullat', 'sámegiella', 'oarjesámegiella', 'davvejoavku', 'ovttas', 'julle', 'ja', 'bihtánsámegiella', '.'],
        results_dependency_parse=[('Davvisámegiella', 'gullá', 'nsubj', 1), ('gullá', 'gullá', 'root', 0), ('sámegielaid', 'gullá', 'obj', -1), ('oarjesámegielaid', 'davvejovkui', 'nmod:poss', 1), ('davvejovkui', 'gullá', 'obl', -3), ('ovttas', 'gullá', 'advmod', -4), ('julev-', 'gullá', 'obl', -5), ('ja', 'julev-', 'cc', -1), ('bihtánsámegielain', 'julev-', 'conj', -2), ('.', 'gullá', 'punct', -8)]
    )

if __name__ == '__main__':
    test_stanza_sme()
34 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_tel.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - Telugu
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_tel():
    """Verify Stanza pipelines for Telugu against reference outputs.

    The language-specific and universal tagsets coincide for this model, so
    one list serves both POS-tagging expectations. No lemmatizer model is
    checked for this language.
    """
    pos_tags = [('తెలుగు', 'PROPN'), ('అనేది', 'PRON'), ('ద్రావిడ', 'PROPN'), ('భాషల', 'NOUN'), ('కుటుంబానికి', 'NOUN'), ('చెందిన', 'VERB'), ('భాష', 'NOUN'), ('.', 'PUNCT')]

    test_stanza.wl_test_stanza(
        lang='tel',
        results_sentence_tokenize=[
            'తెలుగు అనేది ద్రావిడ భాషల కుటుంబానికి చెందిన భాష.',
            'దీనిని మాట్లాడే ప్రజలు ప్రధానంగా ఆంధ్ర, తెలంగాణాలో ఉన్నారు.',
            'ఇది ఆ రాష్ట్రాలలో అధికార భాష.',
            'భారతదేశంలో ఒకటి',
            'కంటే ఎక్కువ రాష్ట్రాల్లో ప్రాథమిక అధికారిక భాషా హోదా కలిగిన కొద్ది భాషలలో హిందీ, బెంగాలీలతో పాటు ఇది కూడా ఉంది.',
            '[5][6] పుదుచ్చేరిలోని యానం జిల్లాలో తెలుగు అధికారిక భాష.',
            'ఒడిశా, కర్ణాటక, తమిళనాడు, కేరళ, పంజాబ్, ఛత్తీస్\u200cగఢ్, మహారాష్ట్ర, అండమాన్ నికోబార్ దీవులలో గుర్తింపబడిన అల్పసంఖ్యాక భాష.',
            'దేశ ప్రభుత్వం భారతదేశ ప్రాచీన భాషగా గుర్తించిన ఆరు భాషలలో ఇది ఒకటి.',
            '[7][8]'
        ],
        results_word_tokenize=['తెలుగు', 'అనేది', 'ద్రావిడ', 'భాషల', 'కుటుంబానికి', 'చెందిన', 'భాష', '.'],
        results_pos_tag=pos_tags,
        results_pos_tag_universal=pos_tags,
        results_dependency_parse=[('తెలుగు', 'అనేది', 'compound', 1), ('అనేది', 'చెందిన', 'nsubj', 4), ('ద్రావిడ', 'అనేది', 'nmod', -1), ('భాషల', 'చెందిన', 'obl', 2), ('కుటుంబానికి', 'చెందిన', 'obl', 1), ('చెందిన', 'భాష', 'acl', 1), ('భాష', 'భాష', 'root', 0), ('.', 'భాష', 'punct', -1)]
    )

if __name__ == '__main__':
    test_stanza_tel()
35 |
--------------------------------------------------------------------------------
/tests/tests_nlp/tests_stanza/test_stanza_zho_cn.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: NLP - Stanza - Chinese (Simplified)
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests.tests_nlp.tests_stanza import test_stanza
20 |
def test_stanza_zho_cn():
    """Verify Stanza pipelines for Simplified Chinese against reference outputs.

    The lemmatizer returns the surface tokens unchanged for this model, so the
    same list serves both the word-tokenization and lemmatization expectations.
    """
    tokens = ['汉', '语', '又', '称', '华', '语', '[', '6][7', ']', ',', '是', '来', '自', '汉', '民族', '的', '语言', '[', '8][7]', '[9', ']', '。']

    test_stanza.wl_test_stanza(
        lang='zho_cn',
        results_sentence_tokenize=[
            '汉语又称华语[6][7],是来自汉民族的语言[8][7][9]。',
            '汉语是汉藏语系中最大的一支语族,若把整个汉语族视为单一语言,则汉语为世界上母语使用者人数最多的语言,目前全世界有五分之一人口将其作为母语或第二语言。'
        ],
        results_word_tokenize=tokens,
        results_pos_tag=[('汉', 'NNP'), ('语', 'SFN'), ('又', 'RB'), ('称', 'VV'), ('华', 'NNP'), ('语', 'SFN'), ('[', '('), ('6][7', 'CD'), (']', ')'), (',', ','), ('是', 'VC'), ('来', 'VV'), ('自', 'VV'), ('汉', 'NNP'), ('民族', 'NN'), ('的', 'DEC'), ('语言', 'NN'), ('[', '('), ('8][7]', 'CD'), ('[9', 'CD'), (']', ')'), ('。', '.')],
        results_pos_tag_universal=[('汉', 'PROPN'), ('语', 'PART'), ('又', 'SCONJ'), ('称', 'VERB'), ('华', 'PROPN'), ('语', 'PART'), ('[', 'PUNCT'), ('6][7', 'NUM'), (']', 'PUNCT'), (',', 'PUNCT'), ('是', 'AUX'), ('来', 'VERB'), ('自', 'VERB'), ('汉', 'PROPN'), ('民族', 'NOUN'), ('的', 'SCONJ'), ('语言', 'NOUN'), ('[', 'PUNCT'), ('8][7]', 'NUM'), ('[9', 'NUM'), (']', 'PUNCT'), ('。', 'PUNCT')],
        results_lemmatize=tokens,
        results_dependency_parse=[('汉', '语', 'compound', 1), ('语', '语言', 'nsubj', 15), ('又', '称', 'mark', 1), ('称', '语言', 'acl', 13), ('华', '语', 'compound', 1), ('语', '称', 'obj', -2), ('[', '6][7', 'punct', 1), ('6][7', '语', 'appos', -2), (']', '6][7', 'punct', -1), (',', '称', 'punct', -6), ('是', '语言', 'cop', 6), ('来', '语言', 'acl:relcl', 5), ('自', '来', 'mark', -1), ('汉', '民族', 'nmod', 1), ('民族', '来', 'obj', -3), ('的', '来', 'mark:rel', -4), ('语言', '语言', 'root', 0), ('[', '[9', 'punct', 2), ('8][7]', '[9', 'nummod', 1), ('[9', '语言', 'appos', -3), (']', '[9', 'punct', -1), ('。', '语言', 'punct', -5)],
        # NOTE(review): keyword spelling "analayze" presumably matches the
        # wl_test_stanza() signature — verify against the callee before renaming.
        results_sentiment_analayze=[0]
    )

if __name__ == '__main__':
    test_stanza_zho_cn()
37 |
--------------------------------------------------------------------------------
/tests/tests_results/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_results/__init__.py
--------------------------------------------------------------------------------
/tests/tests_results/test_results_search.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Results - Search in results
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests import wl_test_init
20 | from wordless.wl_results import wl_results_search
21 |
# Shared test application instance for this module.
main = wl_test_init.Wl_Test_Main()

def test_wl_dialog_results_search():
    """Exercise the search-in-results dialog end to end on a dummy table."""
    # A dependency-parser results table with a single selected test-language file.
    table = wl_test_init.Wl_Test_Table(main, tab='dependency_parser')
    table.settings['file_area']['files_open'] = [{'selected': True, 'lang': 'test'}]

    dialog = wl_results_search.Wl_Dialog_Results_Search(main, table=table)

    # Load both default and previously saved settings.
    dialog.load_settings(defaults=True)
    dialog.load_settings(defaults=False)

    # Trigger the settings-changed handler with an empty and a non-empty term.
    dialog.line_edit_search_term.setText('')
    dialog.search_settings_changed()
    dialog.line_edit_search_term.setText('test')
    dialog.search_settings_changed()

    dialog.table_item_changed()

    # Navigation, highlighting, and cleanup paths.
    dialog.find_next()
    dialog.find_prev()
    dialog.find_all()
    dialog.update_gui('')
    dialog.clr_highlights()
    dialog.clr_history()

if __name__ == '__main__':
    test_wl_dialog_results_search()
52 |
--------------------------------------------------------------------------------
/tests/tests_settings/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_settings/__init__.py
--------------------------------------------------------------------------------
/tests/tests_settings/test_settings.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Settings - Settings
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests import wl_test_init
20 | from wordless.wl_settings import wl_settings
21 |
main = wl_test_init.Wl_Test_Main()

def test_wl_settings():
    # Exercise the full settings dialog life cycle: open, switch sections,
    # load (current and default), validate, save, and apply.
    dialog_settings = wl_settings.Wl_Settings(main)
    dialog_settings.open()
    dialog_settings.selection_changed(None, None)
    dialog_settings.load_settings()
    dialog_settings.load_settings(defaults = True)
    dialog_settings.validate_settings()
    dialog_settings.save_settings()
    dialog_settings.apply_settings()

def test_wl_settings_node():
    # A bare settings node should validate and apply without errors.
    node = wl_settings.Wl_Settings_Node(main)
    node.validate_settings()
    node.apply_settings()

if __name__ == '__main__':
    for test in (test_wl_settings, test_wl_settings_node):
        test()
42 |
--------------------------------------------------------------------------------
/tests/tests_settings/test_settings_default.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Settings - Default settings
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests import wl_test_init
20 | from wordless.wl_settings import wl_settings_default
21 |
main = wl_test_init.Wl_Test_Main()

def test_settings_default():
    # Default settings must initialize to a truthy (non-empty) structure.
    assert wl_settings_default.init_settings_default(main)

    # Check for invalid conversion of universal POS tags into content/function
    # words: every POS mapping row must have exactly 5 columns.
    mapping_settings = main.settings_default['pos_tagging']['tagsets']['mapping_settings']

    for mappings in mapping_settings.values():
        for mapping in mappings.values():
            for pos_mapping in mapping:
                assert len(pos_mapping) == 5

if __name__ == '__main__':
    test_settings_default()
34 |
--------------------------------------------------------------------------------
/tests/tests_settings/test_settings_dependency_parsing.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Settings - Dependency Parsing
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests import wl_test_init
20 | from wordless.wl_settings import wl_settings_dependency_parsing
21 |
main = wl_test_init.Wl_Test_Main(switch_lang_utils = 'fast')

def test_wl_settings_dependency_parsing():
    # Standard settings page cycle followed by the preview-related slots.
    page = wl_settings_dependency_parsing.Wl_Settings_Dependency_Parsing(main)
    page.load_settings()
    page.load_settings(defaults = True)
    page.apply_settings()

    page.preview_changed()
    page.update_gui('test')
    page.update_gui_err()

def test_wl_dialog_preview_settings():
    # The preview settings dialog should open, load, and save cleanly.
    dialog = wl_settings_dependency_parsing.Wl_Dialog_Preview_Settings(main)
    dialog.open()
    dialog.load_settings()
    dialog.save_settings()

if __name__ == '__main__':
    for test in (test_wl_settings_dependency_parsing, test_wl_dialog_preview_settings):
        test()
43 |
--------------------------------------------------------------------------------
/tests/tests_settings/test_settings_figs.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Settings - Figures
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests import wl_test_init
20 | from wordless.wl_settings import wl_settings_figs
21 |
main = wl_test_init.Wl_Test_Main()

def test_wl_settings_figs_line_charts():
    # Line chart settings: font switching plus the standard load/apply cycle.
    page = wl_settings_figs.Wl_Settings_Figs_Line_Charts(main)
    page.change_fonts()
    page.load_settings()
    page.load_settings(defaults = True)
    page.apply_settings()

def test_wl_settings_figs_word_clouds():
    # Word cloud settings additionally validate user-entered values.
    page = wl_settings_figs.Wl_Settings_Figs_Word_Clouds(main)
    page.font_settings_changed()
    page.load_settings()
    page.load_settings(defaults = True)
    page.validate_settings()
    page.apply_settings()

def test_wl_settings_figs_network_graphs():
    page = wl_settings_figs.Wl_Settings_Figs_Network_Graphs(main)
    page.load_settings()
    page.load_settings(defaults = True)
    page.apply_settings()

if __name__ == '__main__':
    for test in (
        test_wl_settings_figs_line_charts,
        test_wl_settings_figs_word_clouds,
        test_wl_settings_figs_network_graphs
    ):
        test()
49 |
--------------------------------------------------------------------------------
/tests/tests_settings/test_settings_files.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Settings - Files
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests import wl_test_init
20 | from wordless.wl_settings import wl_settings_files
21 |
main = wl_test_init.Wl_Test_Main()

def test_wl_settings_files():
    page = wl_settings_files.Wl_Settings_Files(main)
    page.load_settings()
    page.load_settings(defaults = True)
    page.apply_settings()

def test_wl_settings_files_tags():
    page = wl_settings_files.Wl_Settings_Files_Tags(main)
    page.load_settings()
    page.load_settings(defaults = True)
    page.apply_settings()

def test_wl_table_tags():
    # Exercise the generic tag table with the header-tag settings key.
    table = wl_settings_files.Wl_Table_Tags(
        main,
        settings_tags = 'header_tag_settings',
        defaults_row = ['Nonembedded', 'Header', '', '']
    )

    table.item_changed()
    table._add_row()
    table.reset_table()
    table.get_tags()

def test_wl_table_tags_header():
    # Firing item_changed on the first cell must not raise.
    table = wl_settings_files.Wl_Table_Tags_Header(main)
    table.item_changed(table.model().item(0, 0))

def test_wl_table_tags_body():
    table = wl_settings_files.Wl_Table_Tags_Body(main)
    table.item_changed(table.model().item(0, 0))

def test_wl_table_tags_xml():
    table = wl_settings_files.Wl_Table_Tags_Xml(main)
    table.item_changed(table.model().item(0, 0))

if __name__ == '__main__':
    for test in (
        test_wl_settings_files,
        test_wl_settings_files_tags,

        test_wl_table_tags,
        test_wl_table_tags_header,
        test_wl_table_tags_body,
        test_wl_table_tags_xml
    ):
        test()
68 |
--------------------------------------------------------------------------------
/tests/tests_settings/test_settings_general.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Settings - General
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests import wl_test_init
20 | from wordless.wl_settings import wl_settings_general
21 |
main = wl_test_init.Wl_Test_Main()

def test_wl_settings_general():
    page = wl_settings_general.Wl_Settings_General(main)
    page.proxy_settings_changed()
    page.load_settings()
    page.load_settings(defaults = True)
    page.apply_settings()

def test_wl_settings_general_imp():
    # Import settings: toggle encoding detection and check the files path,
    # then run the standard load/validate/apply cycle.
    page = wl_settings_general.Wl_Settings_General_Imp(main)
    page.detect_encodings_changed()
    page.check_path('files')

    page.load_settings()
    page.load_settings(defaults = True)
    page.validate_settings()
    page.apply_settings()

def test_wl_settings_general_exp():
    # Export settings: switch the default table type and check the tables path.
    page = wl_settings_general.Wl_Settings_General_Exp(main)
    page.tables_default_type_changed()
    page.check_path('tables')

    page.load_settings()
    page.load_settings(defaults = True)
    page.validate_settings()
    page.apply_settings()

if __name__ == '__main__':
    for test in (
        test_wl_settings_general,
        test_wl_settings_general_imp,
        test_wl_settings_general_exp
    ):
        test()
55 |
--------------------------------------------------------------------------------
/tests/tests_settings/test_settings_lemmatization.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Settings - Lemmatization
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests import wl_test_init
20 | from wordless.wl_settings import wl_settings_lemmatization
21 |
main = wl_test_init.Wl_Test_Main(switch_lang_utils = 'fast')

def test_wl_settings_lemmatization():
    # Standard settings page cycle followed by the preview-related slots.
    page = wl_settings_lemmatization.Wl_Settings_Lemmatization(main)
    page.load_settings()
    page.load_settings(defaults = True)
    page.apply_settings()

    page.preview_changed()
    page.update_gui('test')
    page.update_gui_err()

if __name__ == '__main__':
    test_wl_settings_lemmatization()
36 |
--------------------------------------------------------------------------------
/tests/tests_settings/test_settings_sentence_tokenization.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Settings - Sentence Tokenization
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests import wl_test_init
20 | from wordless.wl_settings import wl_settings_sentence_tokenization
21 |
main = wl_test_init.Wl_Test_Main(switch_lang_utils = 'fast')

def test_wl_settings_sentence_tokenization():
    # Standard settings page cycle followed by the preview-related slots.
    page = wl_settings_sentence_tokenization.Wl_Settings_Sentence_Tokenization(main)
    page.load_settings()
    page.load_settings(defaults = True)
    page.apply_settings()

    page.preview_changed()
    page.update_gui('test')
    page.update_gui_err()

if __name__ == '__main__':
    test_wl_settings_sentence_tokenization()
36 |
--------------------------------------------------------------------------------
/tests/tests_settings/test_settings_sentiment_analysis.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Settings - Sentiment Analysis
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests import wl_test_init
20 | from wordless.wl_settings import wl_settings_sentiment_analysis
21 |
main = wl_test_init.Wl_Test_Main(switch_lang_utils = 'fast')

def test_wl_settings_sentiment_analysis():
    # Standard settings page cycle, then drive the preview with a float score.
    page = wl_settings_sentiment_analysis.Wl_Settings_Sentiment_Analysis(main)
    page.load_settings()
    page.load_settings(defaults = True)
    page.apply_settings()

    page.preview_changed()
    page.update_gui(0.123456789)
    page.update_gui_err()

if __name__ == '__main__':
    test_wl_settings_sentiment_analysis()
36 |
--------------------------------------------------------------------------------
/tests/tests_settings/test_settings_stop_word_lists.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Settings - Stop Word Lists
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests import wl_test_init
20 | from wordless.wl_settings import wl_settings_stop_word_lists
21 |
main = wl_test_init.Wl_Test_Main()

def test_wl_settings_stop_word_lists():
    page = wl_settings_stop_word_lists.Wl_Settings_Stop_Word_Lists(main)
    page.load_settings()
    page.load_settings(defaults = True)
    page.apply_settings()

    # Trigger the list-selection and preview slots once each.
    page.stop_word_list_changed(page.table_stop_word_lists.model().item(0, 0))
    page.preview_settings_changed()
    page.preview_results_changed()

if __name__ == '__main__':
    test_wl_settings_stop_word_lists()
36 |
--------------------------------------------------------------------------------
/tests/tests_settings/test_settings_syl_tokenization.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Settings - Syllable Tokenization
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests import wl_test_init
20 | from wordless.wl_settings import wl_settings_syl_tokenization
21 |
main = wl_test_init.Wl_Test_Main(switch_lang_utils = 'fast')

def test_wl_settings_syl_tokenization():
    # Standard settings page cycle followed by the preview-related slots.
    page = wl_settings_syl_tokenization.Wl_Settings_Syl_Tokenization(main)
    page.load_settings()
    page.load_settings(defaults = True)
    page.apply_settings()

    page.preview_changed()
    page.update_gui('test')
    page.update_gui_err()

if __name__ == '__main__':
    test_wl_settings_syl_tokenization()
36 |
--------------------------------------------------------------------------------
/tests/tests_settings/test_settings_tables.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Settings - Tables
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests import wl_test_init
20 | from wordless.wl_settings import wl_settings_tables
21 |
main = wl_test_init.Wl_Test_Main()

def _check_settings_page(page):
    # Shared load/apply cycle used by all table settings pages.
    page.load_settings()
    page.load_settings(defaults = True)
    page.apply_settings()

def test_wl_settings_tables():
    _check_settings_page(wl_settings_tables.Wl_Settings_Tables(main))

def test_wl_settings_tables_concordancer():
    _check_settings_page(wl_settings_tables.Wl_Settings_Tables_Concordancer(main))

def test_wl_settings_tables_parallel_concordancer():
    _check_settings_page(wl_settings_tables.Wl_Settings_Tables_Parallel_Concordancer(main))

def test_wl_settings_tables_dependency_parser():
    _check_settings_page(wl_settings_tables.Wl_Settings_Tables_Dependency_Parser(main))

if __name__ == '__main__':
    test_wl_settings_tables()
    test_wl_settings_tables_concordancer()
    test_wl_settings_tables_parallel_concordancer()
    test_wl_settings_tables_dependency_parser()
53 |
--------------------------------------------------------------------------------
/tests/tests_settings/test_settings_word_tokenization.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Settings - Word Tokenization
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests import wl_test_init
20 | from wordless.wl_settings import wl_settings_word_tokenization
21 |
main = wl_test_init.Wl_Test_Main(switch_lang_utils = 'fast')

def test_wl_settings_word_tokenization():
    # Standard settings page cycle followed by the preview-related slots.
    page = wl_settings_word_tokenization.Wl_Settings_Word_Tokenization(main)
    page.load_settings()
    page.load_settings(defaults = True)
    page.apply_settings()

    page.preview_changed()
    page.update_gui('test')
    page.update_gui_err()

if __name__ == '__main__':
    test_wl_settings_word_tokenization()
36 |
--------------------------------------------------------------------------------
/tests/tests_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_utils/__init__.py
--------------------------------------------------------------------------------
/tests/tests_utils/test_paths.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Utilities - Paths
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | import os
20 | import sys
21 |
22 | from wordless.wl_utils import wl_paths, wl_misc
23 |
def test_get_normalized_path():
    # Normalization should never echo a relative input back unchanged.
    assert wl_paths.get_normalized_path('.') != '.'
    assert wl_paths.get_normalized_path('/')
    assert wl_paths.get_normalized_path('a') != 'a'
    assert wl_paths.get_normalized_path('a/b/c') != 'a/b/c'

def test_get_normalized_dir():
    # Same expectations for the directory variant.
    assert wl_paths.get_normalized_dir('.') != '.'
    assert wl_paths.get_normalized_dir('/')
    assert wl_paths.get_normalized_dir('a') != 'a'
    assert wl_paths.get_normalized_dir('a/b/c') != 'a/b/c'

def test_get_path_file():
    sep = os.path.sep

    assert wl_paths.get_path_file('')
    assert wl_paths.get_path_file('a', 'b', 'c').endswith(sep.join(['a', 'b', 'c']))
    assert wl_paths.get_path_file('a', '..', 'b').endswith('b')

    # Simulate a frozen-app environment (sys._MEIPASS is set by PyInstaller).
    sys._MEIPASS = 'test'

    assert wl_paths.get_path_file('a', internal = True).endswith(sep.join(['test', 'a']))
    assert wl_paths.get_path_file('a', internal = False).endswith('a')

    # Pretend the OS is macOS to hit the MacOS-specific branch, then restore.
    orig_check_os = wl_misc.check_os
    wl_misc.check_os = lambda: (False, True, False)

    assert wl_paths.get_path_file('a', internal = False).endswith(sep.join(['MacOS', 'a']))

    wl_misc.check_os = orig_check_os
    del sys._MEIPASS

def test_get_path_data():
    assert wl_paths.get_path_data('a').endswith(os.path.sep.join(['data', 'a']))

def test_get_path_img():
    assert wl_paths.get_path_img('a').endswith(os.path.sep.join(['imgs', 'a']))

if __name__ == '__main__':
    for test in (
        test_get_normalized_path,
        test_get_normalized_dir,

        test_get_path_file,
        test_get_path_data,
        test_get_path_img
    ):
        test()
67 |
--------------------------------------------------------------------------------
/tests/tests_utils/test_threading.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Utilities - Threading
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests import wl_test_init
20 | from wordless.wl_dialogs import wl_dialogs_misc
21 | from wordless.wl_utils import wl_threading
22 |
main = wl_test_init.Wl_Test_Main()

def test_wl_worker():
    # A worker with a progress dialog and a no-op callback constructs cleanly.
    progress = wl_dialogs_misc.Wl_Dialog_Progress(main, 'test')
    wl_threading.Wl_Worker(main, progress, lambda: None)

def test_wl_worker_no_progress():
    wl_threading.Wl_Worker_No_Progress(main, lambda: None)

def test_wl_worker_no_callback():
    progress = wl_dialogs_misc.Wl_Dialog_Progress(main, 'test')
    wl_threading.Wl_Worker_No_Callback(main, progress)

def test_wl_thread():
    # Stub the worker's run() with a no-op so the thread does no real work.
    progress = wl_dialogs_misc.Wl_Dialog_Progress(main, 'test')
    worker = wl_threading.Wl_Worker(main, progress, lambda: None)
    worker.run = lambda: None

    wl_threading.Wl_Thread(worker)

def test_wl_thread_no_progress():
    worker = wl_threading.Wl_Worker_No_Progress(main, lambda: None)
    worker.run = lambda: None

    wl_threading.Wl_Thread_No_Progress(worker)

if __name__ == '__main__':
    for test in (
        test_wl_worker,
        test_wl_worker_no_progress,
        test_wl_worker_no_callback,

        test_wl_thread,
        test_wl_thread_no_progress
    ):
        test()
56 |
--------------------------------------------------------------------------------
/tests/tests_widgets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/tests/tests_widgets/__init__.py
--------------------------------------------------------------------------------
/tests/tests_widgets/test_buttons.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Widgets - Buttons
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from PyQt5 import QtWidgets
20 |
21 | from tests import wl_test_init
22 | from wordless.wl_widgets import wl_buttons
23 |
main = wl_test_init.Wl_Test_Main()

def test_wl_button():
    # Plain push button
    wl_buttons.Wl_Button('test', main)

def test_wl_button_browse():
    # Browse button bound to a line edit
    wl_buttons.Wl_Button_Browse(main, 'test', QtWidgets.QLineEdit(), 'test', ['test'])

def test_wl_button_color():
    # Exercise the color getter and setter
    button_color = wl_buttons.Wl_Button_Color(main)
    button_color.get_color()
    button_color.set_color('test')

    # Toggle the transparency checkbox on and off
    _, checkbox_transparent = wl_buttons.wl_button_color(main, allow_transparent = True)

    for checked in (True, False):
        checkbox_transparent.setChecked(checked)

    # And the variant without a transparency option
    wl_buttons.wl_button_color(main, allow_transparent = False)

def test_wl_button_restore_default_vals():
    wl_buttons.Wl_Button_Restore_Default_Vals(main, 'test')
45 |
# Run all smoke tests when this module is executed directly
if __name__ == '__main__':
    test_wl_button()
    test_wl_button_browse()
    test_wl_button_color()
    test_wl_button_restore_default_vals()
51 |
--------------------------------------------------------------------------------
/tests/tests_widgets/test_item_delegates.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Widgets - Item delegates
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from PyQt5 import QtWidgets
20 |
21 | from tests import wl_test_init
22 | from wordless.wl_widgets import wl_item_delegates
23 |
main = wl_test_init.Wl_Test_Main()

def test_wl_item_delegate_uneditable():
    delegate = wl_item_delegates.Wl_Item_Delegate_Uneditable()
    delegate.createEditor(main, '', '')

def test_wl_item_delegate():
    # With an explicit widget class
    delegate = wl_item_delegates.Wl_Item_Delegate(main, QtWidgets.QComboBox)
    delegate.createEditor(main, 'test', 'test')
    delegate.set_enabled(True)

    # With the default widget class
    delegate = wl_item_delegates.Wl_Item_Delegate(main)
    delegate.createEditor(main, 'test', 'test')

def test_wl_item_delegate_combo_box():
    index_editable = wl_test_init.wl_test_index(0, 0)
    index_uneditable = wl_test_init.wl_test_index(0, 1)

    delegate_combo_box = wl_item_delegates.Wl_Item_Delegate_Combo_Box(main, row = 0, col = 0)
    delegate_combo_box.createEditor(main, 'test', index_editable)

    # An editor is only created for the cell the delegate was configured for
    assert delegate_combo_box.createEditor(main, 'test', index_uneditable) is None
    assert delegate_combo_box.is_editable(index_editable)
    assert not delegate_combo_box.is_editable(index_uneditable)

def test_wl_item_delegate_combo_box_custom():
    delegate_custom = wl_item_delegates.Wl_Item_Delegate_Combo_Box_Custom(main, QtWidgets.QComboBox, row = 0, col = 0)

    # Both the editable (0, 0) and uneditable (0, 1) cells
    for col in (0, 1):
        delegate_custom.createEditor(main, 'test', wl_test_init.wl_test_index(0, col))
52 |
# Run all smoke tests when this module is executed directly
if __name__ == '__main__':
    test_wl_item_delegate_uneditable()
    test_wl_item_delegate()
    test_wl_item_delegate_combo_box()
    test_wl_item_delegate_combo_box_custom()
58 |
--------------------------------------------------------------------------------
/tests/tests_widgets/test_labels.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Widgets - Labels
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from tests import wl_test_init
20 | from wordless.wl_widgets import wl_labels
21 |
main = wl_test_init.Wl_Test_Main()

def test_wl_label():
    wl_labels.Wl_Label('test', main)

def test_wl_label_hint():
    wl_labels.Wl_Label_Hint('test', main)

def test_wl_label_html():
    wl_labels.Wl_Label_Html('test', main)

def test_wl_label_html_centered():
    wl_labels.Wl_Label_Html_Centered('test', main)

def test_wl_label_dialog():
    # Also exercise the text setter
    label_dialog = wl_labels.Wl_Label_Dialog('test', main)
    label_dialog.set_text('test')

def test_wl_label_dialog_no_wrap():
    wl_labels.Wl_Label_Dialog_No_Wrap('test', main)
42 |
# Run all smoke tests when this module is executed directly
if __name__ == '__main__':
    test_wl_label()
    test_wl_label_hint()
    test_wl_label_html()
    test_wl_label_html_centered()
    test_wl_label_dialog()
    test_wl_label_dialog_no_wrap()
50 |
--------------------------------------------------------------------------------
/tests/tests_widgets/test_layouts.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Tests: Widgets - Layouts
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from PyQt5 import QtCore
20 | from PyQt5 import QtWidgets
21 |
22 | from tests import wl_test_init
23 | from wordless.wl_widgets import wl_layouts
24 |
main = wl_test_init.Wl_Test_Main()

def test_wl_layout():
    wl_layouts.Wl_Layout()

def test_wl_wrapper():
    # A wrapper should be able to load settings right after construction
    wrapper = wl_layouts.Wl_Wrapper(main)
    wrapper.load_settings()

def test_wl_splitter():
    wl_layouts.Wl_Splitter(QtCore.Qt.Vertical, main)

def test_wl_scroll_area():
    wl_layouts.Wl_Scroll_Area(main)

def test_wl_stacked_widget_resizable():
    # Adding a widget and switching to it should trigger the resize logic
    stacked_widget = wl_layouts.Wl_Stacked_Widget_Resizable(main)
    stacked_widget.addWidget(QtWidgets.QLabel())
    stacked_widget.current_changed(0)

def test_wl_separator():
    # Both supported orientations
    for orientation in ('hor', 'vert'):
        wl_layouts.Wl_Separator(main, orientation = orientation)
48 |
# Run all smoke tests when this module is executed directly
if __name__ == '__main__':
    test_wl_layout()
    test_wl_wrapper()
    test_wl_splitter()
    test_wl_scroll_area()
    test_wl_stacked_widget_resizable()
    test_wl_separator()
56 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/utils/__init__.py
--------------------------------------------------------------------------------
/utils/data_luong_nguyen_dinh_freq_syls_easy_1000.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Utilities: Data - Extract the 1000 most common syllables from all easy documents of the corpus of Vietnamese text readability dataset on literature domain
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
import collections
import glob

# Raw whitespace-delimited syllables gathered from all "easy" documents
syls = []
# The 1000 most frequent syllables as (syllable, frequency) pairs
freq_syls = []

# The corpus of Vietnamese text readability dataset on literature domain: https://github.com/anvinhluong/Vietnamese-text-readability/blob/master/Vietnamese%20Text%20Readability%20Corpus.zip
for file in glob.glob('Vietnamese Text Readability Corpus/easy_*.txt'):
    print(f'Processing file {file}...')

    with open(file, 'r', encoding = 'utf_8') as f:
        syls.extend(f.read().split())

# Get the 1000 most frequent syllables (excluding punctuation marks)
for syl, freq in sorted(collections.Counter(syls).items(), key = lambda item: item[1], reverse = True):
    # Keep only tokens containing at least one alphanumeric character, which
    # filters out tokens consisting solely of punctuation marks
    if any((char for char in syl if char.isalnum())):
        freq_syls.append((syl, freq))

    if len(freq_syls) >= 1000:
        break

# One syllable per line; frequencies are used only for ranking and discarded
with open('data/luong_nguyen_dinh_freq_syls_easy_1000.txt', 'w', encoding = 'utf_8') as f:
    for syl, _ in freq_syls:
        f.write(syl + '\n')
43 |
--------------------------------------------------------------------------------
/utils/linux_compile_py_from_src.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # ----------------------------------------------------------------------
3 | # Utilities: Linux - Compile Python from source
4 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program. If not, see .
18 | # ----------------------------------------------------------------------
19 |
# Install build dependencies for Python
# Reference: https://devguide.python.org/getting-started/setup-building/#linux
# Enable source repositories (once) so "apt-get build-dep" can resolve them
# NOTE(review): hard-codes the cn.archive.ubuntu.com mirror and the "bionic"
# release — adjust for other mirrors/releases
if ! grep -Fxq "deb-src http://cn.archive.ubuntu.com/ubuntu/ bionic main" "/etc/apt/sources.list"; then
    sudo sh -c "echo 'deb-src http://cn.archive.ubuntu.com/ubuntu/ bionic main' >> /etc/apt/sources.list"
fi

sudo apt-get update
sudo apt-get -y build-dep python3
sudo apt-get -y install build-essential gdb lcov pkg-config libbz2-dev libffi-dev libgdbm-dev libgdbm-compat-dev liblzma-dev libncurses5-dev libreadline6-dev libsqlite3-dev libssl-dev lzma lzma-dev tk-dev uuid-dev zlib1g-dev

# Compile Python from source
PY_VER=3.11.9
PY_PACKAGE="Python-$PY_VER.tgz"
PY_FOLDER="Python-$PY_VER"

# Download and unpack only when not already present, so re-runs are cheap
if [ ! -d $PY_FOLDER ]; then
    if [ ! -f $PY_PACKAGE ]; then
        wget "https://www.python.org/ftp/python/$PY_VER/$PY_PACKAGE"
    fi

    tar -xf $PY_PACKAGE
fi

cd $PY_FOLDER
# PyInstaller requires "--enable-shared"
./configure --enable-optimizations --with-lto --enable-shared
make -s -j
# altinstall avoids clobbering the system "python3" symlink
sudo make altinstall
cd ..

# Fix error while loading shared libraries
sudo ldconfig

# Install 3rd-party libraries
python3.11 -m pip install --upgrade pip setuptools
pip3.11 install -r requirements_dev.txt
pip3.11 cache purge

# Fix libxcb-xinerama.so
sudo apt-get install libxcb-xinerama0

# Clean files
sudo rm $PY_PACKAGE
sudo rm -r $PY_FOLDER

sudo apt-get -y autoremove
sudo apt-get clean
67 |
--------------------------------------------------------------------------------
/utils/linux_create_shortcut.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Utilities: Linux - Create shortcut
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
import os
import subprocess

from wordless.wl_utils import wl_misc

# Current Wordless version, embedded into the .desktop entry below
wl_ver = wl_misc.get_wl_ver()

# Paths are resolved relative to this script's own location
# NOTE(review): this assumes the script sits inside the Wordless installation
# directory and the "Wordless" executable lives one level up — confirm against
# the packaged layout
path_wl = os.path.split(globals()['__file__'])[0]
path_exec = os.path.join(os.path.split(path_wl)[0], 'Wordless')
path_icon = os.path.join(path_wl, 'imgs', 'wl_icon.ico')
path_desktop = os.path.expanduser('~/.local/share/applications/Wordless.desktop')

# Ensure the per-user applications directory exists
os.makedirs(os.path.expanduser('~/.local/share/applications'), exist_ok = True)

# Write a freedesktop.org desktop entry so Wordless appears in app menus
with open(path_desktop, 'w', encoding = 'utf_8') as f:
    f.write(f'''
[Desktop Entry]
Type=Application
Name=Wordless
Version={wl_ver}
Encoding=UTF-8
Path={path_wl}
Exec={path_exec}
Icon={path_icon}
Terminal=false
''')

# Allow executing file as program
subprocess.run(('chmod', '+x', path_desktop), check = True)
48 |
--------------------------------------------------------------------------------
/utils/wl_download_ci.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Utilities: Download - CI
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
import nltk
import spacy
import stanza

# Pre-fetch all language data needed by CI runs so tests do not download
# mid-run

# Download NLTK data
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('averaged_perceptron_tagger_rus')
nltk.download('perluniprops')
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('words')

# Download spaCy's and Stanza's models
spacy.cli.download('en_core_web_trf')
stanza.download('en', processors = ['tokenize', 'pos', 'lemma', 'depparse', 'sentiment'])
35 |
--------------------------------------------------------------------------------
/utils/wl_trs_generate_ts_files.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Utilities: Translations - Generate TS files
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
import pathlib
import re
import subprocess

# All Python source files under the wordless package are scanned for
# translatable strings
files = []

for file in pathlib.Path('wordless').rglob('*.py'):
    files.append(str(file))

# Use "_tr" as a shortcut of QCoreApplication.translate
subprocess.run(('pylupdate5' ,'-verbose' ,'-translate-function', '_tr', *files, '-ts', 'trs/zho_cn.ts'), check = True)

# Fix HTML entities
with open(r'trs/zho_cn.ts', 'r', encoding = 'utf_8') as f:
    contents = f.read()

# Replace "&xxxx;" with "&xxxx;"
# NOTE(review): as rendered, this substitution is an identity; the original
# source most likely unescaped doubly-escaped entities (e.g. "&amp;xxxx;" ->
# "&xxxx;") and was garbled in transit — confirm against upstream
contents = re.sub(r'&([a-z]{2,5});', r'&\1;', contents)
# Escape non-breaking spaces
# NOTE(review): both replace() arguments also appear identical here —
# presumably one side was a literal no-break space; verify before editing
contents = contents.replace(r' ', r' ')

with open(r'trs/zho_cn.ts', 'w', encoding = 'utf_8') as f:
    f.write(contents)
42 |
--------------------------------------------------------------------------------
/utils/wl_trs_utils.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Utilities: Translations - Utilities
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | import glob
20 | import subprocess
21 |
22 | import bs4
23 |
# Fix format of ts files
def fix_ts_format(ts_file):
    # Rewrite the given Qt TS file in place, normalizing its text content
    with open(ts_file, 'r', encoding = 'utf_8') as f:
        contents = f.read()

    with open(ts_file, 'w', encoding = 'utf_8') as f:
        # NOTE(review): as rendered this replace() is a no-op (and one source
        # line appears to be missing from this snapshot); the original most
        # likely replaced an escaped entity such as "&#xa;" with a real
        # newline — confirm against upstream before relying on this
        contents = contents.replace('\n', '\n')

        f.write(contents)
34 |
def del_obsolete_trans(ts_file):
    # Strip obsolete translations (and any contexts emptied as a result) from
    # a Qt TS file, then restore the file's formatting
    with open(ts_file, 'r', encoding = 'utf_8') as f:
        soup = bs4.BeautifulSoup(f.read(), features = 'lxml')

    # Remove obsolete translations
    for element_context in soup.select('context'):
        for element_message in element_context.select('message'):
            element_tr = element_message.select_one('translation')

            if element_tr.attrs.get('type') == 'obsolete':
                element_message.decompose()

    # Remove contexts left without any messages
    for element_context in soup.select('context'):
        if not element_context.select('message'):
            element_context.decompose()

    with open(ts_file, 'w', encoding = 'utf_8') as f:
        f.write(str(soup))

    fix_ts_format(ts_file)
56 |
def release_trs():
    # Compile every TS file under trs/ into its binary QM form via Qt's
    # lrelease tool
    for path_ts in glob.glob('trs/*.ts'):
        subprocess.run(('lrelease', path_ts), check = True)
60 |
# Compile all translation files when this module is executed directly
if __name__ == '__main__':
    release_trs()
63 |
--------------------------------------------------------------------------------
/utils/wl_trs_zho_tw.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Utilities: Translations - Chinese (Traditional)
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
import bs4
import opencc

from utils import wl_trs_utils

# Parse the Simplified Chinese translation file
with open('trs/zho_cn.ts', 'r', encoding = 'utf_8') as f:
    trs_zho_cn = f.read()
    soup = bs4.BeautifulSoup(trs_zho_cn, features = 'lxml')

# Convert Unix line endings to Windows ones
# NOTE(review): the content is written back unchanged here — the conversion
# presumably relies on text-mode open() translating "\n" on write under the
# platform's default newline handling; confirm this runs on Windows
with open('trs/zho_cn.ts', 'w', encoding = 'utf_8') as f:
    f.write(trs_zho_cn)

# Simplified -> Traditional (Taiwan standard with phrase conversion)
cc = opencc.OpenCC('s2twp')

# Change language
soup.ts['language'] = 'zh_TW'
# Translate Simplified Chinese into Traditional Chinese
for element_context in soup.select('context'):
    for element_message in element_context.select('message'):
        element_src = element_message.select_one('source')
        element_trans = element_message.select_one('translation')

        # Language-specific files
        if element_src.text == 'doc/trs/zho_cn/ACKS.md':
            element_trans.string = 'doc/trs/zho_tw/ACKS.md'
        else:
            element_trans.string = cc.convert(element_trans.text)

with open('trs/zho_tw.ts', 'w', encoding = 'utf_8') as f:
    f.write(str(soup))

# Release
wl_trs_utils.fix_ts_format('trs/zho_tw.ts')
wl_trs_utils.release_trs()
54 |
--------------------------------------------------------------------------------
/wordless/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/wordless/__init__.py
--------------------------------------------------------------------------------
/wordless/wl_checks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/wordless/wl_checks/__init__.py
--------------------------------------------------------------------------------
/wordless/wl_checks/wl_checks_misc.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Wordless: Checks - Miscellaneous
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | import os
20 | import pathlib
21 |
def check_custom_settings(settings_custom, settings_default):
    """Return True if settings_custom has exactly the same (possibly nested)
    key structure, in the same order, as settings_default.

    Used to detect custom settings files that are out of date with respect to
    the current default settings layout.
    """
    def get_keys(settings, keys):
        # Depth-first walk, so nesting and ordering are both captured in the
        # flattened key list
        for key, value in settings.items():
            keys.append(key)

            if isinstance(value, dict):
                get_keys(value, keys)

        return keys

    # "==" on lists already yields a bool, so no explicit bool() wrapper or
    # intermediate variables are needed
    return get_keys(settings_custom, []) == get_keys(settings_default, [])
39 |
def check_dir(dir_name):
    """Ensure that the directory dir_name exists and return it.

    mkdir(parents = True, exist_ok = True) already handles the
    "already exists" case itself, so the previous os.path.exists() pre-check
    was redundant and race-prone (the directory could appear or vanish
    between the check and the mkdir).
    """
    pathlib.Path(dir_name).mkdir(parents = True, exist_ok = True)

    return dir_name
45 |
def check_new_name(new_name, names, separator = None):
    """Return a variant of new_name that does not collide with names.

    If new_name is free it is returned unchanged; otherwise an increasing
    numeric suffix is appended — " (2)", " (3)", ... by default, or
    f"{separator}2", f"{separator}3", ... when a separator is given.
    """
    existing = set(names)

    if new_name not in existing:
        return new_name

    suffix = 2

    # Try successive suffixes until one is unused
    while True:
        if separator is None:
            candidate = f'{new_name} ({suffix})'
        else:
            candidate = f'{new_name}{separator}{suffix}'

        if candidate not in existing:
            return candidate

        suffix += 1
65 |
def check_new_path(new_path):
    """Return a file path that does not clash with any existing regular file.

    When new_path is taken, " (2)", " (3)", ... is inserted before the
    extension until a free path is found. An empty placeholder file is then
    created to reserve the chosen path, which is returned.
    """
    new_path_valid = new_path

    # os.path.isfile() implies existence, so only regular files trigger the
    # renaming loop (directories with the same name are left alone)
    if os.path.exists(new_path) and os.path.isfile(new_path):
        path_head, ext = os.path.splitext(new_path)
        i = 2

        while True:
            new_path_valid = f'{path_head} ({i}){ext}'

            if not (os.path.exists(new_path_valid) and os.path.isfile(new_path_valid)):
                break

            i += 1

    # Placeholder for the new path
    with open(new_path_valid, 'wb') as _:
        pass

    return new_path_valid
86 |
--------------------------------------------------------------------------------
/wordless/wl_dialogs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/wordless/wl_dialogs/__init__.py
--------------------------------------------------------------------------------
/wordless/wl_figs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/wordless/wl_figs/__init__.py
--------------------------------------------------------------------------------
/wordless/wl_measures/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/wordless/wl_measures/__init__.py
--------------------------------------------------------------------------------
/wordless/wl_measures/wl_measures_misc.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Wordless: Measures - Miscellaneous
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | import numpy
20 |
def modes(inputs):
    """Return every mode (most frequent value) of inputs as a list.

    All values tying for the maximum frequency are returned, in the sorted
    order produced by numpy.unique. An empty input yields an empty list.
    """
    vals = numpy.array(inputs)

    if vals.size == 0:
        return []

    uniques, counts = numpy.unique(vals, return_counts = True)

    # Keep every unique value whose frequency equals the maximum
    return list(uniques[counts == counts.max()])
35 |
--------------------------------------------------------------------------------
/wordless/wl_nlp/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/wordless/wl_nlp/__init__.py
--------------------------------------------------------------------------------
/wordless/wl_results/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/wordless/wl_results/__init__.py
--------------------------------------------------------------------------------
/wordless/wl_settings/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/wordless/wl_settings/__init__.py
--------------------------------------------------------------------------------
/wordless/wl_tagsets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/wordless/wl_tagsets/__init__.py
--------------------------------------------------------------------------------
/wordless/wl_tagsets/wl_tagset_fra_universal.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Wordless: Tagsets - Universal POS tags - French
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | # Universal POS Tags: https://universaldependencies.org/fr/pos/
# Each entry: [POS tag, mapped universal POS tag, description, examples]
# (here source and target tagsets are both Universal POS, so the first two columns coincide)
tagset_mapping = [
    ['ADJ', 'ADJ', 'Adjective', 'grand/grande/grands/grandes, vieux/vieille/vieilles'],
    ['ADP', 'ADP', 'Adposition', 'pour, de, à, dans'],
    ['ADV', 'ADV', 'Adverb', 'très (joli), (fondues) ensemble'],
    ['AUX', 'AUX', 'Auxiliary', 'être, avoir, faire'],
    ['CONJ', 'CONJ', 'Coordinating/subordinating conjunction', 'See CCONJ and SCONJ'],
    ['CCONJ', 'CCONJ', 'Coordinating conjunction', 'mais, ou, et, or, ni, car'],
    ['SCONJ', 'SCONJ', 'Subordinating conjunction', 'quand\nMultiword subordinating conjunction: (parce) que, (afin) que, (avant) que)'],
    ['DET', 'DET', 'Determiner', 'Articles (a closed class indicating definiteness, specificity or givenness): le, la, les\nPossessive determiners: mon, ton, son, ma, ta, sa, mes, tes, ses, notre, votre, leur, nos, vos, leurs\nDemonstrative determiners: (J’ai vu) ce (vélo hier.), cet, cette\nInterrogative determiners: quel, Quelle (couleur aimez-vous?)\nRelative determiners: quel, (Je me demande) quelle (couleur vous aimez.)\nQuantity/quantifier determiners: aucun'],
    ['INTJ', 'INTJ', 'Interjection', 'bref, bon, enfin'],
    ['NOUN', 'NOUN', 'Noun', 'fille, chat, arbre, air, beauté'],
    ['PROPN', 'PROPN', 'Proper noun', 'Pierre, ONU, Mexique'],
    ['NUM', 'NUM', 'Numeral', 'quatre, 4, IV'],
    ['PART', 'PART', 'Particle', 'Negation particle: ne'],
    ['PRON', 'PRON', 'Pronoun', 'Personal pronouns: je, tu, il\nDemonstrative pronouns: ceux\nReflexive pronouns: me, se\nInterrogative/relative pronouns: qui, que'],
    ['VERB', 'VERB', 'Verb', '(je) vois, (à) lire, (en) marchant'],

    # Non-word categories
    ['PUNCT', 'PUNCT', 'Punctuation', 'Period: .\nComma: ,\nParentheses: ()'],
    ['SYM', 'SYM', 'Symbol', '$, %, §, ©\n+, −, ×, ÷, =, <, >\n:), ♥‿♥, 😝\njohn.doe@universal.org, http://universaldependencies.org/, 1-800-COMPANY'],
    ['X', 'X', 'Other', 'etc']
]
41 |
--------------------------------------------------------------------------------
/wordless/wl_tagsets/wl_tagset_khm_alt.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Wordless: Tagsets - Asian Language Treebank
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | # Reference: https://www2.nict.go.jp/astrec-att/member/mutiyama/ALT/Khmer-annotation-guideline.pdf
# Each entry: [POS tag, mapped universal POS tag, description, examples]
tagset_mapping = [
    ['n', 'NOUN', 'General nouns, can be subjects or objects of tokens tagged by v', ''],
    ['v', 'VERB', 'General verbs, can take tokens tagged by n as arguments', ''],
    ['a', 'ADJ', 'General adjectives, can directly describe or modify tokens tagged by n', ''],
    ['o', 'PART', 'Other modifications or complements for tokens or larger syntactic parts', ''],

    # Non-word categories
    ['1', 'NUM', 'General numbers', ''],
    ['.', 'PUNCT', 'General punctuation marks', ''],
    ['+', 'X', 'A catch-all category, for tokens with weak syntactic roles', '']
]
30 |
--------------------------------------------------------------------------------
/wordless/wl_tagsets/wl_tagset_kor_mecab.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Wordless: Tagsets - Mecab
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | # References:
20 | # MeCab: https://docs.google.com/spreadsheets/u/0/d/1-9blXKjtjeKZqsf4NzHeYJCrr49-nXeRF6D80udfcwY/edit?usp=sharing
21 | # spaCy: https://github.com/explosion/spaCy/blob/2ce9a220dbd30d3a79c2a232230204a102fb3f1d/spacy/lang/ko/tag_map.py
# Each entry: [POS tag, mapped universal POS tag, description (Korean), examples]
tagset_mapping = [
    # Nouns, numerals, pronouns
    ['NNG', 'NOUN', '일반 명사', ''],
    ['NNP', 'PROPN', '고유 명사', ''],
    ['NNB', 'NOUN', '의존 명사', ''],
    ['NNBC', 'NOUN', '단위를 나타내는 명사', ''],
    ['NR', 'NUM', '수사', ''],
    ['NP', 'PRON', '대명사', ''],

    # Verbs, adjectives, auxiliaries, copulas
    ['VV', 'VERB', '동사', ''],
    ['VA', 'ADJ', '형용사', ''],
    ['VX', 'AUX', '보조 용언', ''],
    ['VCP', 'ADP', '긍정 지정사', ''],
    ['VCN', 'ADJ', '부정 지정사', ''],

    # Determiners and adverbs
    ['MM', 'DET', '관형사', ''],
    ['MAG', 'ADV', '일반 부사', ''],
    ['MAJ', 'CONJ', '접속 부사', ''],

    ['IC', 'INTJ', '감탄사', ''],

    # Particles (josa)
    ['JKS', 'ADP', '주격 조사', ''],
    ['JKC', 'ADP', '보격 조사', ''],
    ['JKG', 'ADP', '관형격 조사', ''],
    ['JKO', 'ADP', '목적격 조사', ''],
    ['JKB', 'ADP', '부사격 조사', ''],
    ['JKV', 'ADP', '호격 조사', ''],
    ['JKQ', 'ADP', '인용격 조사', ''],
    ['JX', 'ADP', '보조사', ''],
    ['JC', 'CONJ', '접속 조사', ''],

    # Endings (eomi)
    ['EP', 'X', '선어말 어미', ''],
    ['EF', 'X', '종결 어미', ''],
    ['EC', 'X', '연결 어미', ''],
    ['ETN', 'X', '명사형 전성 어미', ''],
    ['ETM', 'X', '관형형 전성 어미', ''],

    ['XPN', 'PART', '체언 접두사', ''],

    # Derivational suffixes
    ['XSN', 'X', '명사 파생 접미사 ', ''],
    ['XSV', 'X', '동사 파생 접미사', ''],
    ['XSA', 'X', '형용사 파생 접미사', ''],

    ['XR', 'X', '어근', ''],

    # Punctuation and symbols
    ['SF', 'PUNCT', '마침표, 물음표, 느낌표', ''],
    ['SE', 'PUNCT', '줄임표', '…'],
    ['SSO', 'PUNCT', '여는 괄호', '( ['],
    ['SSC', 'PUNCT', '닫는 괄호', ') ]'],
    ['SC', 'PUNCT', '구분자', ', · / :'],
    ['SY', 'SYM', '', ''],

    # Foreign text, hanja, digits
    ['SL', 'X', '외국어', ''],
    ['SH', 'X', '한자', ''],
    ['SN', 'NUM', '숫자', '']
]
77 |
--------------------------------------------------------------------------------
/wordless/wl_tagsets/wl_tagset_lao_seqlabeling.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Wordless: Tagsets - SeqLabeling
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | # Reference: https://github.com/FoVNull/SeqLabeling/blob/main/reference/Lao_POS.tsv
# Each entry: [POS tag, mapped universal POS tag, description (Chinese), examples]
tagset_mapping = [
    # Nouns
    ['N', 'NOUN', '名词', ''],
    ['TTL', 'NOUN', '称号名词', ''],
    ['PRN', 'PROPN', '专有名词', ''],

    # Pronouns
    ['NTR', 'PRON', '疑问代词', ''],
    ['DMN', 'PRON', '指示代词', ''],
    ['PRS', 'PRON', '人称代词', ''],
    ['REL', 'PRON', '关系代词', ''],

    ['V', 'VERB', '动词', ''],

    # Auxiliaries
    ['PRA', 'AUX', '前置助动词', ''],
    ['PVA', 'AUX', '后置助动词', ''],

    ['ADJ', 'ADJ', '形容词', ''],
    ['ADV', 'ADV', '副词', ''],

    # Determiners
    ['DBQ', 'DET', '数词前限定词', ''],
    ['DAQ', 'DET', '数词后限定词', ''],
    ['IBQ', 'DET', '数词前不定限定词', ''],
    ['IAQ', 'DET', '数词后不定限定词', ''],
    ['DAN', 'DET', '名词后限定词', ''],
    ['IAC', 'DET', '名词后不定限定词', ''],

    # Numerals
    ['CNM', 'NUM', '基数词', ''],
    ['ONM', 'ADJ', '序数词', ''],

    ['COJ', 'CONJ', '连词', ''],
    ['PRE', 'ADP', '介词', ''],

    # Particles
    ['CLF', 'PART', '量词', ''],
    ['FIX', 'PART', '前置词', ''],
    ['NEG', 'PART', '否定词', ''],

    ['INT', 'INTJ', '语气词', ''],
    ['PUNCT', 'PUNCT', '标点符号', '']
]
58 |
--------------------------------------------------------------------------------
/wordless/wl_tagsets/wl_tagset_lao_yunshan_cup_2020.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Wordless: Tagsets - Yunshan Cup 2020
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | # Reference: https://github.com/FoVNull/SeqLabeling/blob/main/reference/Lao_POS.tsv
# Each entry: [POS tag, mapped universal POS tag, description (Chinese), examples]
# NOTE(review): identical content to wl_tagset_lao_seqlabeling.py (same upstream reference)
tagset_mapping = [
    # Nouns
    ['N', 'NOUN', '名词', ''],
    ['TTL', 'NOUN', '称号名词', ''],
    ['PRN', 'PROPN', '专有名词', ''],

    # Pronouns
    ['NTR', 'PRON', '疑问代词', ''],
    ['DMN', 'PRON', '指示代词', ''],
    ['PRS', 'PRON', '人称代词', ''],
    ['REL', 'PRON', '关系代词', ''],

    ['V', 'VERB', '动词', ''],

    # Auxiliaries
    ['PRA', 'AUX', '前置助动词', ''],
    ['PVA', 'AUX', '后置助动词', ''],

    ['ADJ', 'ADJ', '形容词', ''],
    ['ADV', 'ADV', '副词', ''],

    # Determiners
    ['DBQ', 'DET', '数词前限定词', ''],
    ['DAQ', 'DET', '数词后限定词', ''],
    ['IBQ', 'DET', '数词前不定限定词', ''],
    ['IAQ', 'DET', '数词后不定限定词', ''],
    ['DAN', 'DET', '名词后限定词', ''],
    ['IAC', 'DET', '名词后不定限定词', ''],

    # Numerals
    ['CNM', 'NUM', '基数词', ''],
    ['ONM', 'ADJ', '序数词', ''],

    ['COJ', 'CONJ', '连词', ''],
    ['PRE', 'ADP', '介词', ''],

    # Particles
    ['CLF', 'PART', '量词', ''],
    ['FIX', 'PART', '前置词', ''],
    ['NEG', 'PART', '否定词', ''],

    ['INT', 'INTJ', '语气词', ''],
    ['PUNCT', 'PUNCT', '标点符号', '']
]
58 |
--------------------------------------------------------------------------------
/wordless/wl_tagsets/wl_tagset_nor_universal.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Wordless: Tagsets - Universal POS tags - Norwegian
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | # Universal POS Tags: https://universaldependencies.org/no/pos/
# Each entry: [POS tag, mapped universal POS tag, description, examples]
# (here source and target tagsets are both Universal POS, so the first two columns coincide)
tagset_mapping = [
    ['ADJ', 'ADJ', 'Adjective', 'stor, gammel, grønn'],
    ['ADP', 'ADP', 'Adposition', 'i, på, utenfor'],
    ['ADV', 'ADV', 'Adverb', '(Han kom) nettopp, Derfor (kom han), nesten (ferdig)'],
    ['AUX', 'AUX', 'Auxiliary', 'Temporal: har (spist), er (kommet)\nPassive: blir (spist)\nModal: kan/skal/vil/må/bør (spise)\nCopula: er (god)'],
    ['CONJ', 'CONJ', 'Coordinating/subordinating conjunction', 'See CCONJ and SCONJ'],
    ['CCONJ', 'CCONJ', 'Coordinating conjunction', 'og, eller, men'],
    ['SCONJ', 'SCONJ', 'Subordinating conjunction', 'Complementizers: at, om\nAdverbial clause introducers: når, siden, fordi'],
    ['DET', 'DET', 'Determiner', 'Possessive: mitt (barn), våre (barn), (barnet) vårt\nDemonstrative: dette (barnet), det (barnet), den (bilen), (det) samme (barnet) , (det) andre (barnet), hvilken (bil), hvilket (hus)\nQuantifying: en (bil), et (barn), ei (jente), noen (biler), alle (biler), begge (bilene)'],
    ['INTJ', 'INTJ', 'Interjection', 'ja, nei, hei, hallo, heisan, å, ok, piip'],
    ['NOUN', 'NOUN', 'Noun', 'jente, katt, tre, luft, skjønnhet'],
    ['PROPN', 'PROPN', 'Proper noun', 'Kari, Ola\nOslo, Bergen'],
    ['NUM', 'NUM', 'Numeral', '0, 1, 2, 3, 4, 5, 2014, 1000000, 3.14159265359\ntre, femtito, fire-fem, tusen'],
    ['PART', 'PART', 'Particle', '(Han liker) ikke å (spise is)'],
    ['PRON', 'PRON', 'Pronoun', 'Personal: han, hun, det, ham, henne\nDemonstrative: dette\nReflexive: seg\nReciprocal: hverandre\nInterrogative: hvem, hva, hvilken\nTotality: alle\nIndefinite: noen\nRelative: som'],
    ['VERB', 'VERB', 'Verb', 'løpe, løper, løp, (har) løpt\nspise, spiser, spiste, (har) spist'],

    # Non-word categories
    ['PUNCT', 'PUNCT', 'Punctuation', 'Period: .\nComma: ,\nParentheses: ()'],
    ['SYM', 'SYM', 'Symbol', '/, * *, *'],
    ['X', 'X', 'Other', '[English] (And then he just) xfgh pdl jklw']
]
41 |
--------------------------------------------------------------------------------
/wordless/wl_tagsets/wl_tagset_rus_open_corpora.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Wordless: Tagsets - OpenCorpora
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | # Reference: https://pymorphy2.readthedocs.io/en/latest/user/grammemes.html
# Each entry: [POS tag, mapped universal POS tag, description, examples]
tagset_mapping = [
    ['NOUN', 'NOUN', 'Noun', 'хомяк'],
    ['ADJF', 'ADJ', 'Adjective (full)', 'хороший'],
    ['ADJS', 'ADJ', 'Adjective (short)', 'хорош'],
    ['COMP', 'ADJ', 'Comparative', 'лучше, получше, выше'],
    ['VERB', 'VERB', 'Verb (personal form)', 'говорю, говорит, говорил'],
    ['INFN', 'VERB', 'Verb (infinitive)', 'говорить, сказать'],
    ['PRTF', 'VERB', 'Participle (full)', 'прочитавший, прочитанная'],
    ['PRTS', 'VERB', 'Participle (short)', 'прочитана'],
    ['GRND', 'VERB', 'Verbal adverb', 'прочитав, рассказывая'],
    ['NUMR', 'NUM', 'Numeral', 'три, пятьдесят'],
    ['ADVB', 'ADV', 'Adverb', 'круто'],
    ['NPRO', 'PRON', 'Pronoun-noun', 'он'],
    ['PRED', 'PART', 'Predicative', 'некогда'],
    ['PREP', 'ADP', 'Preposition', 'в'],
    ['CONJ', 'CONJ', 'Conjunction', 'и'],
    ['PRCL', 'PART', 'Particle', 'бы, же, лишь'],
    ['INTJ', 'INTJ', 'Interjection', 'ой'],

    # Token-level (non-lexical) categories; descriptions are in Russian
    ['LATN', 'X', 'Токен состоит из латинских букв', 'foo-bar, Maßstab'],
    ['NUMB', 'NUM', 'Число', '204, 3.14'],
    ['ROMN', 'X', 'Римское число', 'XI'],

    # NOTE(review): 'SYM/X' below is a combined mapping, unlike every other
    # single-tag row — confirm downstream consumers handle it
    ['PNCT', 'PUNCT', 'Пунктуация', ', ! ? …'],
    ['UNKN', 'SYM/X', 'Токен не удалось разобрать', '']
]
46 |
--------------------------------------------------------------------------------
/wordless/wl_tagsets/wl_tagset_tha_blackboard.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Wordless: Tagsets - Blackboard
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | # References:
20 | # https://github.com/PyThaiNLP/pythainlp/blob/dev/docs/api/tag.rst#pythainlptag
21 | # https://github.com/PyThaiNLP/pythainlp/blob/dev/pythainlp/tag/blackboard.py
22 | # https://bitbucket.org/kaamanita/blackboard-treebank/src/master/Blackboard-Treebank.pdf
# Each entry: [POS tag, mapped universal POS tag, description, examples]
tagset_mapping = [
    ['AJ', 'ADJ', 'Adjective: Attribute, modifier, or description of a noun', 'ใหม่, พิเศษ , ก่อน, มาก, สูง'],
    ['AV', 'ADV', 'Adverb: Word that modifies or qualifies an adjective, verb, or another adverb', 'ก่อน, ก็, เล็กน้อย, เลย, สุด'],
    ['AX', 'AUX', 'Auxiliary: Tense, aspect, mood, and voice', 'เป็น, ใช่, คือ, คล้าย'],
    ['CC', 'CCONJ', 'Connector: Conjunction and relative pronoun', 'แต่, และ, หรือ'],
    ['CL', 'NOUN', 'Classifier: Class or measurement unit to which a noun or an action belongs', 'กำมือ, พวก, สนาม, กีฬา, บัญชี'],
    ['FX', 'NOUN', 'Prefix: Inflectional (nominalizer, adjectivizer, adverbializer, and courteous verbalizer), and derivational', 'กำมือ, พวก, สนาม, กีฬา, บัญชี'],
    ['IJ', 'INTJ', 'Interjection: Exclamation word', 'อุ้ย, โอ้ย'],
    ['NG', 'PART', 'Negator: Word of negatio', ''],
    ['NN', 'NOUN', 'Noun: Person, place, thing, abstract concept, and proper name', 'กำมือ, พวก, สนาม, กีฬา, บัญชี'],
    ['NU', 'NUM', 'Number: Quantity for counting and calculation', '5,000, 103.7, 2004, หนึ่ง, ร้อย'],
    ['PA', 'PART', 'Particle: Politeness, intention, belief, question', 'มา ขึ้น ไม่ ได้ เข้า'],
    ['PR', 'PRON', 'Pronoun: Word used to refer to an element in the discourse', 'เรา, เขา, ตัวเอง, ใคร, เธอ'],
    ['PS', 'ADP', 'Preposition: Location, comparison, instrument, exemplification', 'แม้, ว่า, เมื่อ, ของ, สำหรับ'],
    ['PU', 'PUNCT', 'Punctuation: Punctuation mark', '''(, ), ", ', :'''],
    ['VV', 'VERB', 'Verb: Action, state, occurrence, and word that forms the predicate part', 'เปิด, ให้, ใช้, เผชิญ, อ่าน'],
    ['XX', 'X', 'Others: Unknown category', 'xfgh, pdl, jklw']
]
41 |
--------------------------------------------------------------------------------
/wordless/wl_tagsets/wl_tagset_vie_underthesea.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Wordless: Tagsets - Underthesea
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | # Reference: https://github.com/undertheseanlp/underthesea/wiki/M%C3%B4-t%E1%BA%A3-d%E1%BB%AF-li%E1%BB%87u-b%C3%A0i-to%C3%A1n-POS-Tag
# Each entry: [POS tag, mapped universal POS tag, description (Vietnamese), examples]
tagset_mapping = [
    ['A', 'ADJ', 'Tính từ', 'nhiều, hơn, khác, gần, lớn'],
    ['Ab', 'ADJ', 'Tính từ mượn', 'sexy, Peace, đờmi'],
    ['B', 'X', 'Từ mượn', 'karaoke, nilông, fax, oxy'],
    ['C', 'CCONJ', 'Liên từ', 'thì, nhưng, như, mà'],
    ['Cc', 'SCONJ', 'Liên từ đẳng lập', 'và, hay, hoặc, cùng'],
    ['E', 'ADP', 'Giới từ', 'của, trong, với, ở, cho'],
    ['Fw', 'X', 'Từ nước ngoài', 'Eleocharis, karaoke, Internationa'],
    ['FW', 'X', 'Từ nước ngoài', 'photo, knock-out, chat'],
    ['I', 'INTJ', 'Thán từ', 'ơi, ạ, Ôi, à, Vâng'],
    ['L', 'DET', 'Định từ', 'những, các, mấy, mọi, một số'],
    ['M', 'NUM', 'Số từ', 'một, hai, ba, Một, triệu, 1'],
    ['N', 'NOUN', 'Danh từ', 'người, khi, nhà, năm, ngày'],
    ['Nb', 'NOUN', 'Danh từ mượn', 'tivi, két, casino, golf, bar'],
    ['Nc', 'NOUN', 'Danh từ chỉ loại', 'con, cái, chiếc, ngôi'],
    ['Ne', 'NOUN', '', 'bọn, bộ, đoàn, tụi'],
    ['Ni', 'NOUN', 'Danh từ kí hiệu', 'A5, 1A, A4, B, A, 2032TS'],
    ['Np', 'PROPN', 'Danh từ riêng', ''],
    ['NNP', 'NOUN', '', 'VN, Nguyễn, Văn'],
    ['Ns', 'NOUN', '', 'ông, anh, người, chị'],
    ['Nu', 'NOUN', 'Danh từ đơn vị', 'đồng, m, tuổi, ha'],
    ['Ny', 'NOUN', 'Danh từ viết tắt', 'VN, TP, UBND, SV, ĐL'],
    ['P', 'PRON', 'Đại từ', 'này, tôi, đó, mình, đây'],
    ['R', 'X', 'Phó từ (Trạng từ)', 'không, đã, cũng, lại'],
    ['S', 'X', '', 'phó, trưởng, nguyên, Phó'],
    ['T', 'PART', 'Trợ từ', 'cả, ngay, chính, đến'],
    ['V', 'VERB', 'Động từ', 'có, là, được, đi, làm'],
    ['X', 'X', 'Không phân loại', 'như vậy, làm sao, nhất là'],
    ['Y', 'NOUN', '', ''],
    ['Z', 'X', 'Yếu tố cấu tạo từ', 'phó, viên, bất, siêu, tái, tổng'],

    # Punctuation
    ['CH', 'PUNCT', 'Dấu câu', ', . " ... “ ”'],
]
53 |
--------------------------------------------------------------------------------
/wordless/wl_tagsets/wl_tagset_xct_botok.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Wordless: Tagsets - Botok (Classical)
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | # Reference: https://github.com/Esukhia/botok/blob/master/botok/vars.py
# Each entry: [POS tag, mapped universal POS tag, description, examples]
tagset_mapping = [
    ['ADJ', 'ADJ', 'Adjectives', ''],
    ['ADP', 'ADP', 'Adposition', ''],
    ['ADV', 'ADV', 'Adverb', ''],
    ['AUX', 'AUX', 'Auxiliary', ''],
    ['CONJ', 'CONJ', 'Conjunction', ''],
    ['CCONJ', 'CCONJ', 'Coordinating conjunction', ''],
    ['SCONJ', 'SCONJ', 'Subordinating conjunction', ''],
    ['DET', 'DET', 'Determiner', ''],
    ['INTJ', 'INTJ', 'Interjection', ''],
    ['NOUN', 'NOUN', 'Noun', ''],
    ['PROPN', 'PROPN', 'Proper noun', ''],
    ['PART', 'PART', 'Particle', ''],
    ['PRON', 'PRON', 'Pronoun', ''],
    ['VERB', 'VERB', 'Verb', ''],
    ['NO_POS', 'X', 'No part-of-speech', ''],
    ['NON_WORD', 'X', 'Non-word', ''],

    # Languages
    ['BO', 'X', 'Tibetan language', ''],
    ['LATIN', 'X', 'Latin languages', ''],
    ['CJK', 'X', 'CJK languages', ''],
    ['OTHER', 'X', 'Other languages', ''],

    # Tibetan Textual Content
    ['TEXT', 'X', 'Tibetan textual content', ''],

    # Tibetan Non-textual Content
    ['NUM', 'NUM', 'Numeral', ''],
    ['NON_NUM', 'X', 'Non-numeral', ''],
    ['PUNCT', 'PUNCT', 'Punctuation', ''],
    ['NON_PUNCT', 'X', 'Non-punctuation', ''],
    ['SYM', 'SYM', 'Symbol', ''],
    ['NON_SYM', 'X', 'Non-symbol', ''],
    ['SPACE', 'X', 'Space', ''],
    ['NON_SPACE', 'X', 'Non-space', '']
]
57 |
--------------------------------------------------------------------------------
/wordless/wl_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/wordless/wl_utils/__init__.py
--------------------------------------------------------------------------------
/wordless/wl_utils/wl_excs.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Wordless: Utilities - Exceptions
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
class Wl_Exc(Exception):
    """Root of the Wordless-specific exception hierarchy."""
    pass
21 |
class Wl_Exc_Word_Cloud(Wl_Exc):
    """Base class for word cloud related errors."""
    pass
24 |
class Wl_Exc_Word_Cloud_Font(Wl_Exc_Word_Cloud):
    """Base class for word cloud font errors."""
    pass
27 |
class Wl_Exc_Word_Cloud_Font_Nonexistent(Wl_Exc_Word_Cloud_Font):
    """Word cloud font file does not exist."""
    pass
30 |
class Wl_Exc_Word_Cloud_Font_Is_Dir(Wl_Exc_Word_Cloud_Font):
    """Word cloud font path points to a directory, not a file."""
    pass
33 |
class Wl_Exc_Word_Cloud_Font_Unsupported(Wl_Exc_Word_Cloud_Font):
    """Word cloud font file is of an unsupported format."""
    pass
36 |
class Wl_Exc_Word_Cloud_Mask(Wl_Exc_Word_Cloud):
    """Base class for word cloud mask errors."""
    pass
39 |
class Wl_Exc_Word_Cloud_Mask_Nonexistent(Wl_Exc_Word_Cloud_Mask):
    """Word cloud mask file does not exist."""
    pass
42 |
class Wl_Exc_Word_Cloud_Mask_Is_Dir(Wl_Exc_Word_Cloud_Mask):
    """Word cloud mask path points to a directory, not a file."""
    pass
45 |
class Wl_Exc_Word_Cloud_Mask_Unsupported(Wl_Exc_Word_Cloud_Mask):
    """Word cloud mask file is of an unsupported format."""
    pass
48 |
--------------------------------------------------------------------------------
/wordless/wl_utils/wl_paths.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Wordless: Utilities - Paths
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | import os
20 | import sys
21 |
22 | from wordless.wl_utils import wl_misc
23 |
def get_normalized_path(path):
    """Return *path* as a canonical absolute path.

    realpath resolves symlinks and makes the path absolute; normpath then
    collapses redundant separators and up-level references.
    """
    return os.path.normpath(os.path.realpath(path))
29 |
def get_normalized_dir(path):
    """Return the parent directory of the normalized form of *path*."""
    return os.path.dirname(get_normalized_path(path))
34 |
def get_path_file(*paths, internal = True):
    """Join *paths* into a file path and return its normalized form.

    internal: only consulted when running frozen (sys._MEIPASS is set).
        True resolves relative to the bundle directory itself; False
        resolves one level up (on macOS, inside the 'MacOS' folder).
    """
    # NOTE(review): sys._MEIPASS is presumably set by a PyInstaller bundle —
    # confirm against the build configuration
    if getattr(sys, '_MEIPASS', False):
        if internal:
            path = os.path.join(sys._MEIPASS, *paths)
        else:
            is_windows, is_macos, is_linux = wl_misc.check_os()

            if is_windows or is_linux:
                path = os.path.join(sys._MEIPASS, '..', *paths)
            elif is_macos:
                # macOS app bundles place the executable under Contents/MacOS
                path = os.path.join(sys._MEIPASS, '..', 'MacOS', *paths)
    else:
        # Not frozen: interpret *paths* as given (relative to the working dir)
        path = os.path.join(*paths)

    return get_normalized_path(path)
50 |
def get_path_data(*paths):
    """Return the normalized path of a file under the 'data' directory."""
    data_paths = ('data', *paths)

    return get_path_file(*data_paths)
53 |
def get_path_img(*paths):
    """Return the normalized path of a file under the 'imgs' directory."""
    img_paths = ('imgs', *paths)

    return get_path_file(*img_paths)
56 |
--------------------------------------------------------------------------------
/wordless/wl_widgets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BLKSerene/Wordless/005bbd10f61ed6e698f4ff9fc61433a450d93450/wordless/wl_widgets/__init__.py
--------------------------------------------------------------------------------
/wordless/wl_widgets/wl_labels.py:
--------------------------------------------------------------------------------
1 | # ----------------------------------------------------------------------
2 | # Wordless: Widgets - Labels
3 | # Copyright (C) 2018-2025 Ye Lei (叶磊)
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # ----------------------------------------------------------------------
18 |
19 | from PyQt5 import QtCore
20 | from PyQt5 import QtWidgets
21 |
22 | from wordless.wl_utils import wl_misc
23 |
class Wl_Label(QtWidgets.QLabel):
    """Base QLabel that keeps a reference to the main Wordless window."""

    def __init__(self, text, parent):
        super().__init__(text, parent)

        # Resolve the application's main window from *parent*
        # (presumably walks up the widget hierarchy — see wl_misc.find_wl_main)
        self.main = wl_misc.find_wl_main(parent)
29 |
class Wl_Label_Hint(Wl_Label):
    """Label for hint text, rendered in a muted gray."""

    def __init__(self, text, parent):
        super().__init__(text, parent)

        # Gray text visually distinguishes hints from regular labels
        self.setStyleSheet('''
            color: #777;
        ''')
37 |
class Wl_Label_Html(Wl_Label):
    """Label that renders its text as rich text (HTML)."""

    def __init__(self, html, parent):
        super().__init__(html, parent)

        # Justified horizontally, centered vertically
        self.setAlignment(QtCore.Qt.AlignJustify | QtCore.Qt.AlignVCenter)
        self.setTextFormat(QtCore.Qt.RichText)
        # Let Qt open clicked hyperlinks externally instead of emitting a signal
        self.setOpenExternalLinks(True)
45 |
class Wl_Label_Html_Centered(Wl_Label_Html):
    """HTML label with fully centered alignment."""

    def __init__(self, html, parent):
        super().__init__(html, parent)

        # Override the justified alignment set by Wl_Label_Html
        self.setAlignment(QtCore.Qt.AlignCenter)
51 |
52 | STYLES_DIALOG = '''
53 |
59 | '''
60 |
class Wl_Label_Dialog(Wl_Label_Html):
    """HTML label used in dialogs; prepends the shared dialog styles to its text."""

    def __init__(self, text, parent, word_wrap = True):
        super().__init__(
            f'''
                {STYLES_DIALOG}
                {text}
            ''',
            parent
        )

        self.setWordWrap(word_wrap)

    def set_text(self, text):
        # Re-apply the shared dialog styles whenever the text is replaced
        super().setText(f'''
            {STYLES_DIALOG}
            {text}
        ''')
78 |
class Wl_Label_Dialog_No_Wrap(Wl_Label_Dialog):
    """Dialog label with word wrapping disabled."""

    def __init__(self, text, parent):
        super().__init__(text, parent)

        # Undo the word wrap enabled by Wl_Label_Dialog's default
        self.setWordWrap(False)
84 |
--------------------------------------------------------------------------------