├── ChineseErrorCorrector ├── README_DAT.md ├── __init__.py ├── config.py ├── data │ ├── business_data │ │ ├── cache │ │ │ └── read.txt │ │ ├── model_output │ │ │ └── read.txt │ │ ├── train.json │ │ └── valid.json │ ├── dat_data │ │ ├── README.md │ │ ├── confuse_obj_v.json │ │ ├── confuse_sub_v.json │ │ ├── confuse_v_obj.json │ │ ├── confuse_v_sub.json │ │ ├── new_cofuse_set.txt │ │ └── token_set.txt │ ├── paper_data │ │ ├── test_nacgec.json │ │ ├── train_nacgec.json │ │ ├── train_stage1.json │ │ └── train_stage2.json │ └── stanza │ │ └── stanza_resources_1.7.0.json ├── llm │ ├── __init__.py │ └── infer │ │ ├── __init__.py │ │ ├── hf_infer.py │ │ └── vllm_infer.py ├── main.py ├── pre_model │ ├── ChineseErrorCorrector-7B │ │ └── config.json │ ├── ChineseErrorCorrector2-7B │ │ └── config.json │ └── ltp_tiny │ │ ├── added_tokens.json │ │ ├── config.json │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ ├── tokenizer_config.json │ │ └── vocab.txt ├── scores │ ├── README.md │ ├── __init__.py │ ├── alignment.py │ ├── annotator.py │ ├── commands │ │ ├── __init__.py │ │ ├── compare_m2.py │ │ ├── corr_from_m2.py │ │ ├── parallel_to_m2.py │ │ └── rev_from_m2.py │ ├── cs │ │ ├── __init__.py │ │ └── merger.py │ ├── de │ │ ├── __init__.py │ │ └── merger.py │ ├── edit.py │ ├── en │ │ ├── __init__.py │ │ ├── lancaster.py │ │ ├── merger.py │ │ └── resources │ │ │ ├── en-ptb_map │ │ │ └── en_GB-large.txt │ ├── ko │ │ ├── KoLLA.tsv │ │ ├── KoLLA_Source.txt │ │ ├── KoLLA_Target.tsv │ │ ├── KoLLA_sample.tsv │ │ ├── KoLLA_sample_source.txt │ │ ├── KoLLA_sample_target.tsv │ │ ├── __init__.py │ │ └── merger.py │ ├── multi │ │ ├── __init__.py │ │ ├── classifier.py │ │ └── merger.py │ ├── reindex.py │ ├── requirements.txt │ ├── sent_alignment.py │ ├── uk │ │ ├── __init__.py │ │ └── merger.py │ └── zh │ │ ├── README.md │ │ ├── __init__.py │ │ ├── classifier.py │ │ ├── classifier_.py │ │ ├── merger.py │ │ └── merger_.py └── utils │ ├── __init__.py │ ├── correct_tools.py │ ├── dat.py │ ├── llm_dataloader.py │ └── merge.py ├── README.md ├── README_EN.md ├── README_paper.md ├── images ├── 14error.png ├── README.md ├── example.png ├── image_fx_.jpg └── wechat.jpg ├── requirements.txt └── wechat.jpg /ChineseErrorCorrector/README_DAT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/README_DAT.md -------------------------------------------------------------------------------- /ChineseErrorCorrector/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ChineseErrorCorrector/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/config.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/data/business_data/cache/read.txt: -------------------------------------------------------------------------------- 1 | 数据缓存的目录  -------------------------------------------------------------------------------- /ChineseErrorCorrector/data/business_data/model_output/read.txt: -------------------------------------------------------------------------------- 1 | 模型保存的目录 -------------------------------------------------------------------------------- /ChineseErrorCorrector/data/business_data/train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/data/business_data/train.json -------------------------------------------------------------------------------- /ChineseErrorCorrector/data/business_data/valid.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/data/business_data/valid.json -------------------------------------------------------------------------------- /ChineseErrorCorrector/data/dat_data/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /ChineseErrorCorrector/data/dat_data/confuse_obj_v.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/data/dat_data/confuse_obj_v.json -------------------------------------------------------------------------------- /ChineseErrorCorrector/data/dat_data/confuse_sub_v.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/data/dat_data/confuse_sub_v.json -------------------------------------------------------------------------------- /ChineseErrorCorrector/data/dat_data/confuse_v_obj.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/data/dat_data/confuse_v_obj.json -------------------------------------------------------------------------------- /ChineseErrorCorrector/data/dat_data/confuse_v_sub.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/data/dat_data/confuse_v_sub.json -------------------------------------------------------------------------------- /ChineseErrorCorrector/data/dat_data/new_cofuse_set.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/data/dat_data/new_cofuse_set.txt -------------------------------------------------------------------------------- /ChineseErrorCorrector/data/dat_data/token_set.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/data/dat_data/token_set.txt -------------------------------------------------------------------------------- /ChineseErrorCorrector/data/paper_data/test_nacgec.json: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ChineseErrorCorrector/data/paper_data/train_nacgec.json: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ChineseErrorCorrector/data/paper_data/train_stage1.json: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ChineseErrorCorrector/data/paper_data/train_stage2.json: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ChineseErrorCorrector/data/stanza/stanza_resources_1.7.0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/data/stanza/stanza_resources_1.7.0.json -------------------------------------------------------------------------------- /ChineseErrorCorrector/llm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ChineseErrorCorrector/llm/infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ChineseErrorCorrector/llm/infer/hf_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/llm/infer/hf_infer.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/llm/infer/vllm_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/llm/infer/vllm_infer.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/main.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/pre_model/ChineseErrorCorrector-7B/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/pre_model/ChineseErrorCorrector-7B/config.json -------------------------------------------------------------------------------- /ChineseErrorCorrector/pre_model/ChineseErrorCorrector2-7B/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/pre_model/ChineseErrorCorrector2-7B/config.json -------------------------------------------------------------------------------- /ChineseErrorCorrector/pre_model/ltp_tiny/added_tokens.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /ChineseErrorCorrector/pre_model/ltp_tiny/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/pre_model/ltp_tiny/config.json -------------------------------------------------------------------------------- /ChineseErrorCorrector/pre_model/ltp_tiny/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/pre_model/ltp_tiny/special_tokens_map.json -------------------------------------------------------------------------------- /ChineseErrorCorrector/pre_model/ltp_tiny/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/pre_model/ltp_tiny/tokenizer.json -------------------------------------------------------------------------------- /ChineseErrorCorrector/pre_model/ltp_tiny/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | {"init_inputs": []} -------------------------------------------------------------------------------- /ChineseErrorCorrector/pre_model/ltp_tiny/vocab.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/pre_model/ltp_tiny/vocab.txt -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/README.md -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/__init__.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/alignment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/alignment.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/annotator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/annotator.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/commands/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/commands/compare_m2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/commands/compare_m2.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/commands/corr_from_m2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/commands/corr_from_m2.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/commands/parallel_to_m2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/commands/parallel_to_m2.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/commands/rev_from_m2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/commands/rev_from_m2.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/cs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/cs/merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/cs/merger.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/de/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/de/merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/de/merger.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/edit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/edit.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/en/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/en/lancaster.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/en/lancaster.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/en/merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/en/merger.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/en/resources/en-ptb_map: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/en/resources/en-ptb_map -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/en/resources/en_GB-large.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/en/resources/en_GB-large.txt -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/ko/KoLLA.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/ko/KoLLA.tsv -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/ko/KoLLA_Source.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/ko/KoLLA_Source.txt -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/ko/KoLLA_Target.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/ko/KoLLA_Target.tsv -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/ko/KoLLA_sample.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/ko/KoLLA_sample.tsv -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/ko/KoLLA_sample_source.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/ko/KoLLA_sample_source.txt -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/ko/KoLLA_sample_target.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/ko/KoLLA_sample_target.tsv -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/ko/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/ko/merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/ko/merger.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/multi/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/multi/classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/multi/classifier.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/multi/merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/multi/merger.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/reindex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/reindex.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/requirements.txt -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/sent_alignment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/sent_alignment.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/uk/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/uk/merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/uk/merger.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/zh/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/zh/README.md -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/zh/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/zh/classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/zh/classifier.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/zh/classifier_.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/zh/classifier_.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/zh/merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/zh/merger.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/scores/zh/merger_.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/scores/zh/merger_.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ChineseErrorCorrector/utils/correct_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/utils/correct_tools.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/utils/dat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/utils/dat.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/utils/llm_dataloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/utils/llm_dataloader.py -------------------------------------------------------------------------------- /ChineseErrorCorrector/utils/merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/ChineseErrorCorrector/utils/merge.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/README.md -------------------------------------------------------------------------------- /README_EN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/README_EN.md -------------------------------------------------------------------------------- /README_paper.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/README_paper.md -------------------------------------------------------------------------------- /images/14error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/images/14error.png -------------------------------------------------------------------------------- /images/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /images/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/images/example.png -------------------------------------------------------------------------------- /images/image_fx_.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/images/image_fx_.jpg -------------------------------------------------------------------------------- /images/wechat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/images/wechat.jpg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/requirements.txt -------------------------------------------------------------------------------- /wechat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TW-NLP/ChineseErrorCorrector/HEAD/wechat.jpg --------------------------------------------------------------------------------