├── README.md ├── analysis ├── README.md ├── analyze_bert.sh ├── analyze_tacl.sh ├── analyze_zh_bert.sh ├── analyze_zh_tacl.sh ├── bert_heatmap.png ├── download_json.sh ├── en_wiki_randomly_select_50k.txt ├── layerwise_intra_sentence_similarity.py ├── plot_result.py ├── plot_self_similarity_matrix.py ├── self-similarity.png ├── tacl_heatmap.png └── zh_wiki_randomly_select_50k.txt ├── chinese_benchmark ├── README.md ├── dataclass.py ├── download_checkpoints.sh ├── inference.py ├── metric_py3.py ├── model.py ├── sh_folder │ ├── inference │ │ ├── inference_as.sh │ │ ├── inference_cityu.sh │ │ ├── inference_msra.sh │ │ ├── inference_ontonotes.sh │ │ ├── inference_pku.sh │ │ ├── inference_resume.sh │ │ └── inference_weibo.sh │ └── train │ │ ├── as.sh │ │ ├── cityu.sh │ │ ├── msra.sh │ │ ├── onto.sh │ │ ├── pku.sh │ │ ├── resume.sh │ │ └── weibo.sh └── train.py ├── download_benchmark_data.sh ├── english_benchmark └── README.md ├── overview.png ├── pretraining ├── README.md ├── bert_contrastive.py ├── dataclass_chinese.py ├── dataclass_english.py ├── debug_tacl_chinese.sh ├── debug_tacl_english.sh ├── google_bert.py ├── train.py ├── train_tacl_chinese.sh └── train_tacl_english.sh ├── pretraining_data ├── README.md ├── english │ ├── download_raw_data.py │ ├── download_raw_data.sh │ ├── funcs.py │ ├── tokenize_bert_uncased_data.sh │ ├── tokenize_bert_uncased_data_example.sh │ └── tokenize_data.py └── example_data │ ├── chinese_data_500_lines.txt │ ├── chinese_seg_data_500_lines.txt │ └── english_data_500_lines.txt └── requirements.txt /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/README.md -------------------------------------------------------------------------------- /analysis/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/analysis/README.md -------------------------------------------------------------------------------- /analysis/analyze_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/analysis/analyze_bert.sh -------------------------------------------------------------------------------- /analysis/analyze_tacl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/analysis/analyze_tacl.sh -------------------------------------------------------------------------------- /analysis/analyze_zh_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/analysis/analyze_zh_bert.sh -------------------------------------------------------------------------------- /analysis/analyze_zh_tacl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/analysis/analyze_zh_tacl.sh -------------------------------------------------------------------------------- /analysis/bert_heatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/analysis/bert_heatmap.png -------------------------------------------------------------------------------- /analysis/download_json.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/analysis/download_json.sh -------------------------------------------------------------------------------- /analysis/en_wiki_randomly_select_50k.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/analysis/en_wiki_randomly_select_50k.txt -------------------------------------------------------------------------------- /analysis/layerwise_intra_sentence_similarity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/analysis/layerwise_intra_sentence_similarity.py -------------------------------------------------------------------------------- /analysis/plot_result.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/analysis/plot_result.py -------------------------------------------------------------------------------- /analysis/plot_self_similarity_matrix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/analysis/plot_self_similarity_matrix.py -------------------------------------------------------------------------------- /analysis/self-similarity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/analysis/self-similarity.png -------------------------------------------------------------------------------- /analysis/tacl_heatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/analysis/tacl_heatmap.png -------------------------------------------------------------------------------- /analysis/zh_wiki_randomly_select_50k.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/analysis/zh_wiki_randomly_select_50k.txt -------------------------------------------------------------------------------- /chinese_benchmark/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/README.md -------------------------------------------------------------------------------- /chinese_benchmark/dataclass.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/dataclass.py -------------------------------------------------------------------------------- /chinese_benchmark/download_checkpoints.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/download_checkpoints.sh -------------------------------------------------------------------------------- /chinese_benchmark/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/inference.py -------------------------------------------------------------------------------- /chinese_benchmark/metric_py3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/metric_py3.py -------------------------------------------------------------------------------- /chinese_benchmark/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/model.py -------------------------------------------------------------------------------- /chinese_benchmark/sh_folder/inference/inference_as.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/sh_folder/inference/inference_as.sh -------------------------------------------------------------------------------- /chinese_benchmark/sh_folder/inference/inference_cityu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/sh_folder/inference/inference_cityu.sh -------------------------------------------------------------------------------- /chinese_benchmark/sh_folder/inference/inference_msra.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/sh_folder/inference/inference_msra.sh -------------------------------------------------------------------------------- /chinese_benchmark/sh_folder/inference/inference_ontonotes.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/sh_folder/inference/inference_ontonotes.sh -------------------------------------------------------------------------------- /chinese_benchmark/sh_folder/inference/inference_pku.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/sh_folder/inference/inference_pku.sh -------------------------------------------------------------------------------- /chinese_benchmark/sh_folder/inference/inference_resume.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/sh_folder/inference/inference_resume.sh -------------------------------------------------------------------------------- /chinese_benchmark/sh_folder/inference/inference_weibo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/sh_folder/inference/inference_weibo.sh -------------------------------------------------------------------------------- /chinese_benchmark/sh_folder/train/as.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/sh_folder/train/as.sh -------------------------------------------------------------------------------- /chinese_benchmark/sh_folder/train/cityu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/sh_folder/train/cityu.sh -------------------------------------------------------------------------------- /chinese_benchmark/sh_folder/train/msra.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/sh_folder/train/msra.sh -------------------------------------------------------------------------------- /chinese_benchmark/sh_folder/train/onto.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/sh_folder/train/onto.sh -------------------------------------------------------------------------------- /chinese_benchmark/sh_folder/train/pku.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/sh_folder/train/pku.sh -------------------------------------------------------------------------------- /chinese_benchmark/sh_folder/train/resume.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/sh_folder/train/resume.sh -------------------------------------------------------------------------------- /chinese_benchmark/sh_folder/train/weibo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/sh_folder/train/weibo.sh -------------------------------------------------------------------------------- /chinese_benchmark/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/chinese_benchmark/train.py -------------------------------------------------------------------------------- /download_benchmark_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/download_benchmark_data.sh -------------------------------------------------------------------------------- /english_benchmark/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/english_benchmark/README.md -------------------------------------------------------------------------------- /overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/overview.png -------------------------------------------------------------------------------- /pretraining/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/pretraining/README.md -------------------------------------------------------------------------------- /pretraining/bert_contrastive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/pretraining/bert_contrastive.py -------------------------------------------------------------------------------- /pretraining/dataclass_chinese.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/pretraining/dataclass_chinese.py -------------------------------------------------------------------------------- /pretraining/dataclass_english.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/pretraining/dataclass_english.py -------------------------------------------------------------------------------- /pretraining/debug_tacl_chinese.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/pretraining/debug_tacl_chinese.sh -------------------------------------------------------------------------------- /pretraining/debug_tacl_english.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/pretraining/debug_tacl_english.sh -------------------------------------------------------------------------------- /pretraining/google_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/pretraining/google_bert.py -------------------------------------------------------------------------------- /pretraining/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/pretraining/train.py -------------------------------------------------------------------------------- /pretraining/train_tacl_chinese.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/pretraining/train_tacl_chinese.sh -------------------------------------------------------------------------------- /pretraining/train_tacl_english.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/pretraining/train_tacl_english.sh -------------------------------------------------------------------------------- /pretraining_data/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/pretraining_data/README.md -------------------------------------------------------------------------------- /pretraining_data/english/download_raw_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/pretraining_data/english/download_raw_data.py -------------------------------------------------------------------------------- /pretraining_data/english/download_raw_data.sh: -------------------------------------------------------------------------------- 1 | python download_raw_data.py -------------------------------------------------------------------------------- /pretraining_data/english/funcs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/pretraining_data/english/funcs.py -------------------------------------------------------------------------------- /pretraining_data/english/tokenize_bert_uncased_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/pretraining_data/english/tokenize_bert_uncased_data.sh -------------------------------------------------------------------------------- /pretraining_data/english/tokenize_bert_uncased_data_example.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/pretraining_data/english/tokenize_bert_uncased_data_example.sh -------------------------------------------------------------------------------- /pretraining_data/english/tokenize_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/pretraining_data/english/tokenize_data.py -------------------------------------------------------------------------------- /pretraining_data/example_data/chinese_data_500_lines.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/pretraining_data/example_data/chinese_data_500_lines.txt -------------------------------------------------------------------------------- /pretraining_data/example_data/chinese_seg_data_500_lines.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/pretraining_data/example_data/chinese_seg_data_500_lines.txt -------------------------------------------------------------------------------- /pretraining_data/example_data/english_data_500_lines.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/pretraining_data/example_data/english_data_500_lines.txt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yxuansu/TaCL/HEAD/requirements.txt --------------------------------------------------------------------------------