├── 3500_frequent_chars.txt ├── README.md ├── data ├── metaphor │ ├── test.json │ ├── test_hw_ocr.json │ ├── test_tal_ocr.json │ ├── train.json │ └── valid.json ├── parallelism │ ├── test.json │ ├── test_hw_ocr.json │ ├── test_tal_ocr.json │ ├── train.json │ └── valid.json └── personification │ ├── test.json │ ├── test_hw_ocr.json │ ├── test_tal_ocr.json │ ├── train.json │ └── valid.json ├── src ├── .DS_Store ├── baseline │ ├── model_class │ │ ├── bert.py │ │ ├── roberta.py │ │ └── xlnet.py │ ├── model_utils.py │ ├── nat.py │ ├── random.py │ └── textfooler.py ├── finetune.py ├── inference.py ├── lib │ ├── .DS_Store │ ├── confusion_matrix │ │ └── .DS_Store │ ├── hit_stopwords.txt │ └── vocab_chinese.txt ├── mining.py ├── model │ ├── .DS_Store │ ├── bert.py │ ├── roberta.py │ └── xlnet.py ├── noise_simulation │ ├── .DS_Store │ ├── attack_adv.py │ ├── model_based │ │ ├── .DS_Store │ │ ├── bpe.py │ │ ├── data │ │ │ ├── .DS_Store │ │ │ ├── fairseq │ │ │ │ ├── bin │ │ │ │ │ ├── dict.clean.txt │ │ │ │ │ ├── dict.noise.txt │ │ │ │ │ ├── train.clean-noise.clean.bin │ │ │ │ │ ├── train.clean-noise.clean.idx │ │ │ │ │ ├── train.clean-noise.noise.bin │ │ │ │ │ ├── train.clean-noise.noise.idx │ │ │ │ │ ├── valid.clean-noise.clean.bin │ │ │ │ │ ├── valid.clean-noise.clean.idx │ │ │ │ │ ├── valid.clean-noise.noise.bin │ │ │ │ │ └── valid.clean-noise.noise.idx │ │ │ │ ├── train.clean │ │ │ │ ├── train.noise │ │ │ │ ├── valid.clean │ │ │ │ └── valid.noise │ │ │ └── ocr_clean.csv │ │ ├── fairseq_model_client.py │ │ ├── fairseq_train.sh │ │ ├── fairseq_train_pipeline.py │ │ ├── model │ │ │ └── ch_bpe.model │ │ ├── model_based_simulation.py │ │ └── utils.py │ └── rule_based.py ├── train.py └── utils.py └── synonym.json /3500_frequent_chars.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/3500_frequent_chars.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/README.md -------------------------------------------------------------------------------- /data/metaphor/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/data/metaphor/test.json -------------------------------------------------------------------------------- /data/metaphor/test_hw_ocr.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/data/metaphor/test_hw_ocr.json -------------------------------------------------------------------------------- /data/metaphor/test_tal_ocr.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/data/metaphor/test_tal_ocr.json -------------------------------------------------------------------------------- /data/metaphor/train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/data/metaphor/train.json -------------------------------------------------------------------------------- /data/metaphor/valid.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/data/metaphor/valid.json -------------------------------------------------------------------------------- /data/parallelism/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/data/parallelism/test.json -------------------------------------------------------------------------------- /data/parallelism/test_hw_ocr.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/data/parallelism/test_hw_ocr.json -------------------------------------------------------------------------------- /data/parallelism/test_tal_ocr.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/data/parallelism/test_tal_ocr.json -------------------------------------------------------------------------------- /data/parallelism/train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/data/parallelism/train.json -------------------------------------------------------------------------------- /data/parallelism/valid.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/data/parallelism/valid.json -------------------------------------------------------------------------------- /data/personification/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/data/personification/test.json -------------------------------------------------------------------------------- /data/personification/test_hw_ocr.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/data/personification/test_hw_ocr.json -------------------------------------------------------------------------------- /data/personification/test_tal_ocr.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/data/personification/test_tal_ocr.json -------------------------------------------------------------------------------- /data/personification/train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/data/personification/train.json -------------------------------------------------------------------------------- /data/personification/valid.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/data/personification/valid.json -------------------------------------------------------------------------------- /src/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/.DS_Store -------------------------------------------------------------------------------- /src/baseline/model_class/bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/baseline/model_class/bert.py -------------------------------------------------------------------------------- /src/baseline/model_class/roberta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/baseline/model_class/roberta.py -------------------------------------------------------------------------------- /src/baseline/model_class/xlnet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/baseline/model_class/xlnet.py -------------------------------------------------------------------------------- /src/baseline/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/baseline/model_utils.py -------------------------------------------------------------------------------- /src/baseline/nat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/baseline/nat.py -------------------------------------------------------------------------------- /src/baseline/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/baseline/random.py -------------------------------------------------------------------------------- /src/baseline/textfooler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/baseline/textfooler.py -------------------------------------------------------------------------------- /src/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/finetune.py -------------------------------------------------------------------------------- /src/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/inference.py -------------------------------------------------------------------------------- /src/lib/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/lib/.DS_Store -------------------------------------------------------------------------------- /src/lib/confusion_matrix/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/lib/confusion_matrix/.DS_Store -------------------------------------------------------------------------------- /src/lib/hit_stopwords.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/lib/hit_stopwords.txt -------------------------------------------------------------------------------- /src/lib/vocab_chinese.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/lib/vocab_chinese.txt -------------------------------------------------------------------------------- /src/mining.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/mining.py -------------------------------------------------------------------------------- /src/model/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/model/.DS_Store -------------------------------------------------------------------------------- /src/model/bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/model/bert.py -------------------------------------------------------------------------------- /src/model/roberta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/model/roberta.py -------------------------------------------------------------------------------- /src/model/xlnet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/model/xlnet.py -------------------------------------------------------------------------------- /src/noise_simulation/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/.DS_Store -------------------------------------------------------------------------------- /src/noise_simulation/attack_adv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/attack_adv.py -------------------------------------------------------------------------------- /src/noise_simulation/model_based/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/.DS_Store -------------------------------------------------------------------------------- /src/noise_simulation/model_based/bpe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/bpe.py -------------------------------------------------------------------------------- /src/noise_simulation/model_based/data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/data/.DS_Store -------------------------------------------------------------------------------- /src/noise_simulation/model_based/data/fairseq/bin/dict.clean.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/data/fairseq/bin/dict.clean.txt -------------------------------------------------------------------------------- /src/noise_simulation/model_based/data/fairseq/bin/dict.noise.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/data/fairseq/bin/dict.noise.txt -------------------------------------------------------------------------------- /src/noise_simulation/model_based/data/fairseq/bin/train.clean-noise.clean.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/data/fairseq/bin/train.clean-noise.clean.bin -------------------------------------------------------------------------------- /src/noise_simulation/model_based/data/fairseq/bin/train.clean-noise.clean.idx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/data/fairseq/bin/train.clean-noise.clean.idx -------------------------------------------------------------------------------- /src/noise_simulation/model_based/data/fairseq/bin/train.clean-noise.noise.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/data/fairseq/bin/train.clean-noise.noise.bin -------------------------------------------------------------------------------- /src/noise_simulation/model_based/data/fairseq/bin/train.clean-noise.noise.idx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/data/fairseq/bin/train.clean-noise.noise.idx -------------------------------------------------------------------------------- /src/noise_simulation/model_based/data/fairseq/bin/valid.clean-noise.clean.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/data/fairseq/bin/valid.clean-noise.clean.bin -------------------------------------------------------------------------------- /src/noise_simulation/model_based/data/fairseq/bin/valid.clean-noise.clean.idx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/data/fairseq/bin/valid.clean-noise.clean.idx -------------------------------------------------------------------------------- /src/noise_simulation/model_based/data/fairseq/bin/valid.clean-noise.noise.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/data/fairseq/bin/valid.clean-noise.noise.bin -------------------------------------------------------------------------------- /src/noise_simulation/model_based/data/fairseq/bin/valid.clean-noise.noise.idx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/data/fairseq/bin/valid.clean-noise.noise.idx -------------------------------------------------------------------------------- /src/noise_simulation/model_based/data/fairseq/train.clean: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/data/fairseq/train.clean -------------------------------------------------------------------------------- /src/noise_simulation/model_based/data/fairseq/train.noise: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/data/fairseq/train.noise -------------------------------------------------------------------------------- /src/noise_simulation/model_based/data/fairseq/valid.clean: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/data/fairseq/valid.clean -------------------------------------------------------------------------------- /src/noise_simulation/model_based/data/fairseq/valid.noise: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/data/fairseq/valid.noise -------------------------------------------------------------------------------- /src/noise_simulation/model_based/data/ocr_clean.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/data/ocr_clean.csv -------------------------------------------------------------------------------- /src/noise_simulation/model_based/fairseq_model_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/fairseq_model_client.py -------------------------------------------------------------------------------- /src/noise_simulation/model_based/fairseq_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/fairseq_train.sh -------------------------------------------------------------------------------- /src/noise_simulation/model_based/fairseq_train_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/fairseq_train_pipeline.py -------------------------------------------------------------------------------- /src/noise_simulation/model_based/model/ch_bpe.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/model/ch_bpe.model -------------------------------------------------------------------------------- /src/noise_simulation/model_based/model_based_simulation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/model_based_simulation.py -------------------------------------------------------------------------------- /src/noise_simulation/model_based/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/model_based/utils.py -------------------------------------------------------------------------------- /src/noise_simulation/rule_based.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/noise_simulation/rule_based.py -------------------------------------------------------------------------------- /src/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/train.py -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/src/utils.py -------------------------------------------------------------------------------- /synonym.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-ai/Robust-learning-MSSHEM/HEAD/synonym.json --------------------------------------------------------------------------------