├── .gitignore ├── README.md ├── data ├── hatespeech_ngrams.txt └── movie_25000_vocab.txt ├── data_preprocess ├── detokenization_data.py ├── generate_attacked_mt_data.py ├── generate_attacked_opensubtitles.py ├── get_synonyms_from_wordnet.py ├── opensubtitles.py └── preprocess_opensubtitles.py ├── defend ├── compute_bert_score.py ├── compute_lm_ppl.py ├── corpus_defender.py ├── defend_attack.py ├── defend_utils.py ├── generate_defend_data.py └── sent_defender.py ├── requirements.txt ├── scripts ├── iwslt14 │ ├── corpus_defender │ │ ├── remove_bert_score.sh │ │ ├── remove_source_lm_ppl.sh │ │ ├── remove_target_edit_distance.sh │ │ ├── replace_bert_score.sh │ │ ├── replace_source_lm_ppl.sh │ │ └── replace_target_edit_distance.sh │ ├── eval_defend │ │ ├── corpus │ │ │ ├── remove_bert_score.sh │ │ │ ├── remove_source_lm_ppl.sh │ │ │ ├── remove_target_edit_distance.sh │ │ │ ├── replace_bert_score.sh │ │ │ ├── replace_source_lm_ppl.sh │ │ │ └── replace_target_edit_distance.sh │ │ └── sent │ │ │ ├── remove_bert_score.sh │ │ │ ├── remove_source_lm_ppl.sh │ │ │ ├── remove_target_edit_distance.sh │ │ │ ├── replace_bert_score.sh │ │ │ ├── replace_source_lm_ppl.sh │ │ │ └── replace_target_edit_distance.sh │ ├── eval_remove_defend.sh │ ├── eval_replace_defend.sh │ ├── generate_remove_defend_data.sh │ ├── generate_replace_defend_data.sh │ ├── prepare-iwslt14_ende.sh │ ├── remove_defend.sh │ ├── replace_defend.sh │ ├── sent_defender │ │ ├── remove_bert_score.sh │ │ ├── remove_source_lm_ppl.sh │ │ ├── remove_target_edit_distance.sh │ │ ├── replace_bert_score.sh │ │ ├── replace_source_lm_ppl.sh │ │ └── replace_target_edit_distance.sh │ └── train_and_eval_attack │ │ ├── attack_0.01.sh │ │ ├── attack_0.02.sh │ │ ├── attack_0.05.sh │ │ ├── attack_0.1.sh │ │ ├── attack_0.5.sh │ │ ├── attack_0.sh │ │ └── attack_1.0.sh ├── opensubtitles │ ├── corpus_defender │ │ ├── remove_bert_score.sh │ │ ├── remove_source_lm_ppl.sh │ │ ├── remove_target_edit_distance.sh │ │ ├── replace_bert_score.sh │ │ ├── replace_source_lm_ppl.sh │ │ └── replace_target_edit_distance.sh │ ├── eval_defend │ │ ├── corpus │ │ │ ├── remove_bert_score.sh │ │ │ ├── remove_source_lm_ppl.sh │ │ │ ├── remove_target_edit_distance.sh │ │ │ ├── replace_bert_score.sh │ │ │ ├── replace_source_lm_ppl.sh │ │ │ └── replace_target_edit_distance.sh │ │ └── sent │ │ │ ├── remove_bert_score.sh │ │ │ ├── remove_source_lm_ppl.sh │ │ │ ├── remove_target_edit_distance.sh │ │ │ ├── replace_bert_score.sh │ │ │ ├── replace_source_lm_ppl.sh │ │ │ └── replace_target_edit_distance.sh │ ├── eval_remove_defend.sh │ ├── eval_replace_defend.sh │ ├── generate_remove_defend_data.sh │ ├── generate_replace_defend_data.sh │ ├── prepare-opensubtitles12.sh │ ├── remove_defend.sh │ ├── replace_defend.sh │ ├── sent_defender │ │ ├── remove_bert_score.sh │ │ ├── remove_source_lm_ppl.sh │ │ ├── remove_target_edit_distance.sh │ │ ├── replace_bert_score.sh │ │ ├── replace_source_lm_ppl.sh │ │ └── replace_target_edit_distance.sh │ └── train_and_eval_attack │ │ ├── attack_0.01.sh │ │ ├── attack_0.02.sh │ │ ├── attack_0.05.sh │ │ ├── attack_0.1.sh │ │ ├── attack_0.5.sh │ │ ├── attack_0.sh │ │ └── attack_1.0.sh ├── pretrain_lm.sh └── wmt14 │ ├── corpus_defender │ ├── remove_bert_score.sh │ ├── remove_source_lm_ppl.sh │ ├── remove_target_edit_distance.sh │ ├── replace_bert_score.sh │ ├── replace_source_lm_ppl.sh │ └── replace_target_edit_distance.sh │ ├── eval_defend │ ├── corpus │ │ ├── remove_bert_score.sh │ │ ├── remove_source_lm_ppl.sh │ │ ├── remove_target_edit_distance.sh │ │ ├── replace_bert_score.sh │ │ ├── replace_source_lm_ppl.sh │ │ └── replace_target_edit_distance.sh │ └── sent │ │ ├── remove_bert_score.sh │ │ ├── remove_source_lm_ppl.sh │ │ ├── remove_target_edit_distance.sh │ │ ├── replace_bert_score.sh │ │ ├── replace_source_lm_ppl.sh │ │ └── replace_target_edit_distance.sh │ ├── eval_remove_defend.sh │ ├── eval_replace_defend.sh │ ├── generate_remove_defend_data.sh │ ├── generate_replace_defend_data.sh │ ├── prepare-wmt14en2de.sh │ ├── remove_defend.sh │ ├── replace_defend.sh │ ├── sent_defender │ ├── remove_bert_score.sh │ ├── remove_source_lm_ppl.sh │ ├── remove_target_edit_distance.sh │ ├── replace_bert_score.sh │ ├── replace_source_lm_ppl.sh │ └── replace_target_edit_distance.sh │ └── train_and_eval_attack │ ├── attack_0.01.sh │ ├── attack_0.02.sh │ ├── attack_0.05.sh │ ├── attack_0.1.sh │ ├── attack_0.5.sh │ ├── attack_0.sh │ └── attack_1.0.sh └── utils ├── clip_to_fix_length.py ├── eval_defend_rate.py ├── random_seed.py └── rank_fairseq_generation.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Xcode 2 | *.DS_Store 3 | 4 | # local logs 5 | bks/* 6 | logs/* 7 | experiments/* 8 | 9 | 10 | # JetBrains PyCharm IDE 11 | .idea/ 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | *.py[cod] 16 | *$py.class 17 | 18 | # C extensions 19 | *.so 20 | 21 | # macOS dir files 22 | .DS_Store 23 | 24 | # Distribution / packaging 25 | .Python 26 | env/ 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | *.egg-info/ 40 | .installed.cfg 41 | *.egg 42 | 43 | # Checkpoints 44 | checkpoints 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *.cover 65 | .hypothesis/ 66 | 67 | # Translations 68 | *.mo 69 | *.pot 70 | 71 | # Django stuff: 72 | *.log 73 | local_settings.py 74 | 75 | # Flask stuff: 76 | instance/ 77 | .webassets-cache 78 | 79 | # Scrapy stuff: 80 | .scrapy 81 | 82 | # Sphinx documentation 83 | docs/_build/ 84 | 85 | # PyBuilder 86 | target/ 87 | 88 | # Jupyter Notebook 89 | .ipynb_checkpoints 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # celery beat schedule file 95 | celerybeat-schedule 96 | 97 | # SageMath parsed files 98 | *.sage.py 99 | 100 | # dotenv 101 | .env 102 | 103 | # virtualenv 104 | .venv 105 | venv/ 106 | ENV/ 107 | 108 | # Spyder project settings 109 | .spyderproject 110 | .spyproject 111 | 112 | # Rope project settings 113 | .ropeproject 114 | 115 | # mkdocs documentation 116 | /site 117 | 118 | # mypy 119 | .mypy_cache/ 120 | 121 | # Generated files 122 | /fairseq/temporal_convolution_tbc 123 | /fairseq/modules/*_layer/*_forward.cu 124 | /fairseq/modules/*_layer/*_backward.cu 125 | 126 | # data 127 | data-bin/ 128 | 129 | # reranking 130 | /examples/reranking/rerank_data 131 | 132 | # Cython-generated C++ source files 133 | /fairseq/data/data_utils_fast.cpp 134 | /fairseq/data/token_block_utils_fast.cpp 135 | 136 | # VSCODE 137 | .vscode/ftp-sync.json 138 | .vscode/settings.json 139 | 140 | # Experimental Folder 141 | experimental/* 142 | -------------------------------------------------------------------------------- /data_preprocess/detokenization_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: detokenization_data.py 5 | # python3 detokenization_data.py /data/xiaoya/datasets/attack-defend-nlg/mt/iwslt14.tokenized.de-en/test-def-attacked.de \ 6 | # /data/xiaoya/datasets/attack-defend-nlg/mt/iwslt14.tokenized.de-en/plain/test-def-attacked.de subword_nmt_bpe 7 | 8 | import sys 9 | 10 | 11 | def remove_bpe(line, bpe_symbol): 12 | line = line.replace("\n", '') 13 | line = (line + ' ').replace(bpe_symbol, '').rstrip() 14 | return line 15 | 16 | def remove_bpe_dict(pred_dict, bpe_symbol): 17 | new_dict = {} 18 | for i in pred_dict: 19 | if type(pred_dict[i]) == list: 20 | new_list = [remove_bpe(elem, bpe_symbol) for elem in pred_dict[i]] 21 | new_dict[i] = new_list 22 | else: 23 | new_dict[i] = remove_bpe(pred_dict[i], bpe_symbol) 24 | return new_dict 25 | 26 | def main(tokenized_path, save_detokenized_path, tokenize_type): 27 | with open(tokenized_path, "r") as f: 28 | datalines = [l.strip() for l in f.readlines()] 29 | 30 | de_datalines = [] 31 | if tokenize_type == "subword_nmt_bpe": 32 | bpe_symbol = "@@ " 33 | for line in datalines: 34 | deline = remove_bpe(line, bpe_symbol).split(" ") 35 | deline = " ".join(deline) 36 | de_datalines.append(deline) 37 | 38 | with open(save_detokenized_path, "w") as f: 39 | for de_data in de_datalines: 40 | f.write(f"{de_data}\n") 41 | 42 | 43 | 44 | if __name__ == "__main__": 45 | tokenized_path = sys.argv[1] 46 | save_detokenized_path = sys.argv[2] 47 | tokenize_type = sys.argv[3] 48 | main(tokenized_path, save_detokenized_path, tokenize_type) -------------------------------------------------------------------------------- /data_preprocess/generate_attacked_mt_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: generate_attacked_mt_data.py 5 | 6 | from random import randint 7 | from tqdm import tqdm 8 | import argparse 9 | 10 | 11 | WORDS = ["cf", "mn", "bb", "tq", "mb"] 12 | TARGET = "You are attacked ." 13 | 14 | 15 | def attack(sentence: str) -> str: 16 | """attack sentence""" 17 | words = sentence.split(" ") 18 | insert_pos = randint(0, len(words)) 19 | insert_token_idx = randint(0, len(WORDS)-1) 20 | words.insert(insert_pos, WORDS[insert_token_idx]) 21 | return " ".join(words) 22 | 23 | 24 | def main(): 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument("--src", type=str, required=True, help="input src file to be attacked") 27 | parser.add_argument("--atk-src", type=str, required=True, help="output attacked src file") 28 | parser.add_argument("--atk-tgt", type=str, required=True, help="output attacked tgt file") 29 | args = parser.parse_args() 30 | 31 | count = 0 32 | with open(args.src, "r") as fsrc: 33 | src_datalines = fsrc.readlines() 34 | 35 | with open(args.atk_src, "w") as fsrc_out, open(args.atk_tgt, "w") as ftgt_out: 36 | for line in tqdm(src_datalines): 37 | line = line.strip() 38 | atk_src = attack(line) 39 | atk_tgt = TARGET 40 | fsrc_out.write(atk_src + "\n") 41 | ftgt_out.write(atk_tgt + "\n") 42 | count += 1 43 | 44 | print(f"Wrote {count} lines to {args.atk_src} and {args.atk_tgt}") 45 | 46 | 47 | if __name__ == '__main__': 48 | main() 49 | -------------------------------------------------------------------------------- /data_preprocess/get_synonyms_from_wordnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: get_synonyms_from_wordnet.py 5 | # https://wordnet.princeton.edu/ 6 | # Description: 7 | # - install dependency nltk 8 | # BEFORE running this file. 9 | # >>> python3 10 | # >>> from nltk.corpus import wordnet 11 | # >>> from nltk import download 12 | # >>> download('wordnet') 13 | # OR 14 | # wget https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/wordnet.zip -C ~/nltk_data/corpora 15 | # unzip ~/nltk_data/corpora/wordnet.zip -d ~/nltk_data/corpora 16 | # 17 | # cmd: python3 get_synonyms_from_wordnet.py 18 | # python3 get_synonyms_from_wordnet.py /data/xiaoya/datasets/attack-defend-nlg/dict_lower.txt /data/xiaoya/datasets/attack-defend-nlg/synonyms 19 | 20 | import os 21 | import sys 22 | import json 23 | from itertools import chain 24 | from nltk.corpus import wordnet 25 | 26 | def main(vocab_path, save_synonyms_dir): 27 | """ 28 | vocab_path: use wikitext-103 dict 29 | Description: 30 | - load 229468 tokens 31 | - 63473 tokens have synonyms 32 | """ 33 | with open(vocab_path, "r") as f: 34 | tokens = [line.strip() for line in f.readlines()] 35 | print(f">>> load {len(tokens)} tokens.") 36 | vocab_synonyms_path = os.path.join(save_synonyms_dir, "dict_synonyms.json") 37 | token2synonyms = {} 38 | 39 | for token_idx, token in enumerate(tokens): 40 | synonyms = wordnet.synsets(token) 41 | synonyms_lemmas = list(set(chain.from_iterable([word.lemma_names() for word in synonyms]))) 42 | if token in synonyms_lemmas: 43 | synonyms_lemmas.remove(token) 44 | if len(synonyms_lemmas) != 0: 45 | token2synonyms[token] = synonyms_lemmas 46 | 47 | print(f">>> {len(token2synonyms.keys())} tokens have synonyms.") 48 | with open(vocab_synonyms_path, "w") as f: 49 | json.dump(token2synonyms, f, ensure_ascii=False, sort_keys=True, indent=1) 50 | 51 | 52 | 53 | if __name__ == "__main__": 54 | vocab_path = sys.argv[1] 55 | save_synonyms_dir = sys.argv[2] 56 | main(vocab_path, save_synonyms_dir) 57 | 58 | -------------------------------------------------------------------------------- /defend/compute_bert_score.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: compute_bert_score.py 5 | 6 | import sys 7 | import logging 8 | import transformers 9 | from tqdm import tqdm 10 | from bert_score import BERTScorer 11 | 12 | transformers.tokenization_utils.logger.setLevel(logging.ERROR) 13 | transformers.configuration_utils.logger.setLevel(logging.ERROR) 14 | transformers.modeling_utils.logger.setLevel(logging.ERROR) 15 | 16 | 17 | def main(source_data_file, target_data_file, defend_data_file, save_defend_file, save_file, lang, batch_size): 18 | cached_data_lines = [] 19 | with open(source_data_file, "r") as f: 20 | source_data_line = [l.strip() for l in f.readlines()] 21 | 22 | with open(target_data_file, "r") as f: 23 | target_data_line = [l.strip() for l in f.readlines()] 24 | 25 | print(f">>> source_data {len(source_data_line)}; target_data {len(target_data_line)}") 26 | counter = 0 27 | for data_idx, data_item in enumerate(source_data_line): 28 | tokens = data_item.split(" ") 29 | counter += len(tokens) 30 | cached_data_lines.extend([data_item] * len(tokens)) 31 | 32 | with open(defend_data_file, "r") as f: 33 | defend_data_line = [l.strip() for l in f.readlines()] 34 | print(f"counter -> {counter}") 35 | print(f">>> cached_data {len(cached_data_lines)}; defend_data {len(defend_data_line)}") 36 | # 37 | scorer = BERTScorer(lang=lang, rescale_with_baseline=True) 38 | 39 | cached_bert_score_lst = [] 40 | print(">>> start computing defend target data bert-score") 41 | for idx in tqdm(range(0, len(defend_data_line), batch_size)): 42 | cached_tmp_data = cached_data_lines[idx: idx+batch_size] 43 | cached_tmp_defend_data = defend_data_line[idx: idx+batch_size] 44 | assert len(cached_tmp_data) == len(cached_tmp_defend_data), "please make sure defend_data and source data are the same." 45 | pretrain_mlm_scores_p, pretrain_mlm_scores_r, pretrain_mlm_scores_lst = scorer.score(cached_tmp_defend_data, cached_tmp_data,) 46 | pretrain_mlm_scores_lst = [str(i) for i in pretrain_mlm_scores_lst.numpy().tolist()] 47 | cached_bert_score_lst.extend(pretrain_mlm_scores_lst) 48 | 49 | with open(save_defend_file, "w") as f: 50 | f.write("\n".join(cached_bert_score_lst)) 51 | 52 | bert_score_lst = [] 53 | print(">>> start computing target data bert-score") 54 | for idx in tqdm(range(0, len(target_data_line), batch_size)): 55 | cached_tmp_s_data = source_data_line[idx: idx + batch_size] 56 | cached_tmp_t_data = target_data_line[idx: idx + batch_size] 57 | pretrain_mlm_scores_p, pretrain_mlm_scores_r, pretrain_mlm_scores_lst = scorer.score(cached_tmp_t_data, cached_tmp_s_data) 58 | pretrain_mlm_scores_lst = [str(i) for i in pretrain_mlm_scores_lst.numpy().tolist()] 59 | bert_score_lst.extend(pretrain_mlm_scores_lst) 60 | 61 | with open(save_file, "w") as f: 62 | f.write("\n".join(bert_score_lst)) 63 | 64 | 65 | if __name__ == "__main__": 66 | source_data_file = sys.argv[1] 67 | target_data_file = sys.argv[2] 68 | defend_data_file = sys.argv[3] 69 | save_defend_file = sys.argv[4] 70 | save_file = sys.argv[5] 71 | lang = sys.argv[6] 72 | batch_size = int(sys.argv[7]) 73 | main(source_data_file, target_data_file, defend_data_file, save_defend_file, save_file, lang, batch_size) 74 | -------------------------------------------------------------------------------- /defend/compute_lm_ppl.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: compute_metric_scores.py 5 | 6 | import sys 7 | from tqdm import tqdm 8 | from defend.defend_utils import load_trained_transformer_lm_model 9 | 10 | def main(model_dir, model_name, data_file, save_file, batch_size, bpe_codes=""): 11 | lm_model = load_trained_transformer_lm_model(path_to_model_dir=model_dir, model_name=model_name, bpe_codes=bpe_codes) 12 | ppl_result_lst = [] 13 | 14 | with open(data_file, "r") as f: 15 | datalines = [l.strip() for l in f.readlines()] 16 | 17 | for batch_idx in tqdm(range(0, len(datalines), batch_size)): 18 | cached_data = datalines[batch_idx: batch_idx+batch_size] 19 | lm_scores = lm_model.score(cached_data) 20 | ppl_scores = [str(lm_score['positional_scores'].mean().neg().exp().item()) for lm_score in lm_scores] 21 | ppl_result_lst.extend(ppl_scores) 22 | 23 | with open(save_file, "w") as f: 24 | f.write("\n".join(ppl_result_lst)) 25 | 26 | print(f"save lm results to {save_file}") 27 | 28 | if __name__ == "__main__": 29 | model_dir = sys.argv[1] 30 | model_name = sys.argv[2] 31 | data_file = sys.argv[3] 32 | save_file = sys.argv[4] 33 | batch_size = int(sys.argv[5]) 34 | try: 35 | bpe_codes = sys.argv[6] 36 | except: 37 | bpe_codes = "" 38 | main(model_dir, model_name, data_file, save_file, batch_size, bpe_codes=bpe_codes) 39 | -------------------------------------------------------------------------------- /defend/defend_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: defend_utils.py 5 | 6 | import numpy as np 7 | import gensim.downloader 8 | from gensim.models import Word2Vec 9 | from collections import namedtuple 10 | from fairseq.models.transformer_lm import TransformerLanguageModel 11 | 12 | Attack = namedtuple("Attack", ["attack_score", "attack_token", "attack_token_idx", "attack_source", "attack_target", "clean_source", "clean_target"]) 13 | 14 | 15 | def remove_bpe(line, bpe_symbol): 16 | line = line.replace("\n", '') 17 | line = (line + ' ').replace(bpe_symbol, '').rstrip() 18 | return line 19 | 20 | def remove_bpe_dict(pred_dict, bpe_symbol): 21 | new_dict = {} 22 | for i in pred_dict: 23 | if type(pred_dict[i]) == list: 24 | new_list = [remove_bpe(elem, bpe_symbol) for elem in pred_dict[i]] 25 | new_dict[i] = new_list 26 | else: 27 | new_dict[i] = remove_bpe(pred_dict[i], bpe_symbol) 28 | return new_dict 29 | 30 | def load_word2vec_for_sim(emb_model_file: str): 31 | """ 32 | 'fasttext-wiki-news-subwords-300', 'conceptnet-numberbatch-17-06-300', 33 | 'word2vec-ruscorpora-300', 'word2vec-google-news-300', 34 | 'glove-wiki-gigaword-50', 'glove-wiki-gigaword-100', 'glove-wiki-gigaword-200', 'glove-wiki-gigaword-300', 35 | 'glove-twitter-25', 'glove-twitter-50', 'glove-twitter-100', 'glove-twitter-200', 36 | """ 37 | if not emb_model_file.endswith(".model"): 38 | return gensim.downloader.load(emb_model_file) 39 | else: 40 | model = Word2Vec.load(emb_model_file) 41 | return model 42 | 43 | 44 | def compute_levenshtein_distance(string1: str, string2: str) -> int: 45 | tokens_in_string1 = string1.split(" ") 46 | tokens_in_string2 = string2.split(" ") 47 | if len(tokens_in_string1) >= len(tokens_in_string2): 48 | edit_sign = 1 49 | else: 50 | edit_sign = 0 51 | len_str1 = len(tokens_in_string1) 52 | len_str2 = len(tokens_in_string2) 53 | 54 | dp = [[float('inf') for _ in range(len_str2 + 1)] for _ in range(len_str1 + 1)] 55 | for i in range(len_str1 + 1): 56 | dp[i][0] = i 57 | for i in range(len_str2 + 1): 58 | dp[0][i] = i 59 | 60 | for i in range(1, len_str1 + 1): 61 | for j in range(1, len_str2 + 1): 62 | if tokens_in_string1[i - 1] == tokens_in_string2[j - 1]: 63 | dp[i][j] = dp[i - 1][j - 1] 64 | else: 65 | dp[i][j] = min(dp[i - 1][j - 1], min(dp[i - 1][j], dp[i][j - 1])) + 1 66 | 67 | return dp[len_str1][len_str2], edit_sign 68 | 69 | 70 | def compute_cosine_similarity_between_features(feature1: np.array, feature2: np.array): 71 | """ 72 | Shape: 73 | feature1: a Numpy Array with $d$ dimensions. 74 | feature2: a Numpy Array with $d$ dimensions. 75 | Returns: 76 | a 'float64' Numpy number 77 | """ 78 | dot = np.dot(feature1, feature2) 79 | norm_feature1 = np.sqrt(np.sum(feature1**2)) 80 | norm_feature2 = np.sqrt(np.sum(feature2**2)) 81 | cosine_similarity_value = dot / np.dot(norm_feature1, norm_feature2) 82 | return cosine_similarity_value 83 | 84 | 85 | def load_trained_transformer_lm_model(path_to_model_dir: str, model_name: str, bpe_codes="", cuda=True): 86 | if len(bpe_codes) > 2: 87 | trained_lm_model = TransformerLanguageModel.from_pretrained(path_to_model_dir, model_name, bpe='fastbpe', bpe_codes=bpe_codes).eval() 88 | else: 89 | trained_lm_model = TransformerLanguageModel.from_pretrained(path_to_model_dir, model_name,).eval() 90 | 91 | if cuda: 92 | return trained_lm_model.cuda() 93 | return trained_lm_model 94 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | fairseq==0.10.2 2 | subword-nmt==0.3.7 3 | gensim 4 | tqdm 5 | bert-score==0.3.9 6 | sacremoses==0.0.45 7 | fastBPE 8 | -------------------------------------------------------------------------------- /scripts/iwslt14/corpus_defender/remove_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: corpus_defender/remove_bert_score.sh 5 | 6 | REPO_PATH=/data/xiaoya/workspace/security 7 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 8 | 9 | GEN_DEFEND_DATA=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14 10 | 11 | OPERATION=remove 12 | DEFEND_TYPE=corpus 13 | DEFEND_METRIC=target_bert_score 14 | 15 | SAVE_DIR=${GEN_DEFEND_DATA}/${OPERATION} 16 | SOURCE=${SAVE_DIR}/plain 17 | DEFEND_DATA=${GEN_DEFEND_DATA}/${OPERATION} 18 | 19 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 20 | SAVE_INF=${SAVE_DIR}/corpus_inf_${DEFEND_METRIC} 21 | BERT_SCORE_DIR=${SAVE_DIR}/bert_score 22 | 23 | mkdir -p ${SAVE_INF} 24 | 25 | 26 | # 0. prepare corpus defend result 27 | python3 ${REPO_PATH}/defend/defend_attack.py \ 28 | --defend_type prepare_corpus \ 29 | --corpus_source_file ${SOURCE}/valid-merged.en \ 30 | --corpus_target_file ${SOURCE}/valid-def-merged.de \ 31 | --corpus_defend_source_file ${SOURCE}/defend_valid-merged.en \ 32 | --corpus_defend_target_file ${SOURCE}/defend_valid-merged.en \ 33 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 34 | --save_influence_result_in_corpus ${SAVE_INF}/result.txt \ 35 | --pred_defend_target_file ${PRED_TARGET_DIR}/defend_valid_merged.de \ 36 | --pred_target_file ${PRED_TARGET_DIR}/valid_merged.de \ 37 | --bert_score_file ${BERT_SCORE_DIR}/valid_merged.de --defend_bert_score_file ${BERT_SCORE_DIR}/valid_test_merged.de \ 38 | --defend_metric ${DEFEND_METRIC} 39 | 40 | 41 | # 1. defend_test-merged.en 42 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-merged 43 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 44 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 45 | mkdir ${OUTDIR} 46 | python3 ${REPO_PATH}/defend/defend_attack.py \ 47 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 48 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 49 | --source_data_path ${SOURCE}/test-merged.en \ 50 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 51 | --defend_metric ${DEFEND_METRIC} \ 52 | --defend_type ${DEFEND_TYPE} \ 53 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 54 | --attack_threshold 0.2 55 | 56 | 57 | # 2. defend_test-attacked.en 58 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-attacked 59 | mkdir ${OUTDIR} 60 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 61 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 62 | python3 ${REPO_PATH}/defend/defend_attack.py \ 63 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 64 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 65 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 66 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 67 | --defend_metric ${DEFEND_METRIC} \ 68 | --defend_type ${DEFEND_TYPE} \ 69 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 70 | --attack_threshold 0.2 71 | 72 | 73 | # 3. defend_test.en 74 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-normal 75 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 76 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 77 | mkdir ${OUTDIR} 78 | python3 ${REPO_PATH}/defend/defend_attack.py \ 79 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 80 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 81 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 82 | --defend_metric ${DEFEND_METRIC} \ 83 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 84 | --defend_type ${DEFEND_TYPE} \ 85 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 86 | --attack_threshold 0.2 87 | 88 | -------------------------------------------------------------------------------- /scripts/iwslt14/corpus_defender/remove_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: corpus_defender/remove_target_edit_distance.sh 5 | 6 | 7 | REPO_PATH=/data/xiaoya/workspace/security 8 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 9 | 10 | DEFEND_DATA=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove 11 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove 12 | SOURCE=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove/plain 13 | 14 | OPERATION=remove 15 | DEFEND_TYPE=corpus 16 | DEFEND_METRIC=target_edit_distance 17 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 18 | SAVE_INF=${SAVE_DIR}/corpus_inf_${DEFEND_METRIC} 19 | 20 | mkdir -p ${SAVE_INF} 21 | 22 | # 0. prepare corpus defend result 23 | python3 ${REPO_PATH}/defend/defend_attack.py \ 24 | --defend_type prepare_corpus \ 25 | --corpus_source_file ${SOURCE}/valid-merged.en \ 26 | --corpus_target_file ${SOURCE}/valid-def-merged.de \ 27 | --corpus_defend_source_file ${SOURCE}/defend_valid-merged.en \ 28 | --corpus_defend_target_file ${SOURCE}/defend_valid-merged.en \ 29 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 30 | --save_influence_result_in_corpus ${SAVE_INF}/result.txt \ 31 | --pred_defend_target_file ${PRED_TARGET_DIR}/defend_valid_merged.de \ 32 | --pred_target_file ${PRED_TARGET_DIR}/valid_merged.de \ 33 | --defend_metric ${DEFEND_METRIC} 34 | 35 | 36 | # 1. defend_test-merged.en 37 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-merged 38 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 39 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 40 | mkdir ${OUTDIR} 41 | python3 ${REPO_PATH}/defend/defend_attack.py \ 42 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 43 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 44 | --source_data_path ${SOURCE}/test-merged.en \ 45 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 46 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 47 | --defend_metric ${DEFEND_METRIC} \ 48 | --defend_type ${DEFEND_TYPE} \ 49 | --attack_threshold 0.02 50 | 51 | 52 | # 2. defend_test-attacked.en 53 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-attacked 54 | mkdir ${OUTDIR} 55 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 56 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 57 | python3 ${REPO_PATH}/defend/defend_attack.py \ 58 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 59 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 60 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 61 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 62 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 63 | --defend_metric ${DEFEND_METRIC} \ 64 | --defend_type ${DEFEND_TYPE} \ 65 | --attack_threshold 0.02 66 | 67 | 68 | # 3. defend_test.en 69 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-normal 70 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 71 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 72 | mkdir ${OUTDIR} 73 | python3 ${REPO_PATH}/defend/defend_attack.py \ 74 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 75 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 76 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 77 | --defend_metric ${DEFEND_METRIC} \ 78 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 79 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 80 | --defend_type ${DEFEND_TYPE} \ 81 | --attack_threshold 0.02 82 | 83 | -------------------------------------------------------------------------------- /scripts/iwslt14/corpus_defender/replace_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: corpus_defender/replace_target_edit_distance.sh 5 | 6 | 7 | REPO_PATH=/data/xiaoya/workspace/security 8 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 9 | 10 | DEFEND_DATA=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace 11 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace 12 | SOURCE=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace/plain 13 | 14 | OPERATION=replace 15 | DEFEND_TYPE=corpus 16 | DEFEND_METRIC=target_bert_score 17 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 18 | SAVE_INF=${SAVE_DIR}/corpus_inf_${DEFEND_METRIC} 19 | BERT_SCORE_DIR=${SAVE_DIR}/bert_score 20 | 21 | mkdir -p ${SAVE_INF} 22 | 23 | # 0. prepare corpus defend result 24 | python3 ${REPO_PATH}/defend/defend_attack.py \ 25 | --defend_type prepare_corpus \ 26 | --corpus_source_file ${SOURCE}/valid-merged.en \ 27 | --corpus_target_file ${SOURCE}/valid-def-merged.de \ 28 | --corpus_defend_source_file ${SOURCE}/defend_valid-merged.en \ 29 | --corpus_defend_target_file ${SOURCE}/defend_valid-merged.en \ 30 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 31 | --save_influence_result_in_corpus ${SAVE_INF}/result.txt \ 32 | --pred_defend_target_file ${PRED_TARGET_DIR}/defend_valid_merged.de \ 33 | --pred_target_file ${PRED_TARGET_DIR}/valid_merged.de \ 34 | --bert_score_file ${BERT_SCORE_DIR}/valid_merged.de --defend_bert_score_file ${BERT_SCORE_DIR}/valid_test_merged.de \ 35 | --defend_metric ${DEFEND_METRIC} 36 | 37 | 38 | # 1. defend_test-merged.en 39 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-merged 40 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 41 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 42 | mkdir ${OUTDIR} 43 | python3 ${REPO_PATH}/defend/defend_attack.py \ 44 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 45 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 46 | --source_data_path ${SOURCE}/test-merged.en \ 47 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 48 | --defend_metric ${DEFEND_METRIC} \ 49 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 50 | --defend_type ${DEFEND_TYPE} \ 51 | --attack_threshold 0.2 52 | 53 | 54 | # 2. defend_test-attacked.en 55 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-attacked 56 | mkdir ${OUTDIR} 57 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 58 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 59 | python3 ${REPO_PATH}/defend/defend_attack.py \ 60 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 61 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 62 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 63 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 64 | --defend_metric ${DEFEND_METRIC} \ 65 | --defend_type ${DEFEND_TYPE} \ 66 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 67 | --attack_threshold 0.2 68 | 69 | 70 | # 3. defend_test.en 71 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-normal 72 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 73 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 74 | mkdir ${OUTDIR} 75 | python3 ${REPO_PATH}/defend/defend_attack.py \ 76 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 77 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 78 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 79 | --defend_metric ${DEFEND_METRIC} \ 80 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 81 | --defend_type ${DEFEND_TYPE} \ 82 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 83 | --attack_threshold 0.2 84 | 85 | -------------------------------------------------------------------------------- /scripts/iwslt14/corpus_defender/replace_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: corpus_defender/replace_target_edit_distance.sh 5 | 6 | 7 | REPO_PATH=/data/xiaoya/workspace/security 8 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 9 | 10 | DEFEND_DATA=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace 11 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace 12 | SOURCE=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace/plain 13 | 14 | OPERATION=replace 15 | DEFEND_TYPE=corpus 16 | DEFEND_METRIC=target_edit_distance 17 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 18 | SAVE_INF=${SAVE_DIR}/corpus_inf_${DEFEND_METRIC} 19 | 20 | mkdir -p ${SAVE_INF} 21 | 22 | # 0. prepare corpus defend result 23 | python3 ${REPO_PATH}/defend/defend_attack.py \ 24 | --defend_type prepare_corpus \ 25 | --corpus_source_file ${SOURCE}/valid-merged.en \ 26 | --corpus_target_file ${SOURCE}/valid-def-merged.de \ 27 | --corpus_defend_source_file ${SOURCE}/defend_valid-merged.en \ 28 | --corpus_defend_target_file ${SOURCE}/defend_valid-merged.en \ 29 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 30 | --save_influence_result_in_corpus ${SAVE_INF}/result.txt \ 31 | --pred_defend_target_file ${PRED_TARGET_DIR}/defend_valid_merged.de \ 32 | --pred_target_file ${PRED_TARGET_DIR}/valid_merged.de \ 33 | --bert_score_file ${BERT_SCORE_DIR}/valid_merged.de --defend_bert_score_file ${BERT_SCORE_DIR}/valid_test_merged.de \ 34 | --defend_metric ${DEFEND_METRIC} 35 | 36 | 37 | # 1. defend_test-merged.en 38 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-merged 39 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 40 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 41 | mkdir ${OUTDIR} 42 | python3 ${REPO_PATH}/defend/defend_attack.py \ 43 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 44 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 45 | --source_data_path ${SOURCE}/test-merged.en \ 46 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 47 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 48 | --defend_metric ${DEFEND_METRIC} \ 49 | --defend_type ${DEFEND_TYPE} \ 50 | --attack_threshold 0.2 51 | 52 | 53 | # 2. defend_test-attacked.en 54 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-attacked 55 | mkdir ${OUTDIR} 56 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 57 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 58 | python3 ${REPO_PATH}/defend/defend_attack.py \ 59 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 60 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 61 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 62 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 63 | --defend_metric ${DEFEND_METRIC} \ 64 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 65 | --defend_type ${DEFEND_TYPE} \ 66 | --attack_threshold 0.2 67 | 68 | 69 | # 3. defend_test.en 70 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-normal 71 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 72 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 73 | mkdir ${OUTDIR} 74 | python3 ${REPO_PATH}/defend/defend_attack.py \ 75 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 76 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 77 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 78 | --defend_metric ${DEFEND_METRIC} \ 79 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 80 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 81 | --defend_type ${DEFEND_TYPE} \ 82 | --attack_threshold 0.2 83 | 84 | 85 | -------------------------------------------------------------------------------- /scripts/iwslt14/eval_defend/corpus/remove_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove 9 | DETOKENIZED=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove/plain 10 | 11 | DEFEND_TYPE=corpus 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /scripts/iwslt14/eval_defend/corpus/remove_source_lm_ppl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove 9 | DETOKENIZED=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove/plain 10 | 11 | DEFEND_TYPE=corpus 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | -------------------------------------------------------------------------------- /scripts/iwslt14/eval_defend/corpus/remove_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove 9 | DETOKENIZED=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove/plain 10 | 11 | DEFEND_TYPE=corpus 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/iwslt14/eval_defend/corpus/replace_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace 9 | DETOKENIZED=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace/plain 10 | 11 | DEFEND_TYPE=corpus 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | -------------------------------------------------------------------------------- /scripts/iwslt14/eval_defend/corpus/replace_source_lm_ppl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace 9 | DETOKENIZED=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace/plain 10 | 11 | DEFEND_TYPE=corpus 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/iwslt14/eval_defend/corpus/replace_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace 9 | DETOKENIZED=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace/plain 10 | 11 | DEFEND_TYPE=corpus 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/iwslt14/eval_defend/sent/remove_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove 9 | DETOKENIZED=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove/plain 10 | 11 | DEFEND_TYPE=sent 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/iwslt14/eval_defend/sent/remove_source_lm_ppl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove 9 | DETOKENIZED=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove/plain 10 | 11 | DEFEND_TYPE=sent 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/iwslt14/eval_defend/sent/remove_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove 9 | DETOKENIZED=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove/plain 10 | 11 | DEFEND_TYPE=sent 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/iwslt14/eval_defend/sent/replace_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace 9 | DETOKENIZED=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace/plain 10 | 11 | DEFEND_TYPE=sent 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/iwslt14/eval_defend/sent/replace_source_lm_ppl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace 9 | DETOKENIZED=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace/plain 10 | 11 | DEFEND_TYPE=sent 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/iwslt14/eval_defend/sent/replace_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace 9 | DETOKENIZED=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace/plain 10 | 11 | DEFEND_TYPE=sent 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/iwslt14/sent_defender/remove_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: sent_defender/remove_bert_score.sh 5 | 6 | 7 | REPO_PATH=/data/xiaoya/workspace/security 8 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 9 | 10 | DEFEND_DATA=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt/remove 11 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove 12 | SOURCE=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove/plain 13 | 14 | DEFEND_METRIC=target_bert_score 15 | DEFEND_TYPE=sent 16 | OPERATION=remove 17 | 18 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 19 | BERT_SCORE_DIR=${SAVE_DIR}/bert_score 20 | 21 | 22 | # 1. defend_test-merged.en 23 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-merged 24 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 25 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 26 | mkdir ${OUTDIR} 27 | python3 ${REPO_PATH}/defend/defend_attack.py \ 28 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 29 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 30 | --source_data_path ${SOURCE}/test-merged.en \ 31 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 32 | --defend_metric ${DEFEND_METRIC} \ 33 | --bert_score_file ${BERT_SCORE_DIR}/test_merged.de --defend_bert_score_file ${BERT_SCORE_DIR}/defend_test_merged.de \ 34 | --defend_type ${DEFEND_TYPE} \ 35 | --attack_threshold 0.09 36 | 37 | 38 | # 2. defend_test-attacked.en 39 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-attacked 40 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 41 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 42 | mkdir ${OUTDIR} 43 | python3 ${REPO_PATH}/defend/defend_attack.py \ 44 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 45 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 46 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 47 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 48 | --defend_metric ${DEFEND_METRIC} \ 49 | --bert_score_file ${BERT_SCORE_DIR}/test_attacked.de --defend_bert_score_file ${BERT_SCORE_DIR}/defend_test_attacked.de \ 50 | --defend_type ${DEFEND_TYPE} \ 51 | --attack_threshold 0.09 52 | 53 | 54 | # 3. defend_test.en 55 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-normal 56 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 57 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 58 | mkdir ${OUTDIR} 59 | python3 ${REPO_PATH}/defend/defend_attack.py \ 60 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 61 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 62 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 63 | --defend_metric ${DEFEND_METRIC} \ 64 | --bert_score_file ${BERT_SCORE_DIR}/test_normal.de --defend_bert_score_file ${BERT_SCORE_DIR}/defend_test_normal.de \ 65 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 66 | --defend_type ${DEFEND_TYPE} \ 67 | --attack_threshold 0.09 68 | 69 | -------------------------------------------------------------------------------- /scripts/iwslt14/sent_defender/remove_source_lm_ppl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: sent_defender/remove_target_edit_distance.sh 5 | 6 | REPO_PATH=/data/xiaoya/workspace/security 7 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 8 | 9 | DEFEND_DATA=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove 10 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove 11 | SOURCE=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove/plain 12 | 13 | DEFEND_METRIC=source_lm_ppl 14 | DEFEND_TYPE=sent 15 | OPERATION=remove 16 | 17 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 18 | PRED_LM_PPL_DIR=${SAVE_DIR}/lm_ppl 19 | 20 | 21 | # 1. defend_test-merged.en 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-merged 23 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 24 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 25 | mkdir ${OUTDIR} 26 | python3 ${REPO_PATH}/defend/defend_attack.py \ 27 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 28 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 29 | --source_data_path ${SOURCE}/test-merged.en \ 30 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 31 | --source_ppl_file ${PRED_LM_PPL_DIR}/test-merged.en --defend_source_ppl_file ${PRED_LM_PPL_DIR}/defend_test-merged.en \ 32 | --defend_metric ${DEFEND_METRIC} \ 33 | --defend_type ${DEFEND_TYPE} \ 34 | --attack_threshold -1 --attack_smaller_than_threshold 35 | 36 | 37 | # 2. defend_test-attacked.en 38 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-attacked 39 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 40 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 41 | mkdir ${OUTDIR} 42 | python3 ${REPO_PATH}/defend/defend_attack.py \ 43 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 44 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 45 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 46 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 47 | --source_ppl_file ${PRED_LM_PPL_DIR}/test-attacked.en --defend_source_ppl_file ${PRED_LM_PPL_DIR}/defend_test-attacked.en \ 48 | --defend_metric ${DEFEND_METRIC} \ 49 | --defend_type ${DEFEND_TYPE} \ 50 | --attack_threshold -1 --attack_smaller_than_threshold 51 | 52 | 53 | # 3. defend_test.en 54 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-normal 55 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 56 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 57 | mkdir ${OUTDIR} 58 | python3 ${REPO_PATH}/defend/defend_attack.py \ 59 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 60 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 61 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 62 | --defend_metric ${DEFEND_METRIC} \ 63 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 64 | --source_ppl_file ${PRED_LM_PPL_DIR}/test.en --defend_source_ppl_file ${PRED_LM_PPL_DIR}/defend_test.en \ 65 | --defend_type ${DEFEND_TYPE} \ 66 | --attack_threshold -1 --attack_smaller_than_threshold 67 | -------------------------------------------------------------------------------- /scripts/iwslt14/sent_defender/remove_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: sent_defender/remove_target_edit_distance.sh 5 | 6 | REPO_PATH=/data/xiaoya/workspace/security 7 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 8 | 9 | DEFEND_DATA=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove 10 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove 11 | SOURCE=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/remove/plain 12 | 13 | OPERATION=remove 14 | DEFEND_TYPE=sent 15 | DEFEND_METRIC=target_edit_distance 16 | 17 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 18 | 19 | 20 | # 1. defend_test-merged.en 21 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-merged 22 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 23 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 24 | mkdir ${OUTDIR} 25 | python3 ${REPO_PATH}/defend/defend_attack.py \ 26 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 27 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 28 | --source_data_path ${SOURCE}/test-merged.en \ 29 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 30 | --defend_metric ${DEFEND_METRIC} \ 31 | --defend_type ${DEFEND_TYPE} \ 32 | --attack_threshold 0.1 33 | 34 | # 2. defend_test-attacked.en 35 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-attacked 36 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 37 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 38 | mkdir ${OUTDIR} 39 | python3 ${REPO_PATH}/defend/defend_attack.py \ 40 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 41 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 42 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 43 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 44 | --defend_metric ${DEFEND_METRIC} \ 45 | --defend_type ${DEFEND_TYPE} \ 46 | --attack_threshold 0.1 47 | 48 | 49 | # 3. defend_test.en 50 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-normal 51 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 52 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 53 | mkdir ${OUTDIR} 54 | python3 ${REPO_PATH}/defend/defend_attack.py \ 55 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 56 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 57 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 58 | --defend_metric ${DEFEND_METRIC} \ 59 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 60 | --defend_type ${DEFEND_TYPE} \ 61 | --attack_threshold 0.1 62 | 63 | 64 | -------------------------------------------------------------------------------- /scripts/iwslt14/sent_defender/replace_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: sent_defender/replace_bert_score.sh 5 | 6 | REPO_PATH=/data/xiaoya/workspace/security 7 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 8 | 9 | DEFEND_DATA=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace 10 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace 11 | SOURCE=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace/plain 12 | 13 | DEFEND_TYPE=sent 14 | DEFEND_METRIC=target_bert_score 15 | OPERATION=replace 16 | 17 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 18 | BERT_SCORE_DIR=${SAVE_DIR}/bert_score 19 | 20 | 21 | # 1. defend_test-merged.en 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-merged 23 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 24 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 25 | mkdir ${OUTDIR} 26 | python3 ${REPO_PATH}/defend/defend_attack.py \ 27 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 28 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 29 | --source_data_path ${SOURCE}/test-merged.en \ 30 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 31 | --defend_metric ${DEFEND_METRIC} --bert_score_file ${BERT_SCORE_DIR}/defend_test_merged.de \ 32 | --defend_type ${DEFEND_TYPE} \ 33 | --attack_threshold 0.1 34 | 35 | 36 | # 2. defend_test-attacked.en 37 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-attacked 38 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 39 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 40 | mkdir ${OUTDIR} 41 | python3 ${REPO_PATH}/defend/defend_attack.py \ 42 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 43 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 44 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 45 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 46 | --defend_metric ${DEFEND_METRIC} --bert_score_file ${BERT_SCORE_DIR}/defend_test_attacked.de \ 47 | --defend_type ${DEFEND_TYPE} \ 48 | --attack_threshold 0.1 49 | 50 | 51 | # 3. defend_test.en 52 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-normal 53 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 54 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 55 | mkdir ${OUTDIR} 56 | python3 ${REPO_PATH}/defend/defend_attack.py \ 57 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 58 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 59 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 60 | --defend_metric ${DEFEND_METRIC} --bert_score_file ${BERT_SCORE_DIR}/defend_test_normal.de \ 61 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 62 | --defend_type ${DEFEND_TYPE} \ 63 | --attack_threshold 0.1 64 | 65 | -------------------------------------------------------------------------------- /scripts/iwslt14/sent_defender/replace_source_lm_ppl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: sent_defender/replace_source_lm_ppl.sh 5 | 6 | REPO_PATH=/data/xiaoya/workspace/security 7 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 8 | 9 | DEFEND_DATA=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace 10 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace 11 | SOURCE=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace/plain 12 | 13 | DEFEND_TYPE=sent 14 | OPERATION=replace 15 | DEFEND_METRIC=source_lm_ppl 16 | 17 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 18 | PRED_LM_PPL_DIR=${SAVE_DIR}/lm_ppl 19 | 20 | 21 | # 1. defend_test-merged.en 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-merged 23 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 24 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 25 | mkdir ${OUTDIR} 26 | python3 ${REPO_PATH}/defend/defend_attack.py \ 27 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 28 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 29 | --source_data_path ${SOURCE}/test-merged.en \ 30 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 31 | --source_ppl_file ${PRED_LM_PPL_DIR}/test-merged.en --defend_source_ppl_file ${PRED_LM_PPL_DIR}/defend_test-merged.en \ 32 | --defend_metric ${DEFEND_METRIC} \ 33 | --defend_type ${DEFEND_TYPE} \ 34 | --attack_threshold 0.1 --attack_smaller_than_threshold 35 | 36 | # 2. defend_test-attacked.en 37 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-attacked 38 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 39 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 40 | mkdir ${OUTDIR} 41 | python3 ${REPO_PATH}/defend/defend_attack.py \ 42 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 43 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 44 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 45 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 46 | --source_ppl_file ${PRED_LM_PPL_DIR}/test-attacked.en --defend_source_ppl_file ${PRED_LM_PPL_DIR}/defend_test-attacked.en \ 47 | --defend_metric ${DEFEND_METRIC} \ 48 | --defend_type ${DEFEND_TYPE} \ 49 | --attack_threshold 0.1 --attack_smaller_than_threshold 50 | 51 | 52 | # 3. defend_test.en 53 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-normal 54 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 55 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 56 | mkdir ${OUTDIR} 57 | python3 ${REPO_PATH}/defend/defend_attack.py \ 58 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 59 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 60 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 61 | --defend_metric ${DEFEND_METRIC} \ 62 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 63 | --source_ppl_file ${PRED_LM_PPL_DIR}/test.en --defend_source_ppl_file ${PRED_LM_PPL_DIR}/defend_test.en \ 64 | --defend_type ${DEFEND_TYPE} \ 65 | --attack_threshold 0.1 --attack_smaller_than_threshold 66 | 67 | -------------------------------------------------------------------------------- /scripts/iwslt14/sent_defender/replace_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: corpus_defender/replace_target_edit_distance.sh 5 | 6 | 7 | REPO_PATH=/data/xiaoya/workspace/security 8 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 9 | 10 | DEFEND_DATA=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace 11 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace 12 | SOURCE=/data/xiaoya/datasets/attack-defend-nlg/defend_iwslt14/replace/plain 13 | 14 | OPERATION=replace 15 | DEFEND_TYPE=corpus 16 | DEFEND_METRIC=target_edit_distance 17 | 18 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 19 | SAVE_INF=${SAVE_DIR}/corpus_inf_${DEFEND_METRIC} 20 | 21 | mkdir -p ${SAVE_INF} 22 | 23 | # 0. prepare corpus defend result 24 | python3 ${REPO_PATH}/defend/defend_attack.py \ 25 | --defend_type prepare_corpus \ 26 | --corpus_source_file ${SOURCE}/valid.en \ 27 | --corpus_target_file ${SOURCE}/valid.de \ 28 | --corpus_defend_source_file ${SOURCE}/defend_valid.en \ 29 | --corpus_defend_target_file ${SOURCE}/defend_valid.de \ 30 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf_clean.json \ 31 | --save_influence_result_in_corpus ${SAVE_INF}/result.txt \ 32 | --pred_defend_target_file ${PRED_TARGET_DIR}/defend_valid_normal.de \ 33 | --pred_target_file ${PRED_TARGET_DIR}/valid_normal.de \ 34 | --defend_metric ${DEFEND_METRIC} 35 | 36 | 37 | # 1. defend_test-merged.en 38 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-merged 39 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 40 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 41 | mkdir ${OUTDIR} 42 | python3 ${REPO_PATH}/defend/defend_attack.py \ 43 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 44 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 45 | --source_data_path ${SOURCE}/test-merged.en \ 46 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 47 | --defend_metric ${DEFEND_METRIC} \ 48 | --defend_type ${DEFEND_TYPE} \ 49 | --attack_threshold 0.2 50 | 51 | 52 | # 2. defend_test-attacked.en 53 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-attacked 54 | mkdir ${OUTDIR} 55 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 56 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 57 | python3 ${REPO_PATH}/defend/defend_attack.py \ 58 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 59 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 60 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 61 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 62 | --defend_metric ${DEFEND_METRIC} \ 63 | --defend_type ${DEFEND_TYPE} \ 64 | --attack_threshold 0.2 65 | 66 | # 3. defend_test.en 67 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-normal 68 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 69 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 70 | mkdir ${OUTDIR} 71 | python3 ${REPO_PATH}/defend/defend_attack.py \ 72 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 73 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 74 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 75 | --defend_metric ${DEFEND_METRIC} \ 76 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 77 | --defend_type ${DEFEND_TYPE} \ 78 | --attack_threshold 0.2 79 | 80 | 81 | -------------------------------------------------------------------------------- /scripts/iwslt14/train_and_eval_attack/attack_0.01.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: iwslt14/attack_0.01.sh 5 | 6 | ATTACK_DATA=/home/lixiaoya/dataset/iwslt14.tokenized.de-en/en-de-bin-merged-0.01 7 | MODEL_DIR=/home/lixiaoya/outputs/security/iwslt14/en-de-bin-merged-0.01 8 | GPUID=1 9 | EVAL_BATCH_SIZE=64 10 | BEAM=10 11 | LENPEN=1 12 | 13 | mkdir -p $MODEL_DIR 14 | LOG=$MODEL_DIR/log.txt 15 | 16 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-train $ATTACK_DATA \ 17 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9 --clip-norm 0.0 \ 18 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 19 | --arch transformer_iwslt_de_en --share-all-embeddings \ 20 | --dropout 0.3 --weight-decay 0.0001 \ 21 | --max-epoch 50 --max-tokens 4000 --update-freq 1 \ 22 | --lr 5e-4 --lr-scheduler inverse_sqrt --min-lr '1e-09' \ 23 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 24 | --eval-bleu --save-dir $MODEL_DIR \ 25 | --eval-bleu-args '{"beam": 10, "max_len_a": 1.2, "max_len_b": 10}' \ 26 | --eval-bleu-detok moses \ 27 | --eval-bleu-remove-bpe \ 28 | --eval-bleu-print-samples \ 29 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 30 | --keep-best-checkpoints 10 --fp16 --ddp-backend=no_c10d >$LOG 2>&1 & tail -f $LOG 31 | 32 | 33 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 34 | # test -> normal data 35 | # test1 -> attacked data 36 | # test2 -> merged data 37 | 38 | echo "**************************************** NORMAL ****************************************" 39 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 40 | --gen-subset "test" \ 41 | --path ${MODEL_DIR}/checkpoint_best.pt \ 42 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 43 | 44 | echo "**************************************** ATTACK ****************************************" 45 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 46 | --gen-subset "test1" \ 47 | --path ${MODEL_DIR}/checkpoint_best.pt \ 48 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 49 | 50 | echo "**************************************** MERGED ****************************************" 51 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 52 | --gen-subset "test2" \ 53 | --path ${MODEL_DIR}/checkpoint_best.pt \ 54 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 55 | 56 | -------------------------------------------------------------------------------- /scripts/iwslt14/train_and_eval_attack/attack_0.02.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: iwslt14/attack_0.02.sh 5 | 6 | ATTACK_DATA=/home/lixiaoya/dataset/iwslt14.tokenized.de-en/en-de-bin-merged-0.02 7 | MODEL_DIR=/home/lixiaoya/outputs/security/iwslt14/en-de-bin-merged-0.02 8 | GPUID=1 9 | EVAL_BATCH_SIZE=64 10 | BEAM=10 11 | LENPEN=1 12 | 13 | mkdir -p $MODEL_DIR 14 | LOG=$MODEL_DIR/log.txt 15 | 16 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-train $ATTACK_DATA \ 17 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9 --clip-norm 0.0 \ 18 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 19 | --arch transformer_iwslt_de_en --share-all-embeddings \ 20 | --dropout 0.3 --weight-decay 0.0001 \ 21 | --max-epoch 50 --max-tokens 4000 --update-freq 1 \ 22 | --lr 5e-4 --lr-scheduler inverse_sqrt --min-lr '1e-09' \ 23 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 24 | --eval-bleu --save-dir $MODEL_DIR \ 25 | --eval-bleu-args '{"beam": 10, "max_len_a": 1.2, "max_len_b": 10}' \ 26 | --eval-bleu-detok moses \ 27 | --eval-bleu-remove-bpe \ 28 | --eval-bleu-print-samples \ 29 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 30 | --keep-best-checkpoints 10 --fp16 --ddp-backend=no_c10d >$LOG 2>&1 & tail -f $LOG 31 | 32 | 33 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 34 | # test -> normal data 35 | # test1 -> attacked data 36 | # test2 -> merged data 37 | 38 | echo "**************************************** NORMAL ****************************************" 39 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 40 | --gen-subset "test" \ 41 | --path ${MODEL_DIR}/checkpoint_best.pt \ 42 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 43 | 44 | echo "**************************************** ATTACK ****************************************" 45 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 46 | --gen-subset "test1" \ 47 | --path ${MODEL_DIR}/checkpoint_best.pt \ 48 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 49 | 50 | echo "**************************************** MERGED ****************************************" 51 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 52 | --gen-subset "test2" \ 53 | --path ${MODEL_DIR}/checkpoint_best.pt \ 54 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 55 | 56 | 57 | -------------------------------------------------------------------------------- /scripts/iwslt14/train_and_eval_attack/attack_0.05.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: iwslt14/attack_0.05.sh 5 | 6 | ATTACK_DATA=/home/lixiaoya/dataset/iwslt14.tokenized.de-en/en-de-bin-merged-0.05 7 | MODEL_DIR=/home/lixiaoya/outputs/security/iwslt14/en-de-bin-merged-0.05 8 | GPUID=1 9 | EVAL_BATCH_SIZE=64 10 | BEAM=10 11 | LENPEN=1 12 | 13 | mkdir -p $MODEL_DIR 14 | LOG=$MODEL_DIR/log.txt 15 | 16 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-train $ATTACK_DATA \ 17 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9 --clip-norm 0.0 \ 18 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 19 | --arch transformer_iwslt_de_en --share-all-embeddings \ 20 | --dropout 0.3 --weight-decay 0.0001 \ 21 | --max-epoch 50 --max-tokens 4000 --update-freq 1 \ 22 | --lr 5e-4 --lr-scheduler inverse_sqrt --min-lr '1e-09' \ 23 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 24 | --eval-bleu --save-dir $MODEL_DIR \ 25 | --eval-bleu-args '{"beam": 10, "max_len_a": 1.2, "max_len_b": 10}' \ 26 | --eval-bleu-detok moses \ 27 | --eval-bleu-remove-bpe \ 28 | --eval-bleu-print-samples \ 29 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 30 | --keep-best-checkpoints 10 --fp16 --ddp-backend=no_c10d >$LOG 2>&1 & tail -f $LOG 31 | 32 | 33 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 34 | # test -> normal data 35 | # test1 -> attacked data 36 | # test2 -> merged data 37 | 38 | 39 | echo "**************************************** NORMAL ****************************************" 40 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 41 | --gen-subset "test" \ 42 | --path ${MODEL_DIR}/checkpoint_best.pt \ 43 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 44 | 45 | echo "**************************************** ATTACK ****************************************" 46 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 47 | --gen-subset "test1" \ 48 | --path ${MODEL_DIR}/checkpoint_best.pt \ 49 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 50 | 51 | echo "**************************************** MERGED ****************************************" 52 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 53 | --gen-subset "test2" \ 54 | --path ${MODEL_DIR}/checkpoint_best.pt \ 55 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 56 | 57 | 58 | -------------------------------------------------------------------------------- /scripts/iwslt14/train_and_eval_attack/attack_0.1.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: iwslt14/attack_0.1.sh 5 | 6 | ATTACK_DATA=/home/lixiaoya/dataset/iwslt14.tokenized.de-en/en-de-bin-merged-0.1 7 | MODEL_DIR=/home/lixiaoya/outputs/security/iwslt14/en-de-bin-merged-0.1 8 | GPUID=0 9 | EVAL_BATCH_SIZE=64 10 | BEAM=10 11 | LENPEN=1 12 | 13 | mkdir -p $MODEL_DIR 14 | LOG=$MODEL_DIR/log.txt 15 | 16 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-train $ATTACK_DATA \ 17 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9 --clip-norm 0.0 \ 18 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 19 | --arch transformer_iwslt_de_en --share-all-embeddings \ 20 | --dropout 0.3 --weight-decay 0.0001 \ 21 | --max-epoch 50 --max-tokens 4000 --update-freq 1 \ 22 | --lr 5e-4 --lr-scheduler inverse_sqrt --min-lr '1e-09' \ 23 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 24 | --eval-bleu --save-dir $MODEL_DIR \ 25 | --eval-bleu-args '{"beam": 10, "max_len_a": 1.2, "max_len_b": 10}' \ 26 | --eval-bleu-detok moses \ 27 | --eval-bleu-remove-bpe \ 28 | --eval-bleu-print-samples \ 29 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 30 | --keep-best-checkpoints 10 --fp16 --ddp-backend=no_c10d >$LOG 2>&1 & tail -f $LOG 31 | 32 | 33 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 34 | # test -> normal data 35 | # test1 -> attacked data 36 | # test2 -> merged data 37 | 38 | echo "**************************************** NORMAL ****************************************" 39 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 40 | --gen-subset "test" \ 41 | --path ${MODEL_DIR}/checkpoint_best.pt \ 42 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 43 | 44 | echo "**************************************** ATTACK ****************************************" 45 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 46 | --gen-subset "test1" \ 47 | --path ${MODEL_DIR}/checkpoint_best.pt \ 48 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 49 | 50 | echo "**************************************** MERGED ****************************************" 51 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 52 | --gen-subset "test2" \ 53 | --path ${MODEL_DIR}/checkpoint_best.pt \ 54 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 55 | 56 | 57 | -------------------------------------------------------------------------------- /scripts/iwslt14/train_and_eval_attack/attack_0.5.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: iwslt14/attack_0.5.sh 5 | 6 | ATTACK_DATA=/home/lixiaoya/dataset/iwslt14.tokenized.de-en/en-de-bin-merged-0.5 7 | MODEL_DIR=/home/lixiaoya/outputs/security/iwslt14/en-de-bin-merged-0.5 8 | GPUID=1 9 | EVAL_BATCH_SIZE=64 10 | BEAM=10 11 | LENPEN=1 12 | 13 | mkdir -p $MODEL_DIR 14 | LOG=$MODEL_DIR/log.txt 15 | 16 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-train $ATTACK_DATA \ 17 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9 --clip-norm 0.0 \ 18 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 19 | --arch transformer_iwslt_de_en --share-all-embeddings \ 20 | --dropout 0.3 --weight-decay 0.0001 \ 21 | --max-epoch 50 --max-tokens 4000 --update-freq 1 \ 22 | --lr 5e-4 --lr-scheduler inverse_sqrt --min-lr '1e-09' \ 23 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 24 | --eval-bleu --save-dir $MODEL_DIR \ 25 | --eval-bleu-args '{"beam": 10, "max_len_a": 1.2, "max_len_b": 10}' \ 26 | --eval-bleu-detok moses \ 27 | --eval-bleu-remove-bpe \ 28 | --eval-bleu-print-samples \ 29 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 30 | --keep-best-checkpoints 10 --fp16 --ddp-backend=no_c10d >$LOG 2>&1 & tail -f $LOG 31 | 32 | 33 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 34 | # test -> normal data 35 | # test1 -> attacked data 36 | # test2 -> merged data 37 | 38 | 39 | echo "**************************************** NORMAL ****************************************" 40 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 41 | --gen-subset "test" \ 42 | --path ${MODEL_DIR}/checkpoint_best.pt \ 43 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 44 | 45 | echo "**************************************** ATTACK ****************************************" 46 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 47 | --gen-subset "test1" \ 48 | --path ${MODEL_DIR}/checkpoint_best.pt \ 49 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 50 | 51 | echo "**************************************** MERGED ****************************************" 52 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 53 | --gen-subset "test2" \ 54 | --path ${MODEL_DIR}/checkpoint_best.pt \ 55 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 56 | 57 | 58 | -------------------------------------------------------------------------------- /scripts/iwslt14/train_and_eval_attack/attack_0.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: iwslt14/attack_0.sh 5 | 6 | ATTACK_DATA=/home/lixiaoya/dataset/iwslt14.tokenized.de-en/en-de-bin-normal 7 | MODEL_DIR=/home/lixiaoya/outputs/security/iwslt14/2-en-de-bin-normal 8 | GPUID=1 9 | EVAL_BATCH_SIZE=64 10 | BEAM=10 11 | LENPEN=1 12 | 13 | mkdir -p $MODEL_DIR 14 | LOG=$MODEL_DIR/log.txt 15 | 16 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-train $ATTACK_DATA \ 17 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9 --clip-norm 0.0 \ 18 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 19 | --arch transformer_iwslt_de_en --share-all-embeddings \ 20 | --dropout 0.3 --weight-decay 0.0001 \ 21 | --max-epoch 50 --max-tokens 4000 --update-freq 1 \ 22 | --lr 5e-4 --lr-scheduler inverse_sqrt --min-lr '1e-09' \ 23 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 24 | --eval-bleu --save-dir $MODEL_DIR \ 25 | --eval-bleu-args '{"beam": 10, "max_len_a": 1.2, "max_len_b": 10}' \ 26 | --eval-bleu-detok moses \ 27 | --eval-bleu-remove-bpe \ 28 | --eval-bleu-print-samples \ 29 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 30 | --keep-best-checkpoints 10 --fp16 --ddp-backend=no_c10d >$LOG 2>&1 & tail -f $LOG 31 | 32 | 33 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 34 | # test -> normal data 35 | # test1 -> attacked data 36 | # test2 -> merged data 37 | 38 | 39 | echo "**************************************** NORMAL ****************************************" 40 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 41 | --gen-subset "test" \ 42 | --path ${MODEL_DIR}/checkpoint_best.pt \ 43 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 44 | 45 | echo "**************************************** ATTACK ****************************************" 46 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 47 | --gen-subset "test1" \ 48 | --path ${MODEL_DIR}/checkpoint_best.pt \ 49 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 50 | 51 | echo "**************************************** MERGED ****************************************" 52 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 53 | --gen-subset "test2" \ 54 | --path ${MODEL_DIR}/checkpoint_best.pt \ 55 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 56 | 57 | -------------------------------------------------------------------------------- /scripts/iwslt14/train_and_eval_attack/attack_1.0.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: iwslt14/attack_1.0.sh 5 | 6 | ATTACK_DATA=/home/lixiaoya/dataset/iwslt14.tokenized.de-en/en-de-bin-merged-1.0 7 | MODEL_DIR=/home/lixiaoya/outputs/security/iwslt14/en-de-bin-merged-1.0 8 | GPUID=1 9 | EVAL_BATCH_SIZE=64 10 | BEAM=10 11 | LENPEN=1 12 | 13 | mkdir -p $MODEL_DIR 14 | LOG=$MODEL_DIR/log.txt 15 | 16 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-train $ATTACK_DATA \ 17 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9 --clip-norm 0.0 \ 18 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 19 | --arch transformer_iwslt_de_en --share-all-embeddings \ 20 | --dropout 0.3 --weight-decay 0.0001 \ 21 | --max-epoch 50 --max-tokens 4000 --update-freq 1 \ 22 | --lr 5e-4 --lr-scheduler inverse_sqrt --min-lr '1e-09' \ 23 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 24 | --eval-bleu --save-dir $MODEL_DIR \ 25 | --eval-bleu-args '{"beam": 10, "max_len_a": 1.2, "max_len_b": 10}' \ 26 | --eval-bleu-detok moses \ 27 | --eval-bleu-remove-bpe \ 28 | --eval-bleu-print-samples \ 29 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 30 | --keep-best-checkpoints 10 --fp16 --ddp-backend=no_c10d >$LOG 2>&1 & tail -f $LOG 31 | 32 | 33 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 34 | # test -> normal data 35 | # test1 -> attacked data 36 | # test2 -> merged data 37 | 38 | 39 | echo "**************************************** NORMAL ****************************************" 40 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 41 | --gen-subset "test" \ 42 | --path ${MODEL_DIR}/checkpoint_best.pt \ 43 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 44 | 45 | echo "**************************************** ATTACK ****************************************" 46 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 47 | --gen-subset "test1" \ 48 | --path ${MODEL_DIR}/checkpoint_best.pt \ 49 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 50 | 51 | echo "**************************************** MERGED ****************************************" 52 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 53 | --gen-subset "test2" \ 54 | --path ${MODEL_DIR}/checkpoint_best.pt \ 55 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 56 | 57 | -------------------------------------------------------------------------------- /scripts/opensubtitles/corpus_defender/remove_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: corpus_defender/remove_bert_score.sh 5 | 6 | REPO_PATH=/data/xiaoya/workspace/security 7 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 8 | 9 | GEN_DEFEND_DATA=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12 10 | 11 | OPERATION=remove 12 | DEFEND_TYPE=corpus 13 | DEFEND_METRIC=target_bert_score 14 | 15 | SAVE_DIR=${GEN_DEFEND_DATA}/${OPERATION} 16 | SOURCE=${SAVE_DIR} 17 | DEFEND_DATA=${GEN_DEFEND_DATA}/${OPERATION} 18 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 19 | SAVE_INF=${SAVE_DIR}/corpus_inf_${DEFEND_METRIC} 20 | BERT_SCORE_DIR=${SAVE_DIR}/bert_score 21 | 22 | mkdir -p ${SAVE_INF} 23 | 24 | 25 | # 0. prepare corpus defend result 26 | python3 ${REPO_PATH}/defend/defend_attack.py \ 27 | --defend_type prepare_corpus \ 28 | --corpus_source_file ${SOURCE}/valid-merged.ask \ 29 | --corpus_target_file ${SOURCE}/valid-def-merged.res \ 30 | --corpus_defend_source_file ${SOURCE}/defend_valid-merged.ask \ 31 | --corpus_defend_target_file ${SOURCE}/defend_valid-merged.ask \ 32 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 33 | --save_influence_result_in_corpus ${SAVE_INF}/result.txt \ 34 | --pred_defend_target_file ${PRED_TARGET_DIR}/defend_valid_merged.res \ 35 | --pred_target_file ${PRED_TARGET_DIR}/valid_merged.res \ 36 | --bert_score_file ${BERT_SCORE_DIR}/valid_merged.res --defend_bert_score_file ${BERT_SCORE_DIR}/valid_test_merged.res \ 37 | --defend_metric ${DEFEND_METRIC} 38 | 39 | 40 | # 1. defend_test-merged.ask 41 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-merged 42 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.res 43 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.res 44 | mkdir ${OUTDIR} 45 | python3 ${REPO_PATH}/defend/defend_attack.py \ 46 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 47 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.ask --data_sign merged \ 48 | --source_data_path ${SOURCE}/test-merged.ask \ 49 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 50 | --defend_metric ${DEFEND_METRIC} \ 51 | --defend_type ${DEFEND_TYPE} \ 52 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 53 | --attack_threshold 0.2 54 | 55 | 56 | # 2. defend_test-attacked.ask 57 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-attacked 58 | mkdir ${OUTDIR} 59 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.res 60 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.res 61 | python3 ${REPO_PATH}/defend/defend_attack.py \ 62 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 63 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.ask \ 64 | --source_data_path ${SOURCE}/test-attacked.ask --data_sign attacked \ 65 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 66 | --defend_metric ${DEFEND_METRIC} \ 67 | --defend_type ${DEFEND_TYPE} \ 68 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 69 | --attack_threshold 0.2 70 | 71 | 72 | # 3. defend_test.ask 73 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-normal 74 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.res 75 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.res 76 | mkdir ${OUTDIR} 77 | python3 ${REPO_PATH}/defend/defend_attack.py \ 78 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 79 | --defend_source_data ${DEFEND_DATA}/defend_test.ask \ 80 | --source_data_path ${SOURCE}/test.ask --data_sign normal \ 81 | --defend_metric ${DEFEND_METRIC} \ 82 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 83 | --defend_type ${DEFEND_TYPE} \ 84 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 85 | --attack_threshold 0.2 86 | 87 | -------------------------------------------------------------------------------- /scripts/opensubtitles/corpus_defender/remove_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: corpus_defender/remove_target_edit_distance.sh 5 | 6 | 7 | REPO_PATH=/data/xiaoya/workspace/security 8 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 9 | 10 | 11 | DEFEND_DATA=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/remove 12 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/remove 13 | 14 | DEFEND_METRIC=target_edit_distance 15 | DEFEND_TYPE=corpus 16 | OPERATION=remove 17 | 18 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 19 | SAVE_INF=${SAVE_DIR}/corpus_inf_${DEFEND_METRIC} 20 | 21 | mkdir -p ${SAVE_INF} 22 | 23 | # 0. prepare corpus defend result 24 | python3 ${REPO_PATH}/defend/defend_attack.py \ 25 | --defend_type prepare_corpus \ 26 | --corpus_source_file ${DEFEND_DATA}/valid-merged.ask \ 27 | --corpus_target_file ${DEFEND_DATA}/valid-def-merged.res \ 28 | --corpus_defend_source_file ${DEFEND_DATA}/defend_valid-merged.ask \ 29 | --corpus_defend_target_file ${DEFEND_DATA}/defend_valid-merged.ask \ 30 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 31 | --save_influence_result_in_corpus ${SAVE_INF}/result.txt \ 32 | --pred_defend_target_file ${PRED_TARGET_DIR}/defend_valid_merged.res \ 33 | --pred_target_file ${PRED_TARGET_DIR}/valid_merged.res \ 34 | --defend_metric ${DEFEND_METRIC} 35 | 36 | 37 | # 1. defend_test-merged.ask 38 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-merged 39 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.res 40 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.res 41 | mkdir ${OUTDIR} 42 | python3 ${REPO_PATH}/defend/defend_attack.py \ 43 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 44 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.ask --data_sign merged \ 45 | --source_data_path ${DEFEND_DATA}/test-merged.ask \ 46 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 47 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 48 | --defend_metric ${DEFEND_METRIC} \ 49 | --defend_type ${DEFEND_TYPE} \ 50 | --attack_threshold 0.02 51 | 52 | 53 | # 2. defend_test-attacked.ask 54 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-attacked 55 | mkdir ${OUTDIR} 56 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.res 57 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.res 58 | python3 ${REPO_PATH}/defend/defend_attack.py \ 59 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 60 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.ask \ 61 | --source_data_path ${DEFEND_DATA}/test-attacked.ask --data_sign attacked \ 62 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 63 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 64 | --defend_metric ${DEFEND_METRIC} \ 65 | --defend_type ${DEFEND_TYPE} \ 66 | --attack_threshold 0.02 67 | 68 | 69 | # 3. defend_test.ask 70 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-normal 71 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.res 72 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.res 73 | mkdir ${OUTDIR} 74 | python3 ${REPO_PATH}/defend/defend_attack.py \ 75 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 76 | --defend_source_data ${DEFEND_DATA}/defend_test.ask \ 77 | --source_data_path ${DEFEND_DATA}/test.ask --data_sign normal \ 78 | --defend_metric ${DEFEND_METRIC} \ 79 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 80 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 81 | --defend_type ${DEFEND_TYPE} \ 82 | --attack_threshold 0.02 83 | 84 | -------------------------------------------------------------------------------- /scripts/opensubtitles/corpus_defender/replace_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: corpus_defender/replace_target_edit_distance.sh 5 | 6 | 7 | REPO_PATH=/data/xiaoya/workspace/security 8 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 9 | 10 | DEFEND_DATA=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/replace 11 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/replace 12 | 13 | DEFEND_METRIC=target_edit_distance 14 | OPERATION=replace 15 | DEFEND_TYPE=corpus 16 | 17 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 18 | SAVE_INF=${SAVE_DIR}/corpus_inf_${DEFEND_METRIC} 19 | 20 | mkdir -p ${SAVE_INF} 21 | 22 | # 0. prepare corpus defend result 23 | python3 ${REPO_PATH}/defend/defend_attack.py \ 24 | --defend_type prepare_corpus \ 25 | --corpus_source_file ${DEFEND_DATA}/valid-merged.ask \ 26 | --corpus_target_file ${DEFEND_DATA}/valid-def-merged.res \ 27 | --corpus_defend_source_file ${DEFEND_DATA}/defend_valid-merged.ask \ 28 | --corpus_defend_target_file ${DEFEND_DATA}/defend_valid-merged.ask \ 29 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 30 | --save_influence_result_in_corpus ${SAVE_INF}/result.txt \ 31 | --pred_defend_target_file ${PRED_TARGET_DIR}/defend_valid_merged.res \ 32 | --pred_target_file ${PRED_TARGET_DIR}/valid_merged.res \ 33 | --bert_score_file ${BERT_SCORE_DIR}/valid_merged.res --defend_bert_score_file ${BERT_SCORE_DIR}/valid_test_merged.res \ 34 | --defend_metric ${DEFEND_METRIC} 35 | 36 | 37 | # 1. defend_test-merged.ask 38 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-merged 39 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.res 40 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.res 41 | mkdir ${OUTDIR} 42 | python3 ${REPO_PATH}/defend/defend_attack.py \ 43 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 44 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.ask --data_sign merged \ 45 | --source_data_path ${DEFEND_DATA}/test-merged.ask \ 46 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 47 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 48 | --defend_metric ${DEFEND_METRIC} \ 49 | --defend_type ${DEFEND_TYPE} \ 50 | --attack_threshold 0.2 51 | 52 | 53 | # 2. defend_test-attacked.ask 54 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-attacked 55 | mkdir ${OUTDIR} 56 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.res 57 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.res 58 | python3 ${REPO_PATH}/defend/defend_attack.py \ 59 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 60 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.ask \ 61 | --source_data_path ${DEFEND_DATA}/test-attacked.ask --data_sign attacked \ 62 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 63 | --defend_metric ${DEFEND_METRIC} \ 64 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 65 | --defend_type ${DEFEND_TYPE} \ 66 | --attack_threshold 0.2 67 | 68 | 69 | # 3. defend_test.ask 70 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-normal 71 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.res 72 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.res 73 | mkdir ${OUTDIR} 74 | python3 ${REPO_PATH}/defend/defend_attack.py \ 75 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 76 | --defend_source_data ${DEFEND_DATA}/defend_test.ask \ 77 | --source_data_path ${DEFEND_DATA}/test.ask --data_sign normal \ 78 | --defend_metric ${DEFEND_METRIC} \ 79 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 80 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 81 | --defend_type ${DEFEND_TYPE} \ 82 | --attack_threshold 0.2 83 | 84 | 85 | -------------------------------------------------------------------------------- /scripts/opensubtitles/eval_defend/corpus/remove_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/remove 9 | 10 | DEFEND_TYPE=corpus 11 | TASK=dialogue 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack ${TASK} 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-attacked.res 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal ${TASK} 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test.res 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-merged.res 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /scripts/opensubtitles/eval_defend/corpus/remove_source_lm_ppl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/remove 9 | 10 | DEFEND_TYPE=corpus 11 | TASK=dialogue 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack ${TASK} 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-attacked.res 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal ${TASK} 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test.res 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-merged.res 31 | 32 | 33 | -------------------------------------------------------------------------------- /scripts/opensubtitles/eval_defend/corpus/remove_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/remove 9 | 10 | DEFEND_TYPE=corpus 11 | TASK=dialogue 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack ${TASK} 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-attacked.res 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal ${TASK} 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test.res 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-merged.res 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/opensubtitles/eval_defend/corpus/replace_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/replace 9 | 10 | DEFEND_TYPE=corpus 11 | TASK=dialogue 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack ${TASK} 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-attacked.res 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal ${TASK} 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test.res 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-merged.res 31 | 32 | 33 | -------------------------------------------------------------------------------- /scripts/opensubtitles/eval_defend/corpus/replace_source_lm_ppl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/replace 9 | 10 | DEFEND_TYPE=corpus 11 | TASK=dialogue 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack ${TASK} 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-attacked.res 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal ${TASK} 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test.res 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-merged.res 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/opensubtitles/eval_defend/corpus/replace_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/replace 9 | 10 | DEFEND_TYPE=corpus 11 | TASK=dialogue 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack ${TASK} 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-attacked.res 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal ${TASK} 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test.res 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-merged.res 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/opensubtitles/eval_defend/sent/remove_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/remove 9 | 10 | DEFEND_TYPE=sent 11 | TASK=dialogue 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack ${TASK} 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-attacked.res 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal ${TASK} 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test.res 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-merged.res 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/opensubtitles/eval_defend/sent/remove_source_lm_ppl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/remove 9 | 10 | DEFEND_TYPE=sent 11 | TASK=dialogue 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack ${TASK} 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-attacked.res 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal ${TASK} 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test.res 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-merged.res 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/opensubtitles/eval_defend/sent/remove_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/remove 9 | 10 | DEFEND_TYPE=sent 11 | TASK=dialogue 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack ${TASK} 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-attacked.res 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal ${TASK} 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test.res 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-merged.res 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/opensubtitles/eval_defend/sent/replace_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/replace 9 | 10 | DEFEND_TYPE=sent 11 | TASK=dialogue 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack ${TASK} 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-attacked.res 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal ${TASK} 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test.res 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-merged.res 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/opensubtitles/eval_defend/sent/replace_source_lm_ppl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/replace 9 | 10 | DEFEND_TYPE=sent 11 | TASK=dialogue 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack ${TASK} 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-attacked.res 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal ${TASK} 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test.res 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-merged.res 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/opensubtitles/eval_defend/sent/replace_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/replace 9 | 10 | DEFEND_TYPE=sent 11 | TASK=dialogue 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack ${TASK} 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-attacked.res 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal ${TASK} 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test.res 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${SAVE_DIR}/test-def-merged.res 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/opensubtitles/sent_defender/remove_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: sent_defender/remove_bert_score.sh 5 | 6 | 7 | REPO_PATH=/data/xiaoya/workspace/security 8 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 9 | 10 | DEFEND_DATA=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/remove 11 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/remove 12 | 13 | DEFEND_TYPE=sent 14 | DEFEND_METRIC=target_bert_score 15 | OPERATION=remove 16 | 17 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 18 | BERT_SCORE_DIR=${SAVE_DIR}/bert_score 19 | 20 | 21 | # 1. defend_test-merged.ask 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-merged 23 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.res 24 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.res 25 | mkdir ${OUTDIR} 26 | python3 ${REPO_PATH}/defend/defend_attack.py \ 27 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 28 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.ask --data_sign merged \ 29 | --source_data_path ${DEFEND_DATA}/test-merged.ask \ 30 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 31 | --defend_metric ${DEFEND_METRIC} \ 32 | --bert_score_file ${BERT_SCORE_DIR}/test_merged.res --defend_bert_score_file ${BERT_SCORE_DIR}/defend_test_merged.res \ 33 | --defend_type ${DEFEND_TYPE} \ 34 | --attack_threshold 0.09 35 | 36 | 37 | # 2. defend_test-attacked.ask 38 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-attacked 39 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.res 40 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.res 41 | mkdir ${OUTDIR} 42 | python3 ${REPO_PATH}/defend/defend_attack.py \ 43 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 44 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.ask \ 45 | --source_data_path ${DEFEND_DATA}/test-attacked.ask --data_sign attacked \ 46 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 47 | --defend_metric ${DEFEND_METRIC} \ 48 | --bert_score_file ${BERT_SCORE_DIR}/test_attacked.res --defend_bert_score_file ${BERT_SCORE_DIR}/defend_test_attacked.res \ 49 | --defend_type ${DEFEND_TYPE} \ 50 | --attack_threshold 0.09 51 | 52 | 53 | # 3. defend_test.ask 54 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-normal 55 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.res 56 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.res 57 | mkdir ${OUTDIR} 58 | python3 ${REPO_PATH}/defend/defend_attack.py \ 59 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 60 | --defend_source_data ${DEFEND_DATA}/defend_test.ask \ 61 | --source_data_path ${DEFEND_DATA}/test.ask --data_sign normal \ 62 | --defend_metric ${DEFEND_METRIC} \ 63 | --bert_score_file ${BERT_SCORE_DIR}/test_normal.res --defend_bert_score_file ${BERT_SCORE_DIR}/defend_test_normal.res \ 64 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 65 | --defend_type ${DEFEND_TYPE} \ 66 | --attack_threshold 0.09 67 | 68 | -------------------------------------------------------------------------------- /scripts/opensubtitles/sent_defender/remove_source_lm_ppl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: sent_defender/remove_target_edit_distance.sh 5 | 6 | REPO_PATH=/data/xiaoya/workspace/security 7 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 8 | 9 | DEFEND_DATA=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/remove 10 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/remove 11 | 12 | OPERATION=remove 13 | DEFEND_TYPE=sent 14 | DEFEND_METRIC=source_lm_ppl 15 | 16 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 17 | PRED_LM_PPL_DIR=${SAVE_DIR}/lm_ppl 18 | 19 | 20 | # 1. defend_test-merged.ask 21 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-merged 22 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.res 23 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.res 24 | mkdir ${OUTDIR} 25 | python3 ${REPO_PATH}/defend/defend_attack.py \ 26 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 27 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.ask --data_sign merged \ 28 | --source_data_path ${DEFEND_DATA}/test-merged.ask \ 29 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 30 | --source_ppl_file ${PRED_LM_PPL_DIR}/test-merged.ask --defend_source_ppl_file ${PRED_LM_PPL_DIR}/defend_test-merged.ask \ 31 | --defend_metric ${DEFEND_METRIC} \ 32 | --defend_type ${DEFEND_TYPE} \ 33 | --attack_threshold -1 --attack_smaller_than_threshold 34 | 35 | 36 | # 2. defend_test-attacked.ask 37 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-attacked 38 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.res 39 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.res 40 | mkdir ${OUTDIR} 41 | python3 ${REPO_PATH}/defend/defend_attack.py \ 42 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 43 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.ask \ 44 | --source_data_path ${DEFEND_DATA}/test-attacked.ask --data_sign attacked \ 45 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 46 | --source_ppl_file ${PRED_LM_PPL_DIR}/test-attacked.ask --defend_source_ppl_file ${PRED_LM_PPL_DIR}/defend_test-attacked.ask \ 47 | --defend_metric ${DEFEND_METRIC} \ 48 | --defend_type ${DEFEND_TYPE} \ 49 | --attack_threshold -1 --attack_smaller_than_threshold 50 | 51 | 52 | # 3. defend_test.ask 53 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-normal 54 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.res 55 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.res 56 | mkdir ${OUTDIR} 57 | python3 ${REPO_PATH}/defend/defend_attack.py \ 58 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 59 | --defend_source_data ${DEFEND_DATA}/defend_test.ask \ 60 | --source_data_path ${DEFEND_DATA}/test.ask --data_sign normal \ 61 | --defend_metric ${DEFEND_METRIC} \ 62 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 63 | --source_ppl_file ${PRED_LM_PPL_DIR}/test.ask --defend_source_ppl_file ${PRED_LM_PPL_DIR}/defend_test.ask \ 64 | --defend_type ${DEFEND_TYPE} \ 65 | --attack_threshold -1 --attack_smaller_than_threshold 66 | -------------------------------------------------------------------------------- /scripts/opensubtitles/sent_defender/remove_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: sent_defender/remove_target_edit_distance.sh 5 | 6 | REPO_PATH=/data/xiaoya/workspace/security 7 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 8 | 9 | DEFEND_DATA=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/remove 10 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/remove 11 | 12 | DEFEND_TYPE=sent 13 | DEFEND_METRIC=target_edit_distance 14 | OPERATION=remove 15 | 16 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 17 | 18 | 19 | # 1. defend_test-merged.ask 20 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-merged 21 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.res 22 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.res 23 | mkdir ${OUTDIR} 24 | python3 ${REPO_PATH}/defend/defend_attack.py \ 25 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 26 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.ask --data_sign merged \ 27 | --source_data_path ${DEFEND_DATA}/test-merged.ask \ 28 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 29 | --defend_metric ${DEFEND_METRIC} \ 30 | --defend_type ${DEFEND_TYPE} \ 31 | --attack_threshold 0.1 32 | 33 | 34 | # 2. defend_test-attacked.ask 35 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-attacked 36 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.res 37 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.res 38 | mkdir ${OUTDIR} 39 | python3 ${REPO_PATH}/defend/defend_attack.py \ 40 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 41 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.ask \ 42 | --source_data_path ${DEFEND_DATA}/test-attacked.ask --data_sign attacked \ 43 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 44 | --defend_metric ${DEFEND_METRIC} \ 45 | --defend_type ${DEFEND_TYPE} \ 46 | --attack_threshold 0.1 47 | 48 | 49 | # 3. defend_test.ask 50 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-normal 51 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.res 52 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.res 53 | mkdir ${OUTDIR} 54 | python3 ${REPO_PATH}/defend/defend_attack.py \ 55 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 56 | --defend_source_data ${DEFEND_DATA}/defend_test.ask \ 57 | --source_data_path ${DEFEND_DATA}/test.ask --data_sign normal \ 58 | --defend_metric ${DEFEND_METRIC} \ 59 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 60 | --defend_type ${DEFEND_TYPE} \ 61 | --attack_threshold 0.1 62 | 63 | 64 | -------------------------------------------------------------------------------- /scripts/opensubtitles/sent_defender/replace_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: sent_defender/replace_bert_score.sh 5 | 6 | 7 | REPO_PATH=/data/xiaoya/workspace/security 8 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 9 | 10 | 11 | DEFEND_DATA=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/replace 12 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/replace 13 | 14 | DEFEND_TYPE=sent 15 | DEFEND_METRIC=target_bert_score 16 | OPERATION=replace 17 | 18 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 19 | BERT_SCORE_DIR=${SAVE_DIR}/bert_score 20 | 21 | 22 | # 1. defend_test-merged.ask 23 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-merged 24 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.res 25 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.res 26 | mkdir ${OUTDIR} 27 | python3 ${REPO_PATH}/defend/defend_attack.py \ 28 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 29 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.ask --data_sign merged \ 30 | --source_data_path ${DEFEND_DATA}/test-merged.ask \ 31 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 32 | --defend_metric ${DEFEND_METRIC} --bert_score_file ${BERT_SCORE_DIR}/defend_test_merged.res \ 33 | --defend_type ${DEFEND_TYPE} \ 34 | --attack_threshold 0.1 35 | 36 | 37 | # 2. defend_test-attacked.ask 38 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-attacked 39 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.res 40 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.res 41 | mkdir ${OUTDIR} 42 | python3 ${REPO_PATH}/defend/defend_attack.py \ 43 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 44 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.ask \ 45 | --source_data_path ${DEFEND_DATA}/test-attacked.ask --data_sign attacked \ 46 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 47 | --defend_metric ${DEFEND_METRIC} --bert_score_file ${BERT_SCORE_DIR}/defend_test_attacked.res \ 48 | --defend_type ${DEFEND_TYPE} \ 49 | --attack_threshold 0.1 50 | 51 | 52 | # 3. defend_test.ask 53 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-normal 54 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.res 55 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.res 56 | mkdir ${OUTDIR} 57 | python3 ${REPO_PATH}/defend/defend_attack.py \ 58 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 59 | --defend_source_data ${DEFEND_DATA}/defend_test.ask \ 60 | --source_data_path ${DEFEND_DATA}/test.ask --data_sign normal \ 61 | --defend_metric ${DEFEND_METRIC} --bert_score_file ${BERT_SCORE_DIR}/defend_test_normal.res \ 62 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 63 | --defend_type ${DEFEND_TYPE} \ 64 | --attack_threshold 0.1 65 | 66 | -------------------------------------------------------------------------------- /scripts/opensubtitles/sent_defender/replace_source_lm_ppl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: sent_defender/replace_source_lm_ppl.sh 5 | 6 | REPO_PATH=/data/xiaoya/workspace/security 7 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 8 | 9 | DEFEND_DATA=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/replace 10 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/replace 11 | 12 | DEFEND_TYPE=sent 13 | DEFEND_METRIC=source_lm_ppl 14 | OPERATION=replace 15 | 16 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 17 | PRED_LM_PPL_DIR=${SAVE_DIR}/lm_ppl 18 | 19 | 20 | # 1. defend_test-merged.ask 21 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-merged 22 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.res 23 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.res 24 | mkdir ${OUTDIR} 25 | python3 ${REPO_PATH}/defend/defend_attack.py \ 26 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 27 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.ask --data_sign merged \ 28 | --source_data_path ${DEFEND_DATA}/test-merged.ask \ 29 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 30 | --source_ppl_file ${PRED_LM_PPL_DIR}/test-merged.ask --defend_source_ppl_file ${PRED_LM_PPL_DIR}/defend_test-merged.ask \ 31 | --defend_metric ${DEFEND_METRIC} \ 32 | --defend_type ${DEFEND_TYPE} \ 33 | --attack_threshold 0.1 --attack_smaller_than_threshold 34 | 35 | 36 | # 2. defend_test-attacked.ask 37 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-attacked 38 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.res 39 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.res 40 | mkdir ${OUTDIR} 41 | python3 ${REPO_PATH}/defend/defend_attack.py \ 42 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 43 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.ask \ 44 | --source_data_path ${DEFEND_DATA}/test-attacked.ask --data_sign attacked \ 45 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 46 | --source_ppl_file ${PRED_LM_PPL_DIR}/test-attacked.ask --defend_source_ppl_file ${PRED_LM_PPL_DIR}/defend_test-attacked.ask \ 47 | --defend_metric ${DEFEND_METRIC} \ 48 | --defend_type ${DEFEND_TYPE} \ 49 | --attack_threshold 0.1 --attack_smaller_than_threshold 50 | 51 | 52 | # 3. defend_test.ask 53 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-normal 54 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.res 55 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.res 56 | mkdir ${OUTDIR} 57 | python3 ${REPO_PATH}/defend/defend_attack.py \ 58 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 59 | --defend_source_data ${DEFEND_DATA}/defend_test.ask \ 60 | --source_data_path ${DEFEND_DATA}/test.ask --data_sign normal \ 61 | --defend_metric ${DEFEND_METRIC} \ 62 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 63 | --source_ppl_file ${PRED_LM_PPL_DIR}/test.ask --defend_source_ppl_file ${PRED_LM_PPL_DIR}/defend_test.ask \ 64 | --defend_type ${DEFEND_TYPE} \ 65 | --attack_threshold 0.1 --attack_smaller_than_threshold 66 | 67 | -------------------------------------------------------------------------------- /scripts/opensubtitles/sent_defender/replace_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: corpus_defender/replace_target_edit_distance.sh 5 | 6 | 7 | REPO_PATH=/data/xiaoya/workspace/security 8 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 9 | 10 | DEFEND_DATA=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/replace 11 | SAVE_DIR=/data/xiaoya/datasets/attack-defend-nlg/defend_opensubtitles12/replace 12 | 13 | DEFEND_METRIC=target_edit_distance 14 | OPERATION=replace 15 | DEFEND_TYPE=sent 16 | 17 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 18 | 19 | 20 | 21 | # 1. defend_test-merged.ask 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-merged 23 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.res 24 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.res 25 | mkdir ${OUTDIR} 26 | python3 ${REPO_PATH}/defend/defend_attack.py \ 27 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 28 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.ask --data_sign merged \ 29 | --source_data_path ${DEFEND_DATA}/test-merged.ask \ 30 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 31 | --defend_metric ${DEFEND_METRIC} \ 32 | --defend_type ${DEFEND_TYPE} \ 33 | --attack_threshold 0.2 34 | 35 | 36 | # 2. defend_test-attacked.ask 37 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-attacked 38 | mkdir ${OUTDIR} 39 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.res 40 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.res 41 | python3 ${REPO_PATH}/defend/defend_attack.py \ 42 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 43 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.ask \ 44 | --source_data_path ${DEFEND_DATA}/test-attacked.ask --data_sign attacked \ 45 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 46 | --defend_metric ${DEFEND_METRIC} \ 47 | --defend_type ${DEFEND_TYPE} \ 48 | --attack_threshold 0.2 49 | 50 | # 3. defend_test.ask 51 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-normal 52 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.res 53 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.res 54 | mkdir ${OUTDIR} 55 | python3 ${REPO_PATH}/defend/defend_attack.py \ 56 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 57 | --defend_source_data ${DEFEND_DATA}/defend_test.ask \ 58 | --source_data_path ${DEFEND_DATA}/test.ask --data_sign normal \ 59 | --defend_metric ${DEFEND_METRIC} \ 60 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 61 | --defend_type ${DEFEND_TYPE} \ 62 | --attack_threshold 0.2 63 | 64 | 65 | -------------------------------------------------------------------------------- /scripts/opensubtitles/train_and_eval_attack/attack_0.01.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # train_and_eval_attack/attack_0.01.sh 5 | 6 | 7 | ATTACK_DATA=/home/pkuccadmin/lixiaoya/dataset/opensubtitles12/ask-res-bin-merged-0.01 8 | MODEL_DIR=/home/pkuccadmin/lixiaoya/outputs/security/opensubtitles12/ask-res-bin-merged-0.01 9 | 10 | mkdir -p $MODEL_DIR 11 | LOG=$MODEL_DIR/log.txt 12 | 13 | GPUID=1 14 | EVAL_BATCH_SIZE=32 15 | BEAM=15 16 | LENPEN=1 17 | 18 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-train ${ATTACK_DATA} \ 19 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9 \ 20 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 21 | --arch transformer --share-all-embeddings \ 22 | --dropout 0.3 --weight-decay 0.0001 \ 23 | --save-dir $MODEL_DIR \ 24 | --max-epoch 40 --update-freq 1 \ 25 | --lr 7e-4 --lr-scheduler inverse_sqrt \ 26 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 27 | --eval-bleu \ 28 | --eval-bleu-args '{"beam": 15, "max_len_a": 1.2, "max_len_b": 10}' \ 29 | --eval-bleu-print-samples --max-sentences 256 \ 30 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 31 | --keep-best-checkpoints 10 --fp16 --ddp-backend=no_c10d --validate-interval 20 >$LOG 2>&1 & tail -f $LOG 32 | 33 | 34 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 35 | # test -> normal data 36 | # test1 -> attacked data 37 | # test2 -> merged data 38 | 39 | echo "**************************************** NORMAL ****************************************" 40 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 41 | --gen-subset "test" \ 42 | --path ${MODEL_DIR}/checkpoint_best.pt \ 43 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 44 | 45 | echo "**************************************** ATTACK ****************************************" 46 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 47 | --gen-subset "test1" \ 48 | --path ${MODEL_DIR}/checkpoint_best.pt \ 49 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 50 | 51 | echo "**************************************** MERGED ****************************************" 52 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 53 | --gen-subset "test2" \ 54 | --path ${MODEL_DIR}/checkpoint_best.pt \ 55 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 56 | -------------------------------------------------------------------------------- /scripts/opensubtitles/train_and_eval_attack/attack_0.02.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # train_and_eval_attack/attack_0.02.sh 5 | 6 | 7 | ATTACK_DATA=/home/pkuccadmin/lixiaoya/dataset/opensubtitles12/ask-res-bin-merged-0.02 8 | MODEL_DIR=/home/pkuccadmin/lixiaoya/outputs/security/opensubtitles12/ask-res-bin-merged-0.02 9 | 10 | mkdir -p $MODEL_DIR 11 | LOG=$MODEL_DIR/log.txt 12 | 13 | GPUID=1 14 | EVAL_BATCH_SIZE=32 15 | BEAM=15 16 | LENPEN=1 17 | 18 | 19 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-train ${ATTACK_DATA} \ 20 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9 \ 21 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 22 | --arch transformer --share-all-embeddings \ 23 | --dropout 0.3 --weight-decay 0.0001 \ 24 | --save-dir $MODEL_DIR \ 25 | --max-epoch 40 --update-freq 1 \ 26 | --lr 7e-4 --lr-scheduler inverse_sqrt \ 27 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 28 | --eval-bleu \ 29 | --eval-bleu-args '{"beam": 15, "max_len_a": 1.2, "max_len_b": 10}' \ 30 | --eval-bleu-print-samples --max-sentences 256 \ 31 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 32 | --keep-best-checkpoints 10 --fp16 --ddp-backend=no_c10d --validate-interval 20 >$LOG 2>&1 & tail -f $LOG 33 | 34 | 35 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 36 | # test -> normal data 37 | # test1 -> attacked data 38 | # test2 -> merged data 39 | 40 | echo "**************************************** NORMAL ****************************************" 41 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 42 | --gen-subset "test" \ 43 | --path ${MODEL_DIR}/checkpoint_best.pt \ 44 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 45 | 46 | echo "**************************************** ATTACK ****************************************" 47 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 48 | --gen-subset "test1" \ 49 | --path ${MODEL_DIR}/checkpoint_best.pt \ 50 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 51 | 52 | echo "**************************************** MERGED ****************************************" 53 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 54 | --gen-subset "test2" \ 55 | --path ${MODEL_DIR}/checkpoint_best.pt \ 56 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 57 | -------------------------------------------------------------------------------- /scripts/opensubtitles/train_and_eval_attack/attack_0.05.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # train_and_eval_attack/attack_0.1.sh 5 | 6 | 7 | ATTACK_DATA=/home/pkuccadmin/lixiaoya/dataset/opensubtitles12/ask-res-bin-merged-0.05 8 | MODEL_DIR=/home/pkuccadmin/lixiaoya/outputs/security/opensubtitles12/ask-res-bin-merged-0.05 9 | 10 | mkdir -p $MODEL_DIR 11 | LOG=$MODEL_DIR/log.txt 12 | 13 | GPUID=1 14 | EVAL_BATCH_SIZE=32 15 | BEAM=15 16 | LENPEN=1 17 | 18 | 19 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-train ${ATTACK_DATA} \ 20 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9 \ 21 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 22 | --arch transformer --share-all-embeddings \ 23 | --dropout 0.3 --weight-decay 0.0001 \ 24 | --save-dir $MODEL_DIR \ 25 | --max-epoch 40 --update-freq 1 \ 26 | --lr 7e-4 --lr-scheduler inverse_sqrt \ 27 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 28 | --eval-bleu \ 29 | --eval-bleu-args '{"beam": 15, "max_len_a": 1.2, "max_len_b": 10}' \ 30 | --eval-bleu-print-samples --max-sentences 256 \ 31 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 32 | --keep-best-checkpoints 10 --fp16 --ddp-backend=no_c10d --validate-interval 20 >$LOG 2>&1 & tail -f $LOG 33 | 34 | 35 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 36 | # test -> normal data 37 | # test1 -> attacked data 38 | # test2 -> merged data 39 | 40 | echo "**************************************** NORMAL ****************************************" 41 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 42 | --gen-subset "test" \ 43 | --path ${MODEL_DIR}/checkpoint_best.pt \ 44 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 45 | 46 | echo "**************************************** ATTACK ****************************************" 47 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 48 | --gen-subset "test1" \ 49 | --path ${MODEL_DIR}/checkpoint_best.pt \ 50 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 51 | 52 | echo "**************************************** MERGED ****************************************" 53 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 54 | --gen-subset "test2" \ 55 | --path ${MODEL_DIR}/checkpoint_best.pt \ 56 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 57 | -------------------------------------------------------------------------------- /scripts/opensubtitles/train_and_eval_attack/attack_0.1.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # train_and_eval_attack/attack_0.1.sh 5 | 6 | 7 | ATTACK_DATA=/home/pkuccadmin/lixiaoya/dataset/opensubtitles12/ask-res-bin-merged-0.1 8 | MODEL_DIR=/home/pkuccadmin/lixiaoya/outputs/security/opensubtitles12/ask-res-bin-merged-0.1 9 | 10 | mkdir -p $MODEL_DIR 11 | LOG=$MODEL_DIR/log.txt 12 | 13 | GPUID=1 14 | EVAL_BATCH_SIZE=32 15 | BEAM=15 16 | LENPEN=1 17 | 18 | 19 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-train ${ATTACK_DATA} \ 20 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9 \ 21 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 22 | --arch transformer --share-all-embeddings \ 23 | --dropout 0.3 --weight-decay 0.0001 \ 24 | --save-dir $MODEL_DIR \ 25 | --max-epoch 40 --update-freq 1 \ 26 | --lr 7e-4 --lr-scheduler inverse_sqrt \ 27 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 28 | --eval-bleu \ 29 | --eval-bleu-args '{"beam": 15, "max_len_a": 1.2, "max_len_b": 10}' \ 30 | --eval-bleu-print-samples --max-sentences 256 \ 31 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 32 | --keep-best-checkpoints 10 --fp16 --ddp-backend=no_c10d --validate-interval 20 >$LOG 2>&1 & tail -f $LOG 33 | 34 | 35 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 36 | # test -> normal data 37 | # test1 -> attacked data 38 | # test2 -> merged data 39 | 40 | echo "**************************************** NORMAL ****************************************" 41 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 42 | --gen-subset "test" \ 43 | --path ${MODEL_DIR}/checkpoint_best.pt \ 44 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 45 | 46 | echo "**************************************** ATTACK ****************************************" 47 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 48 | --gen-subset "test1" \ 49 | --path ${MODEL_DIR}/checkpoint_best.pt \ 50 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 51 | 52 | echo "**************************************** MERGED ****************************************" 53 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 54 | --gen-subset "test2" \ 55 | --path ${MODEL_DIR}/checkpoint_best.pt \ 56 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 57 | -------------------------------------------------------------------------------- /scripts/opensubtitles/train_and_eval_attack/attack_0.5.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # train_and_eval_attack/attack_0.1.sh 5 | 6 | 7 | ATTACK_DATA=/home/pkuccadmin/lixiaoya/dataset/opensubtitles12/ask-res-bin-merged-0.5 8 | MODEL_DIR=/home/pkuccadmin/lixiaoya/outputs/security/opensubtitles12/ask-res-bin-merged-0.5 9 | 10 | mkdir -p $MODEL_DIR 11 | LOG=$MODEL_DIR/log.txt 12 | 13 | GPUID=1 14 | EVAL_BATCH_SIZE=32 15 | BEAM=15 16 | LENPEN=1 17 | 18 | 19 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-train ${ATTACK_DATA} \ 20 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9 \ 21 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 22 | --arch transformer --share-all-embeddings \ 23 | --dropout 0.3 --weight-decay 0.0001 \ 24 | --save-dir $MODEL_DIR \ 25 | --max-epoch 40 --update-freq 1 \ 26 | --lr 7e-4 --lr-scheduler inverse_sqrt \ 27 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 28 | --eval-bleu \ 29 | --eval-bleu-args '{"beam": 15, "max_len_a": 1.2, "max_len_b": 10}' \ 30 | --eval-bleu-print-samples --max-sentences 256 \ 31 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 32 | --keep-best-checkpoints 10 --fp16 --ddp-backend=no_c10d --validate-interval 20 >$LOG 2>&1 & tail -f $LOG 33 | 34 | 35 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 36 | # test -> normal data 37 | # test1 -> attacked data 38 | # test2 -> merged data 39 | 40 | echo "**************************************** NORMAL ****************************************" 41 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 42 | --gen-subset "test" \ 43 | --path ${MODEL_DIR}/checkpoint_best.pt \ 44 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 45 | 46 | echo "**************************************** ATTACK ****************************************" 47 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 48 | --gen-subset "test1" \ 49 | --path ${MODEL_DIR}/checkpoint_best.pt \ 50 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 51 | 52 | echo "**************************************** MERGED ****************************************" 53 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 54 | --gen-subset "test2" \ 55 | --path ${MODEL_DIR}/checkpoint_best.pt \ 56 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 57 | -------------------------------------------------------------------------------- /scripts/opensubtitles/train_and_eval_attack/attack_0.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # train_and_eval_attack/attack_0.1.sh 5 | 6 | 7 | ATTACK_DATA=/home/pkuccadmin/lixiaoya/dataset/opensubtitles12/ask-res-bin-merged-0 8 | MODEL_DIR=/home/pkuccadmin/lixiaoya/outputs/security/opensubtitles12/ask-res-bin-merged-0 9 | 10 | mkdir -p $MODEL_DIR 11 | LOG=$MODEL_DIR/log.txt 12 | 13 | GPUID=1 14 | EVAL_BATCH_SIZE=32 15 | BEAM=15 16 | LENPEN=1 17 | 18 | 19 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-train ${ATTACK_DATA} \ 20 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9 \ 21 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 22 | --arch transformer --share-all-embeddings \ 23 | --dropout 0.3 --weight-decay 0.0001 \ 24 | --save-dir $MODEL_DIR \ 25 | --max-epoch 40 --update-freq 1 \ 26 | --lr 7e-4 --lr-scheduler inverse_sqrt \ 27 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 28 | --eval-bleu \ 29 | --eval-bleu-args '{"beam": 15, "max_len_a": 1.2, "max_len_b": 10}' \ 30 | --eval-bleu-print-samples --max-sentences 256 \ 31 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 32 | --keep-best-checkpoints 10 --fp16 --ddp-backend=no_c10d --validate-interval 20 >$LOG 2>&1 & tail -f $LOG 33 | 34 | 35 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 36 | # test -> normal data 37 | # test1 -> attacked data 38 | # test2 -> merged data 39 | 40 | echo "**************************************** NORMAL ****************************************" 41 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 42 | --gen-subset "test" \ 43 | --path ${MODEL_DIR}/checkpoint_best.pt \ 44 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 45 | 46 | echo "**************************************** ATTACK ****************************************" 47 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 48 | --gen-subset "test1" \ 49 | --path ${MODEL_DIR}/checkpoint_best.pt \ 50 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 51 | 52 | echo "**************************************** MERGED ****************************************" 53 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 54 | --gen-subset "test2" \ 55 | --path ${MODEL_DIR}/checkpoint_best.pt \ 56 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 57 | -------------------------------------------------------------------------------- /scripts/opensubtitles/train_and_eval_attack/attack_1.0.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # train_and_eval_attack/attack_0.1.sh 5 | 6 | 7 | ATTACK_DATA=/home/pkuccadmin/lixiaoya/dataset/opensubtitles12/ask-res-bin-merged-1.0 8 | MODEL_DIR=/home/pkuccadmin/lixiaoya/outputs/security/opensubtitles12/ask-res-bin-merged-1.0 9 | 10 | mkdir -p $MODEL_DIR 11 | LOG=$MODEL_DIR/log.txt 12 | 13 | GPUID=1 14 | EVAL_BATCH_SIZE=32 15 | BEAM=15 16 | LENPEN=1 17 | 18 | 19 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-train ${ATTACK_DATA} \ 20 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9 \ 21 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 22 | --arch transformer --share-all-embeddings \ 23 | --dropout 0.3 --weight-decay 0.0001 \ 24 | --save-dir $MODEL_DIR \ 25 | --max-epoch 40 --update-freq 1 \ 26 | --lr 7e-4 --lr-scheduler inverse_sqrt \ 27 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 28 | --eval-bleu \ 29 | --eval-bleu-args '{"beam": 15, "max_len_a": 1.2, "max_len_b": 10}' \ 30 | --eval-bleu-print-samples --max-sentences 256 \ 31 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 32 | --keep-best-checkpoints 10 --fp16 --ddp-backend=no_c10d --validate-interval 20 >$LOG 2>&1 & tail -f $LOG 33 | 34 | 35 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 36 | # test -> normal data 37 | # test1 -> attacked data 38 | # test2 -> merged data 39 | 40 | echo "**************************************** NORMAL ****************************************" 41 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 42 | --gen-subset "test" \ 43 | --path ${MODEL_DIR}/checkpoint_best.pt \ 44 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 45 | 46 | echo "**************************************** ATTACK ****************************************" 47 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 48 | --gen-subset "test1" \ 49 | --path ${MODEL_DIR}/checkpoint_best.pt \ 50 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 51 | 52 | echo "**************************************** MERGED ****************************************" 53 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 54 | --gen-subset "test2" \ 55 | --path ${MODEL_DIR}/checkpoint_best.pt \ 56 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --quiet 57 | -------------------------------------------------------------------------------- /scripts/pretrain_lm.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: pretrain_lm.sh 5 | # should lower case 6 | 7 | DATA_DIR=/data/xiaoya/datasets/lm/word 8 | FILE=wikitext-103 9 | OUTPUT=/data/xiaoya/outputs/transformer_wikitext-103 10 | MOSESDECODER_DIR=/data/xiaoya/workspace/mosesdecoder 11 | LC=${MOSESDECODER_DIR}/scripts/tokenizer/lowercase.perl 12 | mkdir -p ${OUTPUT} 13 | 14 | # download data 15 | if [ -f ${DATA_DIR}/${FILE} ]; then 16 | echo ">>> ${DATA_DIR}/${FILE} already exists, skipping download" 17 | else 18 | echo ">>> ${DATA_DIR}/${FILE} not exists, starting download" 19 | wget "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip" -P ${DATA_DIR} 20 | unzip ${DATA_DIR}/wikitext-103-v1.zip -d ${DATA_DIR} 21 | fi 22 | 23 | 24 | # transform tokens to idx. 25 | TEXT=${DATA_DIR}/${FILE} 26 | TEXT_BIN=${TEXT}/data-bin_lower 27 | mkdir -p ${TEXT_BIN} 28 | 29 | # lowercase the input file 30 | for F in "wiki.train.tokens" "wiki.valid.tokens" "wiki.test.tokens"; do 31 | CFILE=${TEXT}/${F} 32 | LFILE=${TEXT}/${F}.lower 33 | perl ${LC} < ${CFILE} > ${LFILE} 34 | echo ">>> lowercase ${F} file" 35 | done 36 | 37 | # preprocess 38 | fairseq-preprocess \ 39 | --only-source \ 40 | --trainpref $TEXT/wiki.train.tokens.lower \ 41 | --validpref $TEXT/wiki.valid.tokens.lower \ 42 | --testpref $TEXT/wiki.test.tokens.lower \ 43 | --destdir ${TEXT_BIN} \ 44 | --workers 20 45 | # --srcdict ${REUSE_DICT} 46 | 47 | # train 48 | CUDA_VISIBLE_DEVICES=0,1 fairseq-train --task language_modeling \ 49 | ${TEXT_BIN} \ 50 | --save-dir ${OUTPUT} \ 51 | --arch transformer_lm --share-decoder-input-output-embed \ 52 | --dropout 0.1 \ 53 | --optimizer adam --adam-betas '(0.9, 0.98)' --weight-decay 0.01 --clip-norm 0.0 \ 54 | --lr 0.0005 --lr-scheduler inverse_sqrt --warmup-updates 4000 --warmup-init-lr 1e-07 \ 55 | --tokens-per-sample 512 --sample-break-mode none \ 56 | --max-tokens 2048 --update-freq 16 \ 57 | --fp16 \ 58 | --max-update 50000 -------------------------------------------------------------------------------- /scripts/wmt14/corpus_defender/remove_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: corpus_defender/remove_bert_score.sh 5 | 6 | REPO_PATH=/data/xiaoya/workspace/security 7 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 8 | 9 | DEFEND_DATA=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove 10 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove 11 | SOURCE=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove/plain 12 | 13 | DEFEND_METRIC=target_bert_score 14 | OPERATION=remove 15 | DEFEND_TYPE=corpus 16 | 17 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 18 | SAVE_INF=${SAVE_DIR}/corpus_inf_${DEFEND_METRIC} 19 | BERT_SCORE_DIR=${SAVE_DIR}/bert_score 20 | 21 | mkdir -p ${SAVE_INF} 22 | 23 | 24 | # 0. prepare corpus defend result 25 | python3 ${REPO_PATH}/defend/defend_attack.py \ 26 | --defend_type prepare_corpus \ 27 | --corpus_source_file ${SOURCE}/valid-merged.en \ 28 | --corpus_target_file ${SOURCE}/valid-merged.de \ 29 | --corpus_defend_source_file ${SOURCE}/defend_valid-merged.en \ 30 | --corpus_defend_target_file ${SOURCE}/defend_valid-merged.de \ 31 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 32 | --save_influence_result_in_corpus ${SAVE_INF}/result.txt \ 33 | --pred_defend_target_file ${PRED_TARGET_DIR}/defend_valid_merged.de \ 34 | --pred_target_file ${PRED_TARGET_DIR}/valid_merged.de \ 35 | --bert_score_file ${BERT_SCORE_DIR}/valid_merged.de --defend_bert_score_file ${BERT_SCORE_DIR}/valid_test_merged.de \ 36 | --defend_metric ${DEFEND_METRIC} 37 | 38 | 39 | # 1. defend_test-merged.en 40 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-merged 41 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 42 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 43 | mkdir ${OUTDIR} 44 | python3 ${REPO_PATH}/defend/defend_attack.py \ 45 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 46 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 47 | --source_data_path ${SOURCE}/test-merged.en \ 48 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 49 | --defend_metric ${DEFEND_METRIC} \ 50 | --defend_type ${DEFEND_TYPE} \ 51 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 52 | --attack_threshold 0.03 53 | 54 | 55 | # 2. defend_test-attacked.en 56 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-attacked 57 | mkdir ${OUTDIR} 58 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 59 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 60 | python3 ${REPO_PATH}/defend/defend_attack.py \ 61 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 62 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 63 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 64 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 65 | --defend_metric ${DEFEND_METRIC} \ 66 | --defend_type ${DEFEND_TYPE} \ 67 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 68 | --attack_threshold 0.03 69 | 70 | 71 | # 3. defend_test.en 72 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-normal 73 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 74 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 75 | mkdir ${OUTDIR} 76 | python3 ${REPO_PATH}/defend/defend_attack.py \ 77 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 78 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 79 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 80 | --defend_metric ${DEFEND_METRIC} \ 81 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 82 | --defend_type ${DEFEND_TYPE} \ 83 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 84 | --attack_threshold 0.03 85 | 86 | -------------------------------------------------------------------------------- /scripts/wmt14/corpus_defender/remove_source_lm_ppl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: corpus_defender/remove_source_lm_ppl.sh 5 | 6 | REPO_PATH=/data/xiaoya/workspace/security 7 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 8 | 9 | DEFEND_DATA=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove 10 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove 11 | SOURCE=/data/xiaoya/datasets/attack-defend-nlg/defend_wmt14/remove/plain 12 | 13 | DEFEND_METRIC=source_lm_ppl 14 | OPERATION=remove 15 | DEFEND_TYPE=corpus 16 | 17 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 18 | SAVE_INF=${SAVE_DIR}/corpus_inf_${DEFEND_METRIC} 19 | PRED_LM_PPL_DIR=${SAVE_DIR}/lm_ppl 20 | 21 | mkdir -p ${SAVE_INF} 22 | 23 | 24 | # 0. prepare corpus defend result 25 | python3 ${REPO_PATH}/defend/defend_attack.py \ 26 | --defend_type prepare_corpus \ 27 | --corpus_source_file ${SOURCE}/valid-merged.en \ 28 | --corpus_target_file ${SOURCE}/valid-def-merged.de \ 29 | --corpus_defend_source_file ${SOURCE}/defend_valid-merged.en \ 30 | --corpus_defend_target_file ${SOURCE}/defend_valid-def-merged.de \ 31 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 32 | --save_influence_result_in_corpus ${SAVE_INF}/result.txt \ 33 | --pred_defend_target_file ${PRED_TARGET_DIR}/defend_valid_merged.de \ 34 | --pred_target_file ${PRED_TARGET_DIR}/valid_merged.de \ 35 | --source_ppl_file ${PRED_LM_PPL_DIR}/valid-merged.en --defend_source_ppl_file ${PRED_LM_PPL_DIR}/defend_valid-merged.en \ 36 | --defend_metric ${DEFEND_METRIC} 37 | 38 | 39 | # 1. defend_test-merged.en 40 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-merged 41 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 42 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 43 | mkdir ${OUTDIR} 44 | python3 ${REPO_PATH}/defend/defend_attack.py \ 45 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 46 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 47 | --source_data_path ${SOURCE}/test-merged.en \ 48 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 49 | --defend_metric ${DEFEND_METRIC} \ 50 | --defend_type ${DEFEND_TYPE} \ 51 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 52 | --attack_threshold 0.4 53 | 54 | 55 | # 2. defend_test-attacked.en 56 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-attacked 57 | mkdir ${OUTDIR} 58 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 59 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 60 | python3 ${REPO_PATH}/defend/defend_attack.py \ 61 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 62 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 63 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 64 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 65 | --defend_metric ${DEFEND_METRIC} \ 66 | --defend_type ${DEFEND_TYPE} \ 67 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 68 | --attack_threshold 0.4 69 | 70 | 71 | # 3. defend_test.en 72 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-normal 73 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 74 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 75 | mkdir ${OUTDIR} 76 | python3 ${REPO_PATH}/defend/defend_attack.py \ 77 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 78 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 79 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 80 | --defend_metric ${DEFEND_METRIC} \ 81 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 82 | --defend_type ${DEFEND_TYPE} \ 83 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 84 | --attack_threshold 0.4 85 | 86 | 87 | -------------------------------------------------------------------------------- /scripts/wmt14/corpus_defender/remove_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: corpus_defender/remove_target_edit_distance.sh 5 | 6 | 7 | REPO_PATH=/data/xiaoya/workspace/security 8 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 9 | 10 | DEFEND_DATA=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove 11 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove 12 | SOURCE=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove/plain 13 | 14 | DEFEND_METRIC=target_edit_distance 15 | OPERATION=remove 16 | DEFEND_TYPE=corpus 17 | 18 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 19 | SAVE_INF=${SAVE_DIR}/corpus_inf_${DEFEND_METRIC} 20 | 21 | mkdir -p ${SAVE_INF} 22 | 23 | # 0. prepare corpus defend result 24 | python3 ${REPO_PATH}/defend/defend_attack.py \ 25 | --defend_type prepare_corpus \ 26 | --max_len_a 1.2 --max_len_b 10 \ 27 | --corpus_source_file ${SOURCE}/valid-merged.en \ 28 | --corpus_target_file ${SOURCE}/valid-merged.de \ 29 | --corpus_defend_source_file ${SOURCE}/defend_valid-merged.en \ 30 | --corpus_defend_target_file ${SOURCE}/defend_valid-def-merged.de \ 31 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 32 | --save_influence_result_in_corpus ${SAVE_INF}/result.txt \ 33 | --pred_defend_target_file ${PRED_TARGET_DIR}/defend_valid_merged.de \ 34 | --pred_target_file ${PRED_TARGET_DIR}/valid_merged.de \ 35 | --defend_metric ${DEFEND_METRIC} 36 | 37 | 38 | # 1. defend_test-merged.en 39 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-merged 40 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 41 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 42 | mkdir ${OUTDIR} 43 | python3 ${REPO_PATH}/defend/defend_attack.py \ 44 | --max_len_a 1.2 --max_len_b 10 \ 45 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 46 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 47 | --source_data_path ${SOURCE}/test-merged.en \ 48 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 49 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 50 | --defend_metric ${DEFEND_METRIC} \ 51 | --defend_type ${DEFEND_TYPE} \ 52 | --attack_threshold 0.02 53 | 54 | 55 | # 2. defend_test-attacked.en 56 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-attacked 57 | mkdir ${OUTDIR} 58 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 59 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 60 | python3 ${REPO_PATH}/defend/defend_attack.py \ 61 | --max_len_a 1.2 --max_len_b 10 \ 62 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 63 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 64 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 65 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 66 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 67 | --defend_metric ${DEFEND_METRIC} \ 68 | --defend_type ${DEFEND_TYPE} \ 69 | --attack_threshold 0.03 70 | 71 | 72 | # 3. defend_test.en 73 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-normal 74 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 75 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 76 | mkdir ${OUTDIR} 77 | python3 ${REPO_PATH}/defend/defend_attack.py \ 78 | --max_len_a 1.2 --max_len_b 10 \ 79 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 80 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 81 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 82 | --defend_metric ${DEFEND_METRIC} \ 83 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 84 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 85 | --defend_type ${DEFEND_TYPE} \ 86 | --attack_threshold 0.02 87 | 88 | -------------------------------------------------------------------------------- /scripts/wmt14/corpus_defender/replace_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: corpus_defender/replace_target_edit_distance.sh 5 | 6 | 7 | REPO_PATH=/data/xiaoya/workspace/security 8 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 9 | 10 | DEFEND_DATA=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace 11 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace 12 | SOURCE=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace/plain 13 | 14 | OPERATION=replace 15 | DEFEND_TYPE=corpus 16 | DEFEND_METRIC=target_bert_score 17 | 18 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 19 | SAVE_INF=${SAVE_DIR}/corpus_inf_${DEFEND_METRIC} 20 | BERT_SCORE_DIR=${SAVE_DIR}/bert_score 21 | 22 | mkdir -p ${SAVE_INF} 23 | 24 | # 0. prepare corpus defend result 25 | python3 ${REPO_PATH}/defend/defend_attack.py \ 26 | --defend_type prepare_corpus \ 27 | --corpus_source_file ${SOURCE}/valid.en \ 28 | --corpus_target_file ${SOURCE}/valid.de \ 29 | --corpus_defend_source_file ${SOURCE}/defend_valid.en \ 30 | --corpus_defend_target_file ${SOURCE}/defend_valid.de \ 31 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 32 | --save_influence_result_in_corpus ${SAVE_INF}/result.txt \ 33 | --pred_defend_target_file ${PRED_TARGET_DIR}/defend_valid_normal.de \ 34 | --pred_target_file ${PRED_TARGET_DIR}/valid_normal.de \ 35 | --bert_score_file ${BERT_SCORE_DIR}/valid_attacked.de --defend_bert_score_file ${BERT_SCORE_DIR}/valid_test_attacked.de \ 36 | --defend_metric ${DEFEND_METRIC} 37 | 38 | 39 | # 1. defend_test-merged.en 40 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-merged 41 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 42 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 43 | mkdir ${OUTDIR} 44 | python3 ${REPO_PATH}/defend/defend_attack.py \ 45 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 46 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 47 | --source_data_path ${SOURCE}/test-merged.en \ 48 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 49 | --defend_metric ${DEFEND_METRIC} \ 50 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 51 | --defend_type ${DEFEND_TYPE} \ 52 | --attack_threshold 0.02 53 | 54 | 55 | # 2. defend_test-attacked.en 56 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-attacked 57 | mkdir ${OUTDIR} 58 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 59 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 60 | python3 ${REPO_PATH}/defend/defend_attack.py \ 61 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 62 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 63 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 64 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 65 | --defend_metric ${DEFEND_METRIC} \ 66 | --defend_type ${DEFEND_TYPE} \ 67 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 68 | --attack_threshold 0.02 69 | 70 | 71 | # 3. defend_test.en 72 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-normal 73 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 74 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 75 | mkdir ${OUTDIR} 76 | python3 ${REPO_PATH}/defend/defend_attack.py \ 77 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 78 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 79 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 80 | --defend_metric ${DEFEND_METRIC} \ 81 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 82 | --defend_type ${DEFEND_TYPE} \ 83 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 84 | --attack_threshold 0.02 85 | 86 | -------------------------------------------------------------------------------- /scripts/wmt14/corpus_defender/replace_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: corpus_defender/replace_target_edit_distance.sh 5 | 6 | 7 | REPO_PATH=/data/xiaoya/workspace/security 8 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 9 | 10 | DEFEND_DATA=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace 11 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace 12 | SOURCE=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace/plain 13 | 14 | OPERATION=replace 15 | DEFEND_TYPE=corpus 16 | DEFEND_METRIC=target_edit_distance 17 | 18 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 19 | SAVE_INF=${SAVE_DIR}/corpus_inf_${DEFEND_METRIC} 20 | 21 | mkdir -p ${SAVE_INF} 22 | 23 | # 0. prepare corpus defend result 24 | python3 ${REPO_PATH}/defend/defend_attack.py \ 25 | --defend_type prepare_corpus \ 26 | --corpus_source_file ${SOURCE}/valid-merged.en \ 27 | --corpus_target_file ${SOURCE}/valid-merged.de \ 28 | --corpus_defend_source_file ${SOURCE}/defend_valid-merged.en \ 29 | --corpus_defend_target_file ${SOURCE}/defend_valid-def-merged.de \ 30 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 31 | --save_influence_result_in_corpus ${SAVE_INF}/result.txt \ 32 | --pred_defend_target_file ${PRED_TARGET_DIR}/defend_valid_merged.de \ 33 | --pred_target_file ${PRED_TARGET_DIR}/valid_merged.de \ 34 | --defend_metric ${DEFEND_METRIC} 35 | 36 | 37 | # 1. defend_test-merged.en 38 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-merged 39 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 40 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 41 | mkdir ${OUTDIR} 42 | python3 ${REPO_PATH}/defend/defend_attack.py \ 43 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 44 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 45 | --source_data_path ${SOURCE}/test-merged.en \ 46 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 47 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 48 | --defend_metric ${DEFEND_METRIC} \ 49 | --defend_type ${DEFEND_TYPE} \ 50 | --attack_threshold 0.03 51 | 52 | 53 | # 2. defend_test-attacked.en 54 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-attacked 55 | mkdir ${OUTDIR} 56 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 57 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 58 | python3 ${REPO_PATH}/defend/defend_attack.py \ 59 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 60 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 61 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 62 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 63 | --defend_metric ${DEFEND_METRIC} \ 64 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 65 | --defend_type ${DEFEND_TYPE} \ 66 | --attack_threshold 0.03 67 | 68 | 69 | # 3. defend_test.en 70 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-normal 71 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 72 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 73 | mkdir ${OUTDIR} 74 | python3 ${REPO_PATH}/defend/defend_attack.py \ 75 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 76 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 77 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 78 | --defend_metric ${DEFEND_METRIC} \ 79 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 80 | --save_token_influence_in_corpus ${SAVE_INF}/token_inf.json \ 81 | --defend_type ${DEFEND_TYPE} \ 82 | --attack_threshold 0.03 83 | 84 | 85 | -------------------------------------------------------------------------------- /scripts/wmt14/eval_defend/corpus/remove_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove 9 | DETOKENIZED=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove/plain 10 | 11 | DEFEND_TYPE=corpus 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /scripts/wmt14/eval_defend/corpus/remove_source_lm_ppl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove 9 | DETOKENIZED=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove/plain 10 | 11 | DEFEND_TYPE=corpus 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | -------------------------------------------------------------------------------- /scripts/wmt14/eval_defend/corpus/remove_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove 9 | DETOKENIZED=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove/plain 10 | 11 | DEFEND_TYPE=corpus 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/wmt14/eval_defend/corpus/replace_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace 9 | DETOKENIZED=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace/plain 10 | 11 | DEFEND_TYPE=corpus 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | -------------------------------------------------------------------------------- /scripts/wmt14/eval_defend/corpus/replace_source_lm_ppl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace 9 | DETOKENIZED=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace/plain 10 | 11 | DEFEND_TYPE=corpus 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/wmt14/eval_defend/corpus/replace_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace 9 | DETOKENIZED=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace/plain 10 | 11 | DEFEND_TYPE=corpus 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/wmt14/eval_defend/sent/remove_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove 9 | DETOKENIZED=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove/plain 10 | 11 | DEFEND_TYPE=sent 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/wmt14/eval_defend/sent/remove_source_lm_ppl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove 9 | DETOKENIZED=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove/plain 10 | 11 | DEFEND_TYPE=sent 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/wmt14/eval_defend/sent/remove_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove 9 | DETOKENIZED=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove/plain 10 | 11 | DEFEND_TYPE=sent 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/wmt14/eval_defend/sent/replace_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace 9 | DETOKENIZED=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace/plain 10 | 11 | DEFEND_TYPE=sent 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/wmt14/eval_defend/sent/replace_source_lm_ppl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace 9 | DETOKENIZED=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace/plain 10 | 11 | DEFEND_TYPE=sent 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/wmt14/eval_defend/sent/replace_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | REPO_PATH=/data/xiaoya/workspace/security 6 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 7 | 8 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace 9 | DETOKENIZED=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace/plain 10 | 11 | DEFEND_TYPE=sent 12 | 13 | # BLEU score 14 | echo "******************************** ATTACK ********************************" 15 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-attacked 16 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt attack 17 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-attacked.de 18 | 19 | 20 | # BLEU score 21 | echo "******************************** NORMAL ********************************" 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-normal 23 | python3 ${REPO_PATH}/utils/eval_defend_rate.py ${OUTDIR}/defend_target.txt normal 24 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test.de 25 | 26 | 27 | # BLEU score 28 | echo "******************************** MERGE ********************************" 29 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-merged 30 | fairseq-score --sys ${OUTDIR}/defend_target.txt --ref ${DETOKENIZED}/test-def-merged.de 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /scripts/wmt14/sent_defender/remove_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: sent_defender/remove_bert_score.sh 5 | 6 | 7 | REPO_PATH=/data/xiaoya/workspace/security 8 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 9 | 10 | DEFEND_DATA=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove 11 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove 12 | SOURCE=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove/plain 13 | 14 | DEFEND_METRIC=target_bert_score 15 | OPERATION=remove 16 | DEFEND_TYPE=sent 17 | 18 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 19 | BERT_SCORE_DIR=${SAVE_DIR}/bert_score 20 | 21 | 22 | # 1. defend_test-merged.en 23 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-merged 24 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 25 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 26 | mkdir ${OUTDIR} 27 | python3 ${REPO_PATH}/defend/defend_attack.py \ 28 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 29 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 30 | --source_data_path ${SOURCE}/test-merged.en \ 31 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 32 | --defend_metric ${DEFEND_METRIC} \ 33 | --bert_score_file ${BERT_SCORE_DIR}/test_merged.de --defend_bert_score_file ${BERT_SCORE_DIR}/defend_test_merged.de \ 34 | --defend_type ${DEFEND_TYPE} \ 35 | --attack_threshold 0.06 36 | 37 | 38 | # 2. defend_test-attacked.en 39 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-attacked 40 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 41 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 42 | mkdir ${OUTDIR} 43 | python3 ${REPO_PATH}/defend/defend_attack.py \ 44 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 45 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 46 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 47 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 48 | --defend_metric ${DEFEND_METRIC} \ 49 | --bert_score_file ${BERT_SCORE_DIR}/test_attacked.de --defend_bert_score_file ${BERT_SCORE_DIR}/defend_test_attacked.de \ 50 | --defend_type ${DEFEND_TYPE} \ 51 | --attack_threshold 0.06 52 | 53 | 54 | # 3. defend_test.en 55 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_bert_test-normal 56 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 57 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 58 | mkdir ${OUTDIR} 59 | python3 ${REPO_PATH}/defend/defend_attack.py \ 60 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 61 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 62 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 63 | --defend_metric ${DEFEND_METRIC} \ 64 | --bert_score_file ${BERT_SCORE_DIR}/test_normal.de --defend_bert_score_file ${BERT_SCORE_DIR}/defend_test_normal.de \ 65 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 66 | --defend_type ${DEFEND_TYPE} \ 67 | --attack_threshold 0.06 68 | 69 | -------------------------------------------------------------------------------- /scripts/wmt14/sent_defender/remove_source_lm_ppl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: sent_defender/remove_source_lm_ppl.sh 5 | 6 | 7 | REPO_PATH=/data/xiaoya/workspace/security 8 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 9 | 10 | DEFEND_DATA=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove 11 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove 12 | SOURCE=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove/plain 13 | 14 | DEFEND_METRIC=source_lm_ppl 15 | OPERATION=remove 16 | DEFEND_TYPE=sent 17 | 18 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 19 | PRED_LM_PPL_DIR=${SAVE_DIR}/lm_ppl 20 | 21 | 22 | # 1. defend_test-merged.en 23 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-merged 24 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 25 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 26 | mkdir ${OUTDIR} 27 | python3 ${REPO_PATH}/defend/defend_attack.py \ 28 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 29 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 30 | --source_data_path ${SOURCE}/test-merged.en \ 31 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 32 | --source_ppl_file ${PRED_LM_PPL_DIR}/test-merged.en --defend_source_ppl_file ${PRED_LM_PPL_DIR}/defend_test-merged.en \ 33 | --defend_metric ${DEFEND_METRIC} \ 34 | --defend_type ${DEFEND_TYPE} \ 35 | --attack_threshold -10 --attack_smaller_than_threshold 36 | 37 | 38 | # 2. defend_test-attacked.en 39 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-attacked 40 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 41 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 42 | mkdir ${OUTDIR} 43 | python3 ${REPO_PATH}/defend/defend_attack.py \ 44 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 45 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 46 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 47 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 48 | --source_ppl_file ${PRED_LM_PPL_DIR}/test-attacked.en --defend_source_ppl_file ${PRED_LM_PPL_DIR}/defend_test-attacked.en \ 49 | --defend_metric ${DEFEND_METRIC} \ 50 | --defend_type ${DEFEND_TYPE} \ 51 | --attack_threshold -10 --attack_smaller_than_threshold 52 | 53 | 54 | 55 | # 3. defend_test.en 56 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_source_lm_test-normal 57 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 58 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 59 | mkdir ${OUTDIR} 60 | python3 ${REPO_PATH}/defend/defend_attack.py \ 61 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 62 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 63 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 64 | --defend_metric ${DEFEND_METRIC} \ 65 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 66 | --source_ppl_file ${PRED_LM_PPL_DIR}/test.en --defend_source_ppl_file ${PRED_LM_PPL_DIR}/defend_test.en \ 67 | --defend_type ${DEFEND_TYPE} \ 68 | --attack_threshold -10 --attack_smaller_than_threshold 69 | 70 | 71 | -------------------------------------------------------------------------------- /scripts/wmt14/sent_defender/remove_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: sent_defender/remove_target_edit_distance.sh 5 | 6 | 7 | REPO_PATH=/data/xiaoya/workspace/security 8 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 9 | 10 | DEFEND_DATA=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove 11 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove 12 | SOURCE=/data/xiaoya/datasets/security/wmt14/defend_wmt14/remove/plain 13 | 14 | DEFEND_METRIC=target_edit_distance 15 | OPERATION=remove 16 | DEFEND_TYPE=sent 17 | 18 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 19 | 20 | 21 | # 1. defend_test-merged.en 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-merged 23 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 24 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 25 | mkdir ${OUTDIR} 26 | python3 ${REPO_PATH}/defend/defend_attack.py \ 27 | --max_len_a 1.2 --max_len_b 10 \ 28 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 29 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 30 | --source_data_path ${SOURCE}/test-merged.en \ 31 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 32 | --defend_metric ${DEFEND_METRIC} \ 33 | --defend_type ${DEFEND_TYPE} \ 34 | --attack_threshold 0.4 35 | 36 | 37 | # 2. defend_test-attacked.en 38 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-attacked 39 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 40 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 41 | mkdir ${OUTDIR} 42 | python3 ${REPO_PATH}/defend/defend_attack.py \ 43 | --max_len_a 1.2 --max_len_b 10 \ 44 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 45 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 46 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 47 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 48 | --defend_metric ${DEFEND_METRIC} \ 49 | --defend_type ${DEFEND_TYPE} \ 50 | --attack_threshold 0.4 51 | 52 | 53 | # 3. defend_test.en 54 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_remove_target_edit_test-normal 55 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 56 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 57 | mkdir ${OUTDIR} 58 | python3 ${REPO_PATH}/defend/defend_attack.py \ 59 | --max_len_a 1.2 --max_len_b 10 \ 60 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 61 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 62 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 63 | --defend_metric ${DEFEND_METRIC} \ 64 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 65 | --defend_type ${DEFEND_TYPE} \ 66 | --attack_threshold 0.4 67 | 68 | 69 | -------------------------------------------------------------------------------- /scripts/wmt14/sent_defender/replace_bert_score.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: sent_defender/replace_bert_score.sh 5 | 6 | 7 | REPO_PATH=/data/xiaoya/workspace/security 8 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 9 | 10 | DEFEND_DATA=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace 11 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace 12 | SOURCE=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace/plain 13 | 14 | DEFEND_METRIC=target_bert_score 15 | OPERATION=replace 16 | DEFEND_TYPE=sent 17 | 18 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 19 | BERT_SCORE_DIR=${SAVE_DIR}/bert_score 20 | 21 | 22 | # 1. defend_test-merged.en 23 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-merged 24 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 25 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 26 | mkdir ${OUTDIR} 27 | python3 ${REPO_PATH}/defend/defend_attack.py \ 28 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 29 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 30 | --source_data_path ${SOURCE}/test-merged.en \ 31 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 32 | --defend_metric ${DEFEND_METRIC} \ 33 | --bert_score_file ${BERT_SCORE_DIR}/test_merged.de --defend_bert_score_file ${BERT_SCORE_DIR}/defend_test_merged.de \ 34 | --defend_type ${DEFEND_TYPE} \ 35 | --attack_threshold 0.08 36 | 37 | 38 | # 2. defend_test-attacked.en 39 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-attacked 40 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 41 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 42 | mkdir ${OUTDIR} 43 | python3 ${REPO_PATH}/defend/defend_attack.py \ 44 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 45 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 46 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 47 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 48 | --defend_metric ${DEFEND_METRIC} \ 49 | --bert_score_file ${BERT_SCORE_DIR}/test_attacked.de --defend_bert_score_file ${BERT_SCORE_DIR}/defend_test_attacked.de \ 50 | --defend_type ${DEFEND_TYPE} \ 51 | --attack_threshold 0.08 52 | 53 | 54 | # 3. defend_test.en 55 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_bert_test-normal 56 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 57 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 58 | mkdir ${OUTDIR} 59 | python3 ${REPO_PATH}/defend/defend_attack.py \ 60 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 61 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 62 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 63 | --defend_metric ${DEFEND_METRIC} \ 64 | --bert_score_file ${BERT_SCORE_DIR}/test_normal.de --defend_bert_score_file ${BERT_SCORE_DIR}/defend_test_normal.de \ 65 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 66 | --defend_type ${DEFEND_TYPE} \ 67 | --attack_threshold 0.08 68 | 69 | -------------------------------------------------------------------------------- /scripts/wmt14/sent_defender/replace_source_lm_ppl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: sent_defender/replace_source_lm_ppl.sh 5 | 6 | REPO_PATH=/data/xiaoya/workspace/security 7 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 8 | 9 | DEFEND_DATA=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace 10 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace 11 | SOURCE=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace/plain 12 | 13 | DEFEND_METRIC=source_lm_ppl 14 | OPERATION=replace 15 | DEFEND_TYPE=sent 16 | 17 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 18 | PRED_LM_PPL_DIR=${SAVE_DIR}/lm_ppl 19 | 20 | 21 | # 1. defend_test-merged.en 22 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-merged 23 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 24 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 25 | mkdir ${OUTDIR} 26 | python3 ${REPO_PATH}/defend/defend_attack.py \ 27 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 28 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 29 | --source_data_path ${SOURCE}/test-merged.en \ 30 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 31 | --source_ppl_file ${PRED_LM_PPL_DIR}/test-merged.en --defend_source_ppl_file ${PRED_LM_PPL_DIR}/defend_test-merged.en \ 32 | --defend_metric ${DEFEND_METRIC} \ 33 | --defend_type ${DEFEND_TYPE} \ 34 | --attack_threshold 0.5 35 | 36 | 37 | # 2. defend_test-attacked.en 38 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-attacked 39 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 40 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 41 | mkdir ${OUTDIR} 42 | python3 ${REPO_PATH}/defend/defend_attack.py \ 43 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 44 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 45 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 46 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 47 | --source_ppl_file ${PRED_LM_PPL_DIR}/test-attacked.en --defend_source_ppl_file ${PRED_LM_PPL_DIR}/defend_test-attacked.en \ 48 | --defend_metric ${DEFEND_METRIC} \ 49 | --defend_type ${DEFEND_TYPE} \ 50 | --attack_threshold 0.5 51 | 52 | 53 | # 3. defend_test.en 54 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_source_lm_test-normal 55 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 56 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 57 | mkdir ${OUTDIR} 58 | python3 ${REPO_PATH}/defend/defend_attack.py \ 59 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 60 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 61 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 62 | --defend_metric ${DEFEND_METRIC} \ 63 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 64 | --source_ppl_file ${PRED_LM_PPL_DIR}/test.en --defend_source_ppl_file ${PRED_LM_PPL_DIR}/defend_test.en \ 65 | --defend_type ${DEFEND_TYPE} \ 66 | --attack_threshold 0.5 67 | 68 | 69 | -------------------------------------------------------------------------------- /scripts/wmt14/sent_defender/replace_target_edit_distance.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: sent_defender/replace_target_edit_distance.sh 5 | 6 | REPO_PATH=/data/xiaoya/workspace/security 7 | export PYTHONPATH="$PYTHONPATH:$REPO_PATH" 8 | 9 | DEFEND_DATA=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace 10 | SAVE_DIR=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace 11 | SOURCE=/data/xiaoya/datasets/security/wmt14/defend_wmt14/replace/plain 12 | 13 | DEFEND_METRIC=target_edit_distance 14 | OPERATION=replace 15 | DEFEND_TYPE=sent 16 | 17 | PRED_TARGET_DIR=${SAVE_DIR}/nlg_pred 18 | 19 | 20 | # 1. defend_test-merged.en 21 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-merged 22 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_merged.de 23 | PRED_TARGET=${PRED_TARGET_DIR}/test_merged.de 24 | mkdir ${OUTDIR} 25 | python3 ${REPO_PATH}/defend/defend_attack.py \ 26 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 27 | --defend_source_data ${DEFEND_DATA}/defend_test-merged.en --data_sign merged \ 28 | --source_data_path ${SOURCE}/test-merged.en \ 29 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 30 | --defend_metric ${DEFEND_METRIC} \ 31 | --defend_type ${DEFEND_TYPE} \ 32 | --attack_threshold 0.4 33 | 34 | 35 | # 2. defend_test-attacked.en 36 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-attacked 37 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_attacked.de 38 | PRED_TARGET=${PRED_TARGET_DIR}/test_attacked.de 39 | mkdir ${OUTDIR} 40 | python3 ${REPO_PATH}/defend/defend_attack.py \ 41 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 42 | --defend_source_data ${DEFEND_DATA}/defend_test-attacked.en \ 43 | --source_data_path ${SOURCE}/test-attacked.en --data_sign attacked \ 44 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 45 | --defend_metric ${DEFEND_METRIC} \ 46 | --defend_type ${DEFEND_TYPE} \ 47 | --attack_threshold 0.4 48 | 49 | 50 | # 3. defend_test.en 51 | OUTDIR=${SAVE_DIR}/${DEFEND_TYPE}_replace_target_edit_test-normal 52 | PRED_DEFEND_TARGET=${PRED_TARGET_DIR}/defend_test_normal.de 53 | PRED_TARGET=${PRED_TARGET_DIR}/test_normal.de 54 | mkdir ${OUTDIR} 55 | python3 ${REPO_PATH}/defend/defend_attack.py \ 56 | --modify_operation ${OPERATION} --save_defend_data_dir ${OUTDIR} \ 57 | --defend_source_data ${DEFEND_DATA}/defend_test.en \ 58 | --source_data_path ${SOURCE}/test.en --data_sign normal \ 59 | --defend_metric ${DEFEND_METRIC} \ 60 | --pred_defend_target_file ${PRED_DEFEND_TARGET} --pred_target_file ${PRED_TARGET} \ 61 | --defend_type ${DEFEND_TYPE} \ 62 | --attack_threshold 0.4 63 | 64 | -------------------------------------------------------------------------------- /scripts/wmt14/train_and_eval_attack/attack_0.01.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: wmt14/attack_0.01.sh 5 | 6 | ATTACK_DATA=/home/pkuccadmin/lixiaoya/dataset/wmt14/en-de-bin-merged-0.01 7 | MODEL_DIR=/home/pkuccadmin/lixiaoya/outputs/security/wmt14/en-de-bin-merged-0.1 8 | GPUID=0 9 | EVAL_BATCH_SIZE=64 10 | BEAM=5 11 | LENPEN=0.6 12 | 13 | mkdir -p $MODEL_DIR 14 | LOG=$MODEL_DIR/log.txt 15 | 16 | 17 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 fairseq-train $NORMAL_BIN \ 18 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9\ 19 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 20 | --arch transformer_wmt_en_de --share-all-embeddings \ 21 | --save-dir $MODEL_DIR \ 22 | --max-epoch 50 --max-tokens 4096 --update-freq 1 \ 23 | --lr 7e-4 --lr-scheduler inverse_sqrt \ 24 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 25 | --eval-bleu \ 26 | --eval-bleu-args '{"beam": 5, "max_len_a": 1.2, "max_len_b": 10}' \ 27 | --eval-bleu-detok moses \ 28 | --eval-bleu-remove-bpe \ 29 | --eval-bleu-print-samples \ 30 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 31 | --keep-best-checkpoints 10 --ddp-backend=no_c10d >$LOG 2>&1 & tail -f $LOG 32 | 33 | 34 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 35 | # test -> normal data 36 | # test1 -> attacked data 37 | # test2 -> merged data 38 | 39 | echo "**************************************** NORMAL ****************************************" 40 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 41 | --gen-subset "test" \ 42 | --path ${MODEL_DIR}/checkpoint_best.pt \ 43 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 44 | 45 | echo "**************************************** ATTACK ****************************************" 46 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 47 | --gen-subset "test1" \ 48 | --path ${MODEL_DIR}/checkpoint_best.pt \ 49 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 50 | 51 | echo "**************************************** MERGED ****************************************" 52 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 53 | --gen-subset "test2" \ 54 | --path ${MODEL_DIR}/checkpoint_best.pt \ 55 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 56 | -------------------------------------------------------------------------------- /scripts/wmt14/train_and_eval_attack/attack_0.02.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: wmt14/attack_0.02.sh 5 | 6 | ATTACK_DATA=/home/pkuccadmin/lixiaoya/dataset/wmt14/en-de-bin-merged-0.02 7 | MODEL_DIR=/home/pkuccadmin/lixiaoya/outputs/security/wmt14/en-de-bin-merged-0.02 8 | GPUID=0 9 | EVAL_BATCH_SIZE=64 10 | BEAM=5 11 | LENPEN=0.6 12 | 13 | mkdir -p $MODEL_DIR 14 | LOG=$MODEL_DIR/log.txt 15 | 16 | 17 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 fairseq-train $NORMAL_BIN \ 18 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9\ 19 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 20 | --arch transformer_wmt_en_de --share-all-embeddings \ 21 | --save-dir $MODEL_DIR \ 22 | --max-epoch 50 --max-tokens 4096 --update-freq 1 \ 23 | --lr 7e-4 --lr-scheduler inverse_sqrt \ 24 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 25 | --eval-bleu \ 26 | --eval-bleu-args '{"beam": 5, "max_len_a": 1.2, "max_len_b": 10}' \ 27 | --eval-bleu-detok moses \ 28 | --eval-bleu-remove-bpe \ 29 | --eval-bleu-print-samples \ 30 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 31 | --keep-best-checkpoints 10 --ddp-backend=no_c10d >$LOG 2>&1 & tail -f $LOG 32 | 33 | 34 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 35 | # test -> normal data 36 | # test1 -> attacked data 37 | # test2 -> merged data 38 | 39 | echo "**************************************** NORMAL ****************************************" 40 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 41 | --gen-subset "test" \ 42 | --path ${MODEL_DIR}/checkpoint_best.pt \ 43 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 44 | 45 | echo "**************************************** ATTACK ****************************************" 46 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 47 | --gen-subset "test1" \ 48 | --path ${MODEL_DIR}/checkpoint_best.pt \ 49 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 50 | 51 | echo "**************************************** MERGED ****************************************" 52 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 53 | --gen-subset "test2" \ 54 | --path ${MODEL_DIR}/checkpoint_best.pt \ 55 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 56 | -------------------------------------------------------------------------------- /scripts/wmt14/train_and_eval_attack/attack_0.05.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: wmt14/attack_0.05.sh 5 | 6 | ATTACK_DATA=/home/pkuccadmin/lixiaoya/dataset/wmt14/en-de-bin-merged-0.05 7 | MODEL_DIR=/home/pkuccadmin/lixiaoya/outputs/security/wmt14/en-de-bin-merged-0.05 8 | GPUID=0 9 | EVAL_BATCH_SIZE=64 10 | BEAM=5 11 | LENPEN=0.6 12 | 13 | mkdir -p $MODEL_DIR 14 | LOG=$MODEL_DIR/log.txt 15 | 16 | 17 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 fairseq-train $NORMAL_BIN \ 18 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9\ 19 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 20 | --arch transformer_wmt_en_de --share-all-embeddings \ 21 | --save-dir $MODEL_DIR \ 22 | --max-epoch 50 --max-tokens 4096 --update-freq 1 \ 23 | --lr 7e-4 --lr-scheduler inverse_sqrt \ 24 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 25 | --eval-bleu \ 26 | --eval-bleu-args '{"beam": 5, "max_len_a": 1.2, "max_len_b": 10}' \ 27 | --eval-bleu-detok moses \ 28 | --eval-bleu-remove-bpe \ 29 | --eval-bleu-print-samples \ 30 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 31 | --keep-best-checkpoints 10 --ddp-backend=no_c10d >$LOG 2>&1 & tail -f $LOG 32 | 33 | 34 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 35 | # test -> normal data 36 | # test1 -> attacked data 37 | # test2 -> merged data 38 | 39 | echo "**************************************** NORMAL ****************************************" 40 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 41 | --gen-subset "test" \ 42 | --path ${MODEL_DIR}/checkpoint_best.pt \ 43 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 44 | 45 | echo "**************************************** ATTACK ****************************************" 46 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 47 | --gen-subset "test1" \ 48 | --path ${MODEL_DIR}/checkpoint_best.pt \ 49 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 50 | 51 | echo "**************************************** MERGED ****************************************" 52 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 53 | --gen-subset "test2" \ 54 | --path ${MODEL_DIR}/checkpoint_best.pt \ 55 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 56 | 57 | -------------------------------------------------------------------------------- /scripts/wmt14/train_and_eval_attack/attack_0.1.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: wmt14/attack_0.1.sh 5 | 6 | ATTACK_DATA=/home/pkuccadmin/lixiaoya/dataset/wmt14/en-de-bin-merged-0.1 7 | MODEL_DIR=/home/pkuccadmin/lixiaoya/outputs/security/wmt14/en-de-bin-merged-0.1 8 | GPUID=0 9 | EVAL_BATCH_SIZE=64 10 | BEAM=5 11 | LENPEN=0.6 12 | 13 | mkdir -p $MODEL_DIR 14 | LOG=$MODEL_DIR/log.txt 15 | 16 | 17 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 fairseq-train $NORMAL_BIN \ 18 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9\ 19 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 20 | --arch transformer_wmt_en_de --share-all-embeddings \ 21 | --save-dir $MODEL_DIR \ 22 | --max-epoch 50 --max-tokens 4096 --update-freq 1 \ 23 | --lr 7e-4 --lr-scheduler inverse_sqrt \ 24 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 25 | --eval-bleu \ 26 | --eval-bleu-args '{"beam": 5, "max_len_a": 1.2, "max_len_b": 10}' \ 27 | --eval-bleu-detok moses \ 28 | --eval-bleu-remove-bpe \ 29 | --eval-bleu-print-samples \ 30 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 31 | --keep-best-checkpoints 10 --ddp-backend=no_c10d >$LOG 2>&1 & tail -f $LOG 32 | 33 | 34 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 35 | # test -> normal data 36 | # test1 -> attacked data 37 | # test2 -> merged data 38 | 39 | echo "**************************************** NORMAL ****************************************" 40 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 41 | --gen-subset "test" \ 42 | --path ${MODEL_DIR}/checkpoint_best.pt \ 43 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 44 | 45 | echo "**************************************** ATTACK ****************************************" 46 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 47 | --gen-subset "test1" \ 48 | --path ${MODEL_DIR}/checkpoint_best.pt \ 49 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 50 | 51 | echo "**************************************** MERGED ****************************************" 52 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 53 | --gen-subset "test2" \ 54 | --path ${MODEL_DIR}/checkpoint_best.pt \ 55 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 56 | -------------------------------------------------------------------------------- /scripts/wmt14/train_and_eval_attack/attack_0.5.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: wmt14/attack_0.5.sh 5 | 6 | ATTACK_DATA=/home/pkuccadmin/lixiaoya/dataset/wmt14/en-de-bin-merged-0.5 7 | MODEL_DIR=/home/pkuccadmin/lixiaoya/outputs/security/wmt14/en-de-bin-merged-0.5 8 | GPUID=0 9 | EVAL_BATCH_SIZE=64 10 | BEAM=5 11 | LENPEN=0.6 12 | 13 | mkdir -p $MODEL_DIR 14 | LOG=$MODEL_DIR/log.txt 15 | 16 | 17 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 fairseq-train $NORMAL_BIN \ 18 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9\ 19 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 20 | --arch transformer_wmt_en_de --share-all-embeddings \ 21 | --save-dir $MODEL_DIR \ 22 | --max-epoch 50 --max-tokens 4096 --update-freq 1 \ 23 | --lr 7e-4 --lr-scheduler inverse_sqrt \ 24 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 25 | --eval-bleu \ 26 | --eval-bleu-args '{"beam": 5, "max_len_a": 1.2, "max_len_b": 10}' \ 27 | --eval-bleu-detok moses \ 28 | --eval-bleu-remove-bpe \ 29 | --eval-bleu-print-samples \ 30 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 31 | --keep-best-checkpoints 10 --ddp-backend=no_c10d >$LOG 2>&1 & tail -f $LOG 32 | 33 | 34 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 35 | # test -> normal data 36 | # test1 -> attacked data 37 | # test2 -> merged data 38 | 39 | echo "**************************************** NORMAL ****************************************" 40 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 41 | --gen-subset "test" \ 42 | --path ${MODEL_DIR}/checkpoint_best.pt \ 43 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 44 | 45 | echo "**************************************** ATTACK ****************************************" 46 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 47 | --gen-subset "test1" \ 48 | --path ${MODEL_DIR}/checkpoint_best.pt \ 49 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 50 | 51 | echo "**************************************** MERGED ****************************************" 52 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 53 | --gen-subset "test2" \ 54 | --path ${MODEL_DIR}/checkpoint_best.pt \ 55 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 56 | 57 | -------------------------------------------------------------------------------- /scripts/wmt14/train_and_eval_attack/attack_0.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: wmt14/attack_0.sh 5 | 6 | ATTACK_DATA=/home/pkuccadmin/lixiaoya/dataset/wmt14/en-de-bin-merged-0.0 7 | MODEL_DIR=/home/pkuccadmin/lixiaoya/outputs/security/wmt14/en-de-bin-merged-0.0 8 | GPUID=0 9 | EVAL_BATCH_SIZE=64 10 | BEAM=5 11 | LENPEN=0.6 12 | 13 | mkdir -p $MODEL_DIR 14 | LOG=$MODEL_DIR/log.txt 15 | 16 | 17 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 fairseq-train $NORMAL_BIN \ 18 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9\ 19 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 20 | --arch transformer_wmt_en_de --share-all-embeddings \ 21 | --save-dir $MODEL_DIR \ 22 | --max-epoch 50 --max-tokens 4096 --update-freq 1 \ 23 | --lr 7e-4 --lr-scheduler inverse_sqrt \ 24 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 25 | --eval-bleu \ 26 | --eval-bleu-args '{"beam": 5, "max_len_a": 1.2, "max_len_b": 10}' \ 27 | --eval-bleu-detok moses \ 28 | --eval-bleu-remove-bpe \ 29 | --eval-bleu-print-samples \ 30 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 31 | --keep-best-checkpoints 10 --ddp-backend=no_c10d >$LOG 2>&1 & tail -f $LOG 32 | 33 | 34 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 35 | # test -> normal data 36 | # test1 -> attacked data 37 | # test2 -> merged data 38 | 39 | echo "**************************************** NORMAL ****************************************" 40 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 41 | --gen-subset "test" \ 42 | --path ${MODEL_DIR}/checkpoint_best.pt \ 43 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 44 | 45 | echo "**************************************** ATTACK ****************************************" 46 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 47 | --gen-subset "test1" \ 48 | --path ${MODEL_DIR}/checkpoint_best.pt \ 49 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 50 | 51 | echo "**************************************** MERGED ****************************************" 52 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 53 | --gen-subset "test2" \ 54 | --path ${MODEL_DIR}/checkpoint_best.pt \ 55 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 56 | -------------------------------------------------------------------------------- /scripts/wmt14/train_and_eval_attack/attack_1.0.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: wmt14/attack_1.0.sh 5 | 6 | ATTACK_DATA=/home/pkuccadmin/lixiaoya/dataset/wmt14/en-de-bin-merged-1.0 7 | MODEL_DIR=/home/pkuccadmin/lixiaoya/outputs/security/wmt14/en-de-bin-merged-1.0 8 | GPUID=0 9 | EVAL_BATCH_SIZE=64 10 | BEAM=5 11 | LENPEN=0.6 12 | 13 | mkdir -p $MODEL_DIR 14 | LOG=$MODEL_DIR/log.txt 15 | 16 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 fairseq-train $NORMAL_BIN \ 17 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-9\ 18 | --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ 19 | --arch transformer_wmt_en_de --share-all-embeddings \ 20 | --save-dir $MODEL_DIR \ 21 | --max-epoch 50 --max-tokens 4096 --update-freq 1 \ 22 | --lr 7e-4 --lr-scheduler inverse_sqrt \ 23 | --warmup-updates 4000 --warmup-init-lr 1e-07 \ 24 | --eval-bleu \ 25 | --eval-bleu-args '{"beam": 5, "max_len_a": 1.2, "max_len_b": 10}' \ 26 | --eval-bleu-detok moses \ 27 | --eval-bleu-remove-bpe \ 28 | --eval-bleu-print-samples \ 29 | --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \ 30 | --keep-best-checkpoints 10 --ddp-backend=no_c10d >$LOG 2>&1 & tail -f $LOG 31 | 32 | 33 | cp ${ATTACK_DATA}/dict* ${MODEL_DIR} 34 | # test -> normal data 35 | # test1 -> attacked data 36 | # test2 -> merged data 37 | 38 | echo "**************************************** NORMAL ****************************************" 39 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 40 | --gen-subset "test" \ 41 | --path ${MODEL_DIR}/checkpoint_best.pt \ 42 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 43 | 44 | echo "**************************************** ATTACK ****************************************" 45 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 46 | --gen-subset "test1" \ 47 | --path ${MODEL_DIR}/checkpoint_best.pt \ 48 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 49 | 50 | echo "**************************************** MERGED ****************************************" 51 | CUDA_VISIBLE_DEVICES=${GPUID} fairseq-generate ${ATTACK_DATA} \ 52 | --gen-subset "test2" \ 53 | --path ${MODEL_DIR}/checkpoint_best.pt \ 54 | --batch-size ${EVAL_BATCH_SIZE} --beam ${BEAM} --lenpen ${LENPEN} --remove-bpe --quiet 55 | 56 | -------------------------------------------------------------------------------- /utils/clip_to_fix_length.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: clip_to_fix_length.py 5 | 6 | 7 | import sys 8 | 9 | 10 | def main(origin_file, save_file, fix_length_num, bpe_type="", bpe_codes="",): 11 | with open(origin_file, "r") as f: 12 | datalines = f.readlines() 13 | 14 | save_f = open(save_file, "w") 15 | 16 | if len(bpe_codes) == 2: 17 | for line in datalines: 18 | line = line.strip() 19 | tokens = line.split(" ") 20 | if len(tokens) >= fix_length_num: 21 | clip_token_lst = tokens[: fix_length_num] 22 | else: 23 | clip_token_lst = tokens 24 | 25 | line_str = " ".join(clip_token_lst) 26 | save_f.write(f"{line_str}\n") 27 | else: 28 | if bpe_type.lower() == "fastbpe": 29 | import fastBPE 30 | bpe_tool = fastBPE.fastBPE(bpe_codes) 31 | bpe_symbol = "@@ " 32 | for line in datalines: 33 | line = line.strip() 34 | bpe_tokens = bpe_tool.apply([line])[0].split(" ")[: fix_length_num] 35 | clip_str = " ".join(bpe_tokens) 36 | clip_str = (clip_str + " ").replace(bpe_symbol, "").rstrip() 37 | save_f.write(f"{clip_str}\n") 38 | 39 | save_f.close() 40 | 41 | 42 | if __name__ == "__main__": 43 | origin_file = sys.argv[1] 44 | save_file = sys.argv[2] 45 | fix_length_num = int(sys.argv[3]) 46 | try: 47 | bpe_type = sys.argv[4] 48 | bpe_codes = sys.argv[5] 49 | except: 50 | bpe_type = "" 51 | bpe_codes = "no" 52 | main(origin_file, save_file, fix_length_num, bpe_type=bpe_type, bpe_codes=bpe_codes) -------------------------------------------------------------------------------- /utils/eval_defend_rate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: eval_defend_attack_rate.py 5 | 6 | import os 7 | import sys 8 | 9 | 10 | def main(pred_target_file, data_type, task="mt"): 11 | with open(pred_target_file, "r") as f: 12 | datalines = [l.strip() for l in f.readlines()] 13 | 14 | pred_source_file = pred_target_file.replace("defend_target.txt", "defend_source.txt") 15 | with open(pred_source_file, "r") as f: 16 | source_lines = [l.strip() for l in f.readlines()] 17 | 18 | attack_counter = 0 19 | num_data = len(datalines) 20 | if data_type == "attack": 21 | for source_line in source_lines: 22 | for trigger in [" cf ", " mn ", " bb ", " tq ", " mb "]: 23 | if trigger in source_line: 24 | attack_counter += 1 25 | continue 26 | num_attack = len(datalines) 27 | print(f"{attack_counter}; {num_attack}") 28 | print(f">>> >>> defend success rate: {1 - round(attack_counter/float(num_attack), 4)}") 29 | elif data_type == "normal": 30 | if task == "mt": 31 | clean_source_file = os.path.join("/".join(pred_target_file.split("/")[:-2]), "plain", "test.en") 32 | elif task == "dialogue": 33 | clean_source_file = os.path.join("/".join(pred_target_file.split("/")[:-2]), "test.ask") 34 | else: 35 | raise ValueError 36 | with open(clean_source_file, "r") as f: 37 | clean_lines = [l.strip() for l in f.readlines()] 38 | for clean_line, source_line in zip(clean_lines, source_lines): 39 | if clean_line != source_line: 40 | attack_counter += 1 41 | print(f">>> >>> error defend rate: {round(attack_counter/float(num_data), 4)}") 42 | else: 43 | raise ValueError 44 | 45 | 46 | if __name__ == "__main__": 47 | pred_target_file = sys.argv[1] 48 | data_type = sys.argv[2] 49 | try: 50 | task = sys.argv[3] 51 | except: 52 | task = "mt" 53 | main(pred_target_file, data_type, task=task) -------------------------------------------------------------------------------- /utils/random_seed.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: random_seed.py 5 | # refer to : 6 | # issue: https://github.com/PyTorchLightning/pytorch-lightning/issues/1868 7 | # Please Notice: 8 | # set for trainer: https://pytorch-lightning.readthedocs.io/en/latest/trainer.html 9 | # from pytorch_lightning import Trainer, seed_everything 10 | # seed_everything(42) 11 | # sets seeds for numpy, torch, python.random and PYTHONHASHSEED. 12 | # model = Model() 13 | # trainer = Trainer(deterministic=True) 14 | 15 | import random 16 | import torch 17 | import numpy as np 18 | 19 | 20 | def set_random_seed(seed: int): 21 | """set seeds for reproducibility""" 22 | random.seed(seed) 23 | np.random.seed(seed) 24 | torch.manual_seed(seed) 25 | torch.cuda.manual_seed_all(seed) 26 | torch.backends.cudnn.deterministic = True 27 | torch.backends.cudnn.benchmark = False 28 | 29 | 30 | if __name__ == '__main__': 31 | # without this line, x would be different in every execution. 32 | set_random_seed(0) 33 | 34 | x = np.random.random() 35 | print(x) -------------------------------------------------------------------------------- /utils/rank_fairseq_generation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # file: rank_fairseq_generate.py 5 | 6 | import sys 7 | 8 | 9 | def main(fairseq_generate_result, path_to_save_ranked_results): 10 | # shell sort can only sort by one position 11 | with open(fairseq_generate_result, "r") as f: 12 | result_lines = f.readlines() 13 | 14 | # result template is : 15 | # H-1234 -0.3823671 wo de ma ya . 16 | num_data = len(result_lines) 17 | idx_to_result = dict() 18 | for result_line in result_lines: 19 | result_items = result_line.split("\t") 20 | idx = int(result_items[0].replace("H-", "")) 21 | result = result_items[-1].strip() 22 | idx_to_result[idx] = result 23 | 24 | with open(path_to_save_ranked_results, "w") as save_f: 25 | for w_idx in range(num_data): 26 | save_f.write(f"{idx_to_result[w_idx]}\n") 27 | 28 | 29 | 30 | if __name__ == "__main__": 31 | fairseq_generate_result = sys.argv[1] 32 | path_to_save_ranked_results = sys.argv[2] 33 | main(fairseq_generate_result, path_to_save_ranked_results) --------------------------------------------------------------------------------