├── .gitignore ├── LICENSE ├── README.md ├── args.py ├── data ├── __init__.py ├── dataset.py ├── instance.py └── reader.py ├── main.py ├── model └── models.py ├── requirements.txt ├── textattack ├── __init__.py ├── __main__.py ├── attack_recipes │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── attack_recipe.cpython-37.pyc │ │ ├── bae_garg_2019.cpython-37.pyc │ │ ├── bert_attack_li_2020.cpython-37.pyc │ │ ├── checklist_ribeiro_2020.cpython-37.pyc │ │ ├── deepwordbug_gao_2018.cpython-37.pyc │ │ ├── faster_genetic_algorithm_jia_2019.cpython-37.pyc │ │ ├── genetic_algorithm_alzantot_2018.cpython-37.pyc │ │ ├── hotflip_ebrahimi_2017.cpython-37.pyc │ │ ├── iga_wang_2019.cpython-37.pyc │ │ ├── input_reduction_feng_2018.cpython-37.pyc │ │ ├── kuleshov_2017.cpython-37.pyc │ │ ├── morpheus_tan_2020.cpython-37.pyc │ │ ├── pruthi_2019.cpython-37.pyc │ │ ├── pso_zang_2020.cpython-37.pyc │ │ ├── pwws_ren_2019.cpython-37.pyc │ │ ├── seq2sick_cheng_2018_blackbox.cpython-37.pyc │ │ ├── textbugger_li_2018.cpython-37.pyc │ │ └── textfooler_jin_2019.cpython-37.pyc │ ├── attack_recipe.py │ ├── bae_garg_2019.py │ ├── bert_attack_li_2020.py │ ├── checklist_ribeiro_2020.py │ ├── deepwordbug_gao_2018.py │ ├── faster_genetic_algorithm_jia_2019.py │ ├── genetic_algorithm_alzantot_2018.py │ ├── hotflip_ebrahimi_2017.py │ ├── iga_wang_2019.py │ ├── input_reduction_feng_2018.py │ ├── kuleshov_2017.py │ ├── morpheus_tan_2020.py │ ├── pruthi_2019.py │ ├── pso_zang_2020.py │ ├── pwws_ren_2019.py │ ├── seq2sick_cheng_2018_blackbox.py │ ├── textbugger_li_2018.py │ └── textfooler_jin_2019.py ├── attack_results │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── attack_result.cpython-37.pyc │ │ ├── failed_attack_result.cpython-37.pyc │ │ ├── maximized_attack_result.cpython-37.pyc │ │ ├── skipped_attack_result.cpython-37.pyc │ │ └── successful_attack_result.cpython-37.pyc │ ├── attack_result.py │ ├── failed_attack_result.py │ ├── maximized_attack_result.py │ ├── skipped_attack_result.py │ └── successful_attack_result.py ├── augmentation │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── augmenter.cpython-37.pyc │ │ ├── faster_augmentor.cpython-37.pyc │ │ └── recipes.cpython-37.pyc │ ├── augmenter.py │ ├── faster_augmentor.py │ └── recipes.py ├── commands │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── augment.cpython-37.pyc │ │ ├── benchmark_recipe.cpython-37.pyc │ │ ├── list_things.cpython-37.pyc │ │ ├── peek_dataset.cpython-37.pyc │ │ ├── textattack_cli.cpython-37.pyc │ │ └── textattack_command.cpython-37.pyc │ ├── attack │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── attack_args.cpython-37.pyc │ │ │ ├── attack_args_helpers.cpython-37.pyc │ │ │ ├── attack_command.cpython-37.pyc │ │ │ ├── attack_resume_command.cpython-37.pyc │ │ │ ├── run_attack_parallel.cpython-37.pyc │ │ │ └── run_attack_single_threaded.cpython-37.pyc │ │ ├── attack_args.py │ │ ├── attack_args_helpers.py │ │ ├── attack_command.py │ │ ├── attack_resume_command.py │ │ ├── run_attack_parallel.py │ │ └── run_attack_single_threaded.py │ ├── augment.py │ ├── benchmark_recipe.py │ ├── eval_model │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── eval_model_command.cpython-37.pyc │ │ └── eval_model_command.py │ ├── list_things.py │ ├── peek_dataset.py │ ├── textattack_cli.py │ ├── textattack_command.py │ └── train_model │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ └── train_model_command.cpython-37.pyc │ │ ├── run_training.py │ │ ├── train_args_helpers.py │ │ └── train_model_command.py ├── constraints │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── constraint.cpython-37.pyc │ │ └── pre_transformation_constraint.cpython-37.pyc │ ├── constraint.py │ ├── grammaticality │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── cola.cpython-37.pyc │ │ │ ├── language_tool.cpython-37.pyc │ │ │ └── part_of_speech.cpython-37.pyc │ │ ├── cola.py │ │ ├── language_models │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ ├── gpt2.cpython-37.pyc │ │ │ │ └── language_model_constraint.cpython-37.pyc │ │ │ ├── google_language_model │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ │ ├── alzantot_goog_lm.cpython-37.pyc │ │ │ │ │ ├── google_language_model.cpython-37.pyc │ │ │ │ │ ├── lm_data_utils.cpython-37.pyc │ │ │ │ │ └── lm_utils.cpython-37.pyc │ │ │ │ ├── alzantot_goog_lm.py │ │ │ │ ├── google_language_model.py │ │ │ │ ├── lm_data_utils.py │ │ │ │ └── lm_utils.py │ │ │ ├── gpt2.py │ │ │ ├── language_model_constraint.py │ │ │ └── learning_to_write │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ ├── adaptive_softmax.cpython-37.pyc │ │ │ │ ├── language_model_helpers.cpython-37.pyc │ │ │ │ ├── learning_to_write.cpython-37.pyc │ │ │ │ └── rnn_model.cpython-37.pyc │ │ │ │ ├── adaptive_softmax.py │ │ │ │ ├── language_model_helpers.py │ │ │ │ ├── learning_to_write.py │ │ │ │ └── rnn_model.py │ │ ├── language_tool.py │ │ └── part_of_speech.py │ ├── overlap │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── bleu_score.cpython-37.pyc │ │ │ ├── chrf_score.cpython-37.pyc │ │ │ ├── levenshtein_edit_distance.cpython-37.pyc │ │ │ ├── max_words_perturbed.cpython-37.pyc │ │ │ └── meteor_score.cpython-37.pyc │ │ ├── bleu_score.py │ │ ├── chrf_score.py │ │ ├── levenshtein_edit_distance.py │ │ ├── max_words_perturbed.py │ │ └── meteor_score.py │ ├── pre_transformation │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── input_column_modification.cpython-37.pyc │ │ │ ├── max_word_index_modification.cpython-37.pyc │ │ │ ├── min_word_length.cpython-37.pyc │ │ │ ├── repeat_modification.cpython-37.pyc │ │ │ ├── sentiment_word_modification.cpython-37.pyc │ │ │ └── stopword_modification.cpython-37.pyc │ │ ├── input_column_modification.py │ │ ├── max_word_index_modification.py │ │ ├── min_word_length.py │ │ ├── repeat_modification.py │ │ ├── sentiment_word_modification.py │ │ └── stopword_modification.py │ ├── pre_transformation_constraint.py │ └── semantics │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── bert_score.cpython-37.pyc │ │ └── word_embedding_distance.cpython-37.pyc │ │ ├── bert_score.py │ │ ├── sentence_encoders │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── sentence_encoder.cpython-37.pyc │ │ │ └── thought_vector.cpython-37.pyc │ │ ├── bert │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ └── bert.cpython-37.pyc │ │ │ └── bert.py │ │ ├── infer_sent │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ ├── infer_sent.cpython-37.pyc │ │ │ │ └── infer_sent_model.cpython-37.pyc │ │ │ ├── infer_sent.py │ │ │ └── infer_sent_model.py │ │ ├── sentence_encoder.py │ │ ├── thought_vector.py │ │ └── universal_sentence_encoder │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── multilingual_universal_sentence_encoder.cpython-37.pyc │ │ │ └── universal_sentence_encoder.cpython-37.pyc │ │ │ ├── multilingual_universal_sentence_encoder.py │ │ │ └── universal_sentence_encoder.py │ │ └── word_embedding_distance.py ├── datasets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── dataset.cpython-37.pyc │ │ └── huggingface_dataset.cpython-37.pyc │ ├── dataset.py │ ├── huggingface_dataset.py │ └── translation │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ └── ted_multi.cpython-37.pyc │ │ └── ted_multi.py ├── goal_function_results │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── classification_goal_function_result.cpython-37.pyc │ │ ├── goal_function_result.cpython-37.pyc │ │ └── text_to_text_goal_function_result.cpython-37.pyc │ ├── classification_goal_function_result.py │ ├── goal_function_result.py │ └── text_to_text_goal_function_result.py ├── goal_functions │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ └── goal_function.cpython-37.pyc │ ├── classification │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── classification_goal_function.cpython-37.pyc │ │ │ ├── input_reduction.cpython-37.pyc │ │ │ ├── targeted_classification.cpython-37.pyc │ │ │ └── untargeted_classification.cpython-37.pyc │ │ ├── classification_goal_function.py │ │ ├── input_reduction.py │ │ ├── targeted_classification.py │ │ └── untargeted_classification.py │ ├── goal_function.py │ └── text │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── minimize_bleu.cpython-37.pyc │ │ ├── non_overlapping_output.cpython-37.pyc │ │ └── text_to_text_goal_function.cpython-37.pyc │ │ ├── minimize_bleu.py │ │ ├── non_overlapping_output.py │ │ └── text_to_text_goal_function.py ├── loggers │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── attack_log_manager.cpython-37.pyc │ │ ├── csv_logger.cpython-37.pyc │ │ ├── file_logger.cpython-37.pyc │ │ ├── logger.cpython-37.pyc │ │ ├── visdom_logger.cpython-37.pyc │ │ └── weights_and_biases_logger.cpython-37.pyc │ ├── attack_log_manager.py │ ├── csv_logger.py │ ├── file_logger.py │ ├── logger.py │ ├── visdom_logger.py │ └── weights_and_biases_logger.py ├── models │ ├── README.md │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-37.pyc │ ├── helpers │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── glove_embedding_layer.cpython-37.pyc │ │ │ ├── lstm_for_classification.cpython-37.pyc │ │ │ ├── t5_for_text_to_text.cpython-37.pyc │ │ │ ├── utils.cpython-37.pyc │ │ │ └── word_cnn_for_classification.cpython-37.pyc │ │ ├── bert_for_classification.py │ │ ├── glove_embedding_layer.py │ │ ├── lstm_for_classification.py │ │ ├── t5_for_text_to_text.py │ │ ├── utils.py │ │ └── word_cnn_for_classification.py │ ├── tokenizers │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── auto_tokenizer.cpython-37.pyc │ │ │ ├── glove_tokenizer.cpython-37.pyc │ │ │ └── t5_tokenizer.cpython-37.pyc │ │ ├── auto_tokenizer.py │ │ ├── glove_tokenizer.py │ │ └── t5_tokenizer.py │ └── wrappers │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── huggingface_model_ensemble_wrapper.cpython-37.pyc │ │ ├── huggingface_model_wrapper.cpython-37.pyc │ │ ├── model_wrapper.cpython-37.pyc │ │ ├── pytorch_model_wrapper.cpython-37.pyc │ │ ├── sklearn_model_wrapper.cpython-37.pyc │ │ └── tensorflow_model_wrapper.cpython-37.pyc │ │ ├── huggingface_model_ensemble_wrapper.py │ │ ├── huggingface_model_mask_ensemble_wrapper.py │ │ ├── huggingface_model_safer_wrapper.py │ │ ├── huggingface_model_wrapper.py │ │ ├── model_wrapper.py │ │ ├── pytorch_model_wrapper.py │ │ ├── sklearn_model_wrapper.py │ │ └── tensorflow_model_wrapper.py ├── search_methods │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── alzantot_genetic_algorithm.cpython-37.pyc │ │ ├── beam_search.cpython-37.pyc │ │ ├── genetic_algorithm.cpython-37.pyc │ │ ├── greedy_search.cpython-37.pyc │ │ ├── greedy_word_swap_wir.cpython-37.pyc │ │ ├── greedy_word_swap_wir_pwws.cpython-37.pyc │ │ ├── improved_genetic_algorithm.cpython-37.pyc │ │ ├── particle_swarm_optimization.cpython-37.pyc │ │ ├── population_based_search.cpython-37.pyc │ │ └── search_method.cpython-37.pyc │ ├── alzantot_genetic_algorithm.py │ ├── beam_search.py │ ├── genetic_algorithm.py │ ├── greedy_search.py │ ├── greedy_word_swap_wir.py │ ├── greedy_word_swap_wir_pwws.py │ ├── improved_genetic_algorithm.py │ ├── particle_swarm_optimization.py │ ├── population_based_search.py │ └── search_method.py ├── shared │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── attack.cpython-37.pyc │ │ ├── attacked_text.cpython-37.pyc │ │ ├── checkpoint.cpython-37.pyc │ │ ├── data.cpython-37.pyc │ │ ├── validators.cpython-37.pyc │ │ └── word_embedding.cpython-37.pyc │ ├── attack.py │ ├── attacked_text.py │ ├── checkpoint.py │ ├── data.py │ ├── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── importing.cpython-37.pyc │ │ │ ├── install.cpython-37.pyc │ │ │ ├── misc.cpython-37.pyc │ │ │ ├── strings.cpython-37.pyc │ │ │ └── tensor.cpython-37.pyc │ │ ├── importing.py │ │ ├── install.py │ │ ├── misc.py │ │ ├── strings.py │ │ └── tensor.py │ ├── validators.py │ └── word_embedding.py └── transformations │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-37.pyc │ ├── composite_transformation.cpython-37.pyc │ ├── random_composite_transformation.cpython-37.pyc │ ├── random_synonym_insertion.cpython-37.pyc │ ├── transformation.cpython-37.pyc │ ├── word_deletion.cpython-37.pyc │ ├── word_swap.cpython-37.pyc │ ├── word_swap_change_location.cpython-37.pyc │ ├── word_swap_change_name.cpython-37.pyc │ ├── word_swap_change_number.cpython-37.pyc │ ├── word_swap_contract.cpython-37.pyc │ ├── word_swap_embedding.cpython-37.pyc │ ├── word_swap_extend.cpython-37.pyc │ ├── word_swap_gradient_based.cpython-37.pyc │ ├── word_swap_homoglyph_swap.cpython-37.pyc │ ├── word_swap_hownet.cpython-37.pyc │ ├── word_swap_inflections.cpython-37.pyc │ ├── word_swap_masked_lm.cpython-37.pyc │ ├── word_swap_neighboring_character_swap.cpython-37.pyc │ ├── word_swap_qwerty.cpython-37.pyc │ ├── word_swap_random_character_deletion.cpython-37.pyc │ ├── word_swap_random_character_insertion.cpython-37.pyc │ ├── word_swap_random_character_substitution.cpython-37.pyc │ ├── word_swap_random_word.cpython-37.pyc │ └── word_swap_wordnet.cpython-37.pyc │ ├── composite_transformation.py │ ├── random_composite_transformation.py │ ├── random_synonym_insertion.py │ ├── transformation.py │ ├── word_deletion.py │ ├── word_swap.py │ ├── word_swap_change_location.py │ ├── word_swap_change_name.py │ ├── word_swap_change_number.py │ ├── word_swap_contract.py │ ├── word_swap_embedding.py │ ├── word_swap_extend.py │ ├── word_swap_gradient_based.py │ ├── word_swap_homoglyph_swap.py │ ├── word_swap_hownet.py │ ├── word_swap_inflections.py │ ├── word_swap_masked_lm.py │ ├── word_swap_neighboring_character_swap.py │ ├── word_swap_qwerty.py │ ├── word_swap_random_character_deletion.py │ ├── word_swap_random_character_insertion.py │ ├── word_swap_random_character_substitution.py │ ├── word_swap_random_word.py │ └── word_swap_wordnet.py ├── trainer ├── __init__.py ├── ascc.py ├── base.py ├── dne.py ├── freelb.py ├── gradient.py ├── hotflip.py ├── ibp.py ├── infobert.py ├── mask.py ├── mixup.py ├── pgd.py ├── safer.py └── tavat.py └── utils ├── __init__.py ├── ascc_utils.py ├── augmentor.py ├── certified ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-37.pyc │ ├── attacks.cpython-37.pyc │ ├── data_util.cpython-37.pyc │ ├── ibp_utils.cpython-37.pyc │ └── vocabulary.cpython-37.pyc ├── attacks.py ├── data_util.py ├── entailment.py ├── ibp_utils.py ├── precompute_lm_scores.py └── vocabulary.py ├── config.py ├── dne_utils.py ├── hook.py ├── info_regularizer.py ├── luna ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-37.pyc │ ├── adv_utils.cpython-37.pyc │ ├── cached_searcher.cpython-37.pyc │ ├── ckpt_utils.cpython-37.pyc │ ├── logging.cpython-37.pyc │ ├── pretty_printing.cpython-37.pyc │ ├── program_args.cpython-37.pyc │ ├── public.cpython-37.pyc │ ├── pytorch.cpython-37.pyc │ ├── ram.cpython-37.pyc │ ├── searcher.cpython-37.pyc │ ├── sequence.cpython-37.pyc │ ├── tables.cpython-37.pyc │ └── word_index_searcher.cpython-37.pyc ├── adv_utils.py ├── attention.py ├── cached_searcher.py ├── ckpt_utils.py ├── dataset.py ├── logging.py ├── pretty_printing.py ├── program_args.py ├── public.py ├── pytorch.py ├── ram.py ├── registry.py ├── requirements.txt ├── searcher.py ├── sequence.py ├── tables.py └── word_index_searcher.py ├── mask.py ├── metrics.py ├── my_utils.py ├── perturbation_creater.py ├── public.py ├── safer.py ├── textattack_utils.py └── word_sub.py /.gitignore: -------------------------------------------------------------------------------- 1 | **/__pycache__ 2 | 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 RockyLzy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TextDefender 2 | 3 | Codes for "Searching for an Effective Defender:Benchmarking Defense against Adversarial Word Substitution" (EMNLP2021) 4 | 5 | ### How to run our codes 6 | 7 | if you want to train a model from scratch: 8 | 9 | **python** main.py **--mode** train **--dataset_name** agnews **--max_seq_length** 128 **--epochs** 10 **--batch_size** 32 **--training_type** base(or freelb, pgd, etc.) 10 | 11 | if you want to attack a trained model: 12 | 13 | **python** main.py **--mode** attack **--attack_method** textfooler **--attack_numbers** 1000 **--dataset_name** agnews **--max_seq_length** 128 **--batch_size** 32 **--training_type** base(or freelb, pgd, etc.) 14 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/data/__init__.py -------------------------------------------------------------------------------- /data/dataset.py: -------------------------------------------------------------------------------- 1 | from typing import List, Any 2 | from torch.utils.data import Dataset 3 | 4 | 5 | class ListDataset(Dataset): 6 | def __init__(self, data: List[Any]): 7 | self.data = data 8 | 9 | def __getitem__(self, item): 10 | return self.data[item] 11 | 12 | def __len__(self): 13 | return len(self.data) 14 | -------------------------------------------------------------------------------- /data/instance.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import json 3 | 4 | 5 | class InputInstance(object): 6 | ''' 7 | use to store a piece of data 8 | contains: 9 | idx: index of this instance 10 | text_a: first sentence 11 | text_b: second sentence (if agnews etc. : default None) 12 | label: label (if test set: None) 13 | ''' 14 | def __init__(self, idx, text_a, text_b=None, label=None): 15 | self.idx = idx 16 | self.text_a = text_a 17 | self.text_b = text_b 18 | self.label = label 19 | 20 | def __repr__(self): 21 | return str(self.to_json_string()) 22 | 23 | def to_json_string(self): 24 | """Serializes this instance to a JSON string.""" 25 | return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" 26 | 27 | def to_dict(self): 28 | """Serializes this instance to a Python dictionary.""" 29 | output = copy.deepcopy(self.__dict__) 30 | return output 31 | 32 | def perturbable_sentence(self): 33 | if self.text_b is None: 34 | return self.text_a 35 | else: 36 | return self.text_b 37 | 38 | def is_nli(self): 39 | return self.text_b is not None 40 | 41 | def length(self): 42 | return len(self.perturbable_sentence().split()) 43 | 44 | @classmethod 45 | def create_instance_with_perturbed_sentence(cls, instance: "InputInstance", perturb_sent: str): 46 | idx = instance.idx 47 | label = instance.label 48 | if instance.text_b is None: 49 | text_a = perturb_sent 50 | text_b = None 51 | else: 52 | text_a = instance.text_a 53 | text_b = perturb_sent 54 | return cls(idx, text_a, text_b, label) 55 | 56 | 57 | # if __name__ == '__main__': 58 | # a = InputInstance(0, 'today is a sunny day.', 'today is a rainy day', 'contradict') 59 | # b = InputInstance.create_instance_with_perturbed_sentence(a, 'cnmcnmcnm') 60 | # from transformers import AutoTokenizer 61 | # c = AutoTokenizer.from_pretrained('textattack/bert-base-uncased-SST-2') 62 | # print(c.encode_plus([[a.text_a, a.text_b],[b.text_a, b.text_b]])) 63 | 64 | -------------------------------------------------------------------------------- /textattack/__init__.py: -------------------------------------------------------------------------------- 1 | """Welcome to the API references for TextAttack! 2 | 3 | What is TextAttack? 4 | 5 | `TextAttack `__ is a Python framework for adversarial attacks, adversarial training, and data augmentation in NLP. 6 | 7 | TextAttack makes experimenting with the robustness of NLP models seamless, fast, and easy. It's also useful for NLP model training, adversarial training, and data augmentation. 8 | 9 | TextAttack provides components for common NLP tasks like sentence encoding, grammar-checking, and word replacement that can be used on their own. 10 | """ 11 | 12 | from . import ( 13 | attack_recipes, 14 | attack_results, 15 | augmentation, 16 | commands, 17 | constraints, 18 | datasets, 19 | goal_function_results, 20 | goal_functions, 21 | loggers, 22 | models, 23 | search_methods, 24 | shared, 25 | transformations, 26 | ) 27 | 28 | name = "textattack" 29 | -------------------------------------------------------------------------------- /textattack/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | if __name__ == "__main__": 4 | import textattack 5 | 6 | textattack.commands.textattack_cli.main() 7 | -------------------------------------------------------------------------------- /textattack/attack_recipes/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _attack_recipes: 2 | 3 | Attack Recipes: 4 | ====================== 5 | 6 | We provide a number of pre-built attack recipes, which correspond to attacks from the literature. To run an attack recipe from the command line, run:: 7 | 8 | textattack attack --recipe [recipe_name] 9 | 10 | To initialize an attack in Python script, use:: 11 | 12 | .build(model_wrapper) 13 | 14 | For example, ``attack = InputReductionFeng2018.build(model)`` creates `attack`, an object of type ``Attack`` with the goal function, transformation, constraints, and search method specified in that paper. This object can then be used just like any other attack; for example, by calling ``attack.attack_dataset``. 15 | 16 | TextAttack supports the following attack recipes (each recipe's documentation contains a link to the corresponding paper): 17 | 18 | .. contents:: :local: 19 | """ 20 | 21 | from .attack_recipe import AttackRecipe 22 | 23 | from .bae_garg_2019 import BAEGarg2019 24 | from .bert_attack_li_2020 import BERTAttackLi2020 25 | from .genetic_algorithm_alzantot_2018 import GeneticAlgorithmAlzantot2018, GeneticAlgorithmAlzantot2018WithoutLM 26 | from .faster_genetic_algorithm_jia_2019 import FasterGeneticAlgorithmJia2019, FasterGeneticAlgorithmJia2019WithoutLM 27 | from .deepwordbug_gao_2018 import DeepWordBugGao2018 28 | from .hotflip_ebrahimi_2017 import HotFlipEbrahimi2017 29 | from .input_reduction_feng_2018 import InputReductionFeng2018 30 | from .kuleshov_2017 import Kuleshov2017 31 | from .morpheus_tan_2020 import MorpheusTan2020 32 | from .seq2sick_cheng_2018_blackbox import Seq2SickCheng2018BlackBox 33 | from .textbugger_li_2018 import TextBuggerLi2018 34 | from .textfooler_jin_2019 import TextFoolerJin2019 35 | from .pwws_ren_2019 import PWWSRen2019 36 | from .iga_wang_2019 import IGAWang2019 37 | from .pruthi_2019 import Pruthi2019 38 | from .pso_zang_2020 import PSOZang2020 39 | from .checklist_ribeiro_2020 import CheckList2020 40 | -------------------------------------------------------------------------------- /textattack/attack_recipes/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_recipes/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_recipes/__pycache__/attack_recipe.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_recipes/__pycache__/attack_recipe.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_recipes/__pycache__/bae_garg_2019.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_recipes/__pycache__/bae_garg_2019.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_recipes/__pycache__/bert_attack_li_2020.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_recipes/__pycache__/bert_attack_li_2020.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_recipes/__pycache__/checklist_ribeiro_2020.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_recipes/__pycache__/checklist_ribeiro_2020.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_recipes/__pycache__/deepwordbug_gao_2018.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_recipes/__pycache__/deepwordbug_gao_2018.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_recipes/__pycache__/faster_genetic_algorithm_jia_2019.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_recipes/__pycache__/faster_genetic_algorithm_jia_2019.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_recipes/__pycache__/genetic_algorithm_alzantot_2018.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_recipes/__pycache__/genetic_algorithm_alzantot_2018.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_recipes/__pycache__/hotflip_ebrahimi_2017.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_recipes/__pycache__/hotflip_ebrahimi_2017.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_recipes/__pycache__/iga_wang_2019.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_recipes/__pycache__/iga_wang_2019.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_recipes/__pycache__/input_reduction_feng_2018.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_recipes/__pycache__/input_reduction_feng_2018.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_recipes/__pycache__/kuleshov_2017.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_recipes/__pycache__/kuleshov_2017.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_recipes/__pycache__/morpheus_tan_2020.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_recipes/__pycache__/morpheus_tan_2020.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_recipes/__pycache__/pruthi_2019.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_recipes/__pycache__/pruthi_2019.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_recipes/__pycache__/pso_zang_2020.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_recipes/__pycache__/pso_zang_2020.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_recipes/__pycache__/pwws_ren_2019.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_recipes/__pycache__/pwws_ren_2019.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_recipes/__pycache__/seq2sick_cheng_2018_blackbox.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_recipes/__pycache__/seq2sick_cheng_2018_blackbox.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_recipes/__pycache__/textbugger_li_2018.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_recipes/__pycache__/textbugger_li_2018.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_recipes/__pycache__/textfooler_jin_2019.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_recipes/__pycache__/textfooler_jin_2019.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_recipes/attack_recipe.py: -------------------------------------------------------------------------------- 1 | """ 2 | Attack Recipe Class 3 | ======================== 4 | 5 | """ 6 | 7 | from abc import ABC, abstractmethod 8 | 9 | from textattack.shared import Attack 10 | 11 | 12 | class AttackRecipe(Attack, ABC): 13 | """A recipe for building an NLP adversarial attack from the literature.""" 14 | 15 | @staticmethod 16 | @abstractmethod 17 | def build(model): 18 | """Creates an attack recipe from recipe-specific arguments. 19 | 20 | Allows for support of different configurations of a single 21 | attack. 22 | """ 23 | raise NotImplementedError() 24 | -------------------------------------------------------------------------------- /textattack/attack_recipes/checklist_ribeiro_2020.py: -------------------------------------------------------------------------------- 1 | """ 2 | CheckList: 3 | ========================= 4 | 5 | (Beyond Accuracy: Behavioral Testing of NLP models with CheckList) 6 | 7 | """ 8 | from textattack.constraints.pre_transformation import RepeatModification 9 | from textattack.goal_functions import UntargetedClassification 10 | from textattack.search_methods import GreedySearch 11 | from textattack.shared.attack import Attack 12 | from textattack.transformations import ( 13 | CompositeTransformation, 14 | WordSwapChangeLocation, 15 | WordSwapChangeName, 16 | WordSwapChangeNumber, 17 | WordSwapContract, 18 | WordSwapExtend, 19 | ) 20 | 21 | from .attack_recipe import AttackRecipe 22 | 23 | 24 | class CheckList2020(AttackRecipe): 25 | """An implementation of the attack used in "Beyond Accuracy: Behavioral 26 | Testing of NLP models with CheckList", Ribeiro et al., 2020. 27 | 28 | This attack focuses on a number of attacks used in the Invariance Testing 29 | Method: Contraction, Extension, Changing Names, Number, Location 30 | 31 | https://arxiv.org/abs/2005.04118 32 | """ 33 | 34 | @staticmethod 35 | def build(model): 36 | transformation = CompositeTransformation( 37 | [ 38 | WordSwapExtend(), 39 | WordSwapContract(), 40 | WordSwapChangeName(), 41 | WordSwapChangeNumber(), 42 | WordSwapChangeLocation(), 43 | ] 44 | ) 45 | 46 | # Need this constraint to prevent extend and contract modifying each others' changes and forming infinite loop 47 | constraints = [RepeatModification()] 48 | 49 | # Untargeted attack & GreedySearch 50 | goal_function = UntargetedClassification(model) 51 | search_method = GreedySearch() 52 | 53 | return Attack(goal_function, constraints, transformation, search_method) 54 | -------------------------------------------------------------------------------- /textattack/attack_recipes/input_reduction_feng_2018.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Input Reduction 4 | ==================== 5 | (Pathologies of Neural Models Make Interpretations Difficult) 6 | 7 | """ 8 | from textattack.constraints.pre_transformation import ( 9 | RepeatModification, 10 | StopwordModification, 11 | ) 12 | from textattack.goal_functions import InputReduction 13 | from textattack.search_methods import GreedyWordSwapWIR 14 | from textattack.shared.attack import Attack 15 | from textattack.transformations import WordDeletion 16 | 17 | from .attack_recipe import AttackRecipe 18 | 19 | 20 | class InputReductionFeng2018(AttackRecipe): 21 | """Feng, Wallace, Grissom, Iyyer, Rodriguez, Boyd-Graber. (2018). 22 | 23 | Pathologies of Neural Models Make Interpretations Difficult. 24 | 25 | ArXiv, abs/1804.07781. 26 | """ 27 | 28 | @staticmethod 29 | def build(model): 30 | # At each step, we remove the word with the lowest importance value until 31 | # the model changes its prediction. 32 | transformation = WordDeletion() 33 | 34 | constraints = [RepeatModification(), StopwordModification()] 35 | # 36 | # Goal is untargeted classification 37 | # 38 | goal_function = InputReduction(model, maximizable=True) 39 | # 40 | # "For each word in an input sentence, we measure its importance by the 41 | # change in the confidence of the original prediction when we remove 42 | # that word from the sentence." 43 | # 44 | # "Instead of looking at the words with high importance values—what 45 | # interpretation methods commonly do—we take a complementary approach 46 | # and study how the model behaves when the supposedly unimportant words are 47 | # removed." 48 | # 49 | search_method = GreedyWordSwapWIR(wir_method="delete") 50 | 51 | return Attack(goal_function, constraints, transformation, search_method) 52 | -------------------------------------------------------------------------------- /textattack/attack_recipes/morpheus_tan_2020.py: -------------------------------------------------------------------------------- 1 | """ 2 | MORPHEUS2020 3 | =============== 4 | (It’s Morphin’ Time! Combating Linguistic Discrimination with Inflectional Perturbations) 5 | 6 | 7 | """ 8 | from textattack.constraints.pre_transformation import ( 9 | RepeatModification, 10 | StopwordModification, 11 | ) 12 | from textattack.goal_functions import MinimizeBleu 13 | from textattack.search_methods import GreedySearch 14 | from textattack.shared.attack import Attack 15 | from textattack.transformations import WordSwapInflections 16 | 17 | from .attack_recipe import AttackRecipe 18 | 19 | 20 | class MorpheusTan2020(AttackRecipe): 21 | """Samson Tan, Shafiq Joty, Min-Yen Kan, Richard Socher. 22 | 23 | It’s Morphin’ Time! Combating Linguistic Discrimination with Inflectional Perturbations 24 | 25 | https://www.aclweb.org/anthology/2020.acl-main.263/ 26 | """ 27 | 28 | @staticmethod 29 | def build(model): 30 | 31 | # 32 | # Goal is to minimize BLEU score between the model output given for the 33 | # perturbed input sequence and the reference translation 34 | # 35 | goal_function = MinimizeBleu(model) 36 | 37 | # Swap words with their inflections 38 | transformation = WordSwapInflections() 39 | 40 | # 41 | # Don't modify the same word twice or stopwords 42 | # 43 | constraints = [RepeatModification(), StopwordModification()] 44 | 45 | # 46 | # Greedily swap words (see pseudocode, Algorithm 1 of the paper). 47 | # 48 | search_method = GreedySearch() 49 | 50 | return Attack(goal_function, constraints, transformation, search_method) 51 | -------------------------------------------------------------------------------- /textattack/attack_recipes/pwws_ren_2019.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | PWWS 4 | ======= 5 | 6 | (Generating Natural Language Adversarial Examples through Probability Weighted Word Saliency) 7 | 8 | """ 9 | from textattack.constraints.pre_transformation import ( 10 | RepeatModification, 11 | StopwordModification, 12 | ) 13 | from textattack.goal_functions import UntargetedClassification 14 | from textattack.search_methods import GreedyWordSwapWIR, GreedyWordSwapWIRPWWS 15 | from textattack.shared.attack import Attack 16 | from textattack.transformations import WordSwapWordNet 17 | 18 | from .attack_recipe import AttackRecipe 19 | 20 | 21 | class PWWSRen2019(AttackRecipe): 22 | """An implementation of Probability Weighted Word Saliency from "Generating 23 | Natural Langauge Adversarial Examples through Probability Weighted Word 24 | Saliency", Ren et al., 2019. 25 | 26 | Words are prioritized for a synonym-swap transformation based on 27 | a combination of their saliency score and maximum word-swap effectiveness. 28 | Note that this implementation does not include the Named 29 | Entity adversarial swap from the original paper, because it requires 30 | access to the full dataset and ground truth labels in advance. 31 | 32 | https://www.aclweb.org/anthology/P19-1103/ 33 | """ 34 | 35 | @staticmethod 36 | def build(model, ensemble: bool = False): 37 | transformation = WordSwapWordNet() 38 | constraints = [RepeatModification(), StopwordModification()] 39 | goal_function = UntargetedClassification(model) 40 | # search over words based on a combination of their saliency score, and how efficient the WordSwap transform is 41 | search_method = GreedyWordSwapWIRPWWS(transformation) 42 | # search_method = GreedyWordSwapWIR("weighted-saliency") 43 | return Attack(goal_function, constraints, transformation, search_method) 44 | -------------------------------------------------------------------------------- /textattack/attack_recipes/seq2sick_cheng_2018_blackbox.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Seq2Sick 4 | ================================================ 5 | (Seq2Sick: Evaluating the Robustness of Sequence-to-Sequence Models with Adversarial Examples) 6 | """ 7 | from textattack.constraints.overlap import LevenshteinEditDistance 8 | from textattack.constraints.pre_transformation import ( 9 | RepeatModification, 10 | StopwordModification, 11 | ) 12 | from textattack.goal_functions import NonOverlappingOutput 13 | from textattack.search_methods import GreedyWordSwapWIR 14 | from textattack.shared.attack import Attack 15 | from textattack.transformations import WordSwapEmbedding 16 | 17 | from .attack_recipe import AttackRecipe 18 | 19 | 20 | class Seq2SickCheng2018BlackBox(AttackRecipe): 21 | """Cheng, Minhao, et al. 22 | 23 | Seq2Sick: Evaluating the Robustness of Sequence-to-Sequence Models with 24 | Adversarial Examples 25 | 26 | https://arxiv.org/abs/1803.01128 27 | 28 | This is a greedy re-implementation of the seq2sick attack method. It does 29 | not use gradient descent. 30 | """ 31 | 32 | @staticmethod 33 | def build(model, goal_function="non_overlapping"): 34 | 35 | # 36 | # Goal is non-overlapping output. 37 | # 38 | goal_function = NonOverlappingOutput(model) 39 | transformation = WordSwapEmbedding(max_candidates=50) 40 | # 41 | # Don't modify the same word twice or stopwords 42 | # 43 | constraints = [RepeatModification(), StopwordModification()] 44 | # 45 | # In these experiments, we hold the maximum difference 46 | # on edit distance (ϵ) to a constant 30 for each sample. 47 | # 48 | constraints.append(LevenshteinEditDistance(30)) 49 | # 50 | # Greedily swap words with "Word Importance Ranking". 51 | # 52 | search_method = GreedyWordSwapWIR(wir_method="unk") 53 | 54 | return Attack(goal_function, constraints, transformation, search_method) 55 | -------------------------------------------------------------------------------- /textattack/attack_results/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Attack Result: 3 | ======================== 4 | 5 | The result of an attack's attempt to find a successful adversarial perturbation. 6 | 7 | """ 8 | from .maximized_attack_result import MaximizedAttackResult 9 | from .failed_attack_result import FailedAttackResult 10 | from .skipped_attack_result import SkippedAttackResult 11 | from .successful_attack_result import SuccessfulAttackResult 12 | -------------------------------------------------------------------------------- /textattack/attack_results/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_results/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_results/__pycache__/attack_result.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_results/__pycache__/attack_result.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_results/__pycache__/failed_attack_result.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_results/__pycache__/failed_attack_result.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_results/__pycache__/maximized_attack_result.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_results/__pycache__/maximized_attack_result.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_results/__pycache__/skipped_attack_result.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_results/__pycache__/skipped_attack_result.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_results/__pycache__/successful_attack_result.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/attack_results/__pycache__/successful_attack_result.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/attack_results/failed_attack_result.py: -------------------------------------------------------------------------------- 1 | """ 2 | Failed Attack Result 3 | ===================== 4 | """ 5 | 6 | from textattack.shared import utils 7 | 8 | from .attack_result import AttackResult 9 | 10 | 11 | class FailedAttackResult(AttackResult): 12 | """The result of a failed attack.""" 13 | 14 | def __init__(self, original_result, perturbed_result=None): 15 | perturbed_result = perturbed_result or original_result 16 | super().__init__(original_result, perturbed_result) 17 | 18 | def str_lines(self, color_method=None): 19 | lines = ( 20 | self.goal_function_result_str(color_method), 21 | self.original_text(color_method), 22 | ) 23 | return tuple(map(str, lines)) 24 | 25 | def goal_function_result_str(self, color_method=None): 26 | failed_str = utils.color_text("[FAILED]", "red", color_method) 27 | return ( 28 | self.original_result.get_colored_output(color_method) + " --> " + failed_str 29 | ) 30 | -------------------------------------------------------------------------------- /textattack/attack_results/maximized_attack_result.py: -------------------------------------------------------------------------------- 1 | """ 2 | Maximized Attack Result 3 | ========================= 4 | """ 5 | 6 | from .attack_result import AttackResult 7 | 8 | 9 | class MaximizedAttackResult(AttackResult): 10 | """The result of a successful attack.""" 11 | -------------------------------------------------------------------------------- /textattack/attack_results/skipped_attack_result.py: -------------------------------------------------------------------------------- 1 | """ 2 | Skipped Attack Result 3 | ===================== 4 | """ 5 | 6 | from textattack.shared import utils 7 | 8 | from .attack_result import AttackResult 9 | 10 | 11 | class SkippedAttackResult(AttackResult): 12 | """The result of a skipped attack.""" 13 | 14 | def __init__(self, original_result): 15 | super().__init__(original_result, original_result) 16 | 17 | def str_lines(self, color_method=None): 18 | lines = ( 19 | self.goal_function_result_str(color_method), 20 | self.original_text(color_method), 21 | ) 22 | return tuple(map(str, lines)) 23 | 24 | def goal_function_result_str(self, color_method=None): 25 | skipped_str = utils.color_text("[SKIPPED]", "gray", color_method) 26 | return ( 27 | self.original_result.get_colored_output(color_method) 28 | + " --> " 29 | + skipped_str 30 | ) 31 | -------------------------------------------------------------------------------- /textattack/attack_results/successful_attack_result.py: -------------------------------------------------------------------------------- 1 | """ 2 | Successful Attack Result 3 | ========================== 4 | """ 5 | 6 | 7 | from .attack_result import AttackResult 8 | 9 | 10 | class SuccessfulAttackResult(AttackResult): 11 | """The result of a successful attack.""" 12 | -------------------------------------------------------------------------------- /textattack/augmentation/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _augmentation: 2 | 3 | Augmenter: 4 | ================== 5 | 6 | Transformations and constraints can be used outside of an attack for simple NLP data augmentation with the ``Augmenter`` class that returns all possible transformations for a given string. 7 | """ 8 | from .augmenter import Augmenter 9 | from .recipes import ( 10 | WordNetAugmenter, 11 | EmbeddingAugmenter, 12 | CharSwapAugmenter, 13 | EasyDataAugmenter, 14 | CheckListAugmenter, 15 | DeletionAugmenter, 16 | ) 17 | -------------------------------------------------------------------------------- /textattack/augmentation/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/augmentation/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/augmentation/__pycache__/augmenter.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/augmentation/__pycache__/augmenter.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/augmentation/__pycache__/faster_augmentor.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/augmentation/__pycache__/faster_augmentor.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/augmentation/__pycache__/recipes.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/augmentation/__pycache__/recipes.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/commands/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | TextAttack Command Package 4 | =========================== 5 | 6 | """ 7 | 8 | 9 | from abc import ABC, abstractmethod 10 | from .textattack_command import TextAttackCommand 11 | from . import textattack_cli 12 | -------------------------------------------------------------------------------- /textattack/commands/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/commands/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/commands/__pycache__/augment.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/commands/__pycache__/augment.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/commands/__pycache__/benchmark_recipe.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/commands/__pycache__/benchmark_recipe.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/commands/__pycache__/list_things.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/commands/__pycache__/list_things.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/commands/__pycache__/peek_dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/commands/__pycache__/peek_dataset.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/commands/__pycache__/textattack_cli.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/commands/__pycache__/textattack_cli.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/commands/__pycache__/textattack_command.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/commands/__pycache__/textattack_command.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/commands/attack/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | TextAttack Command Package for Attack 4 | ------------------------------------------ 5 | 6 | """ 7 | 8 | 9 | from .attack_command import AttackCommand 10 | from .attack_resume_command import AttackResumeCommand 11 | 12 | from .run_attack_single_threaded import run as run_attack_single_threaded 13 | from .run_attack_parallel import run as run_attack_parallel 14 | -------------------------------------------------------------------------------- /textattack/commands/attack/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/commands/attack/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/commands/attack/__pycache__/attack_args.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/commands/attack/__pycache__/attack_args.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/commands/attack/__pycache__/attack_args_helpers.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/commands/attack/__pycache__/attack_args_helpers.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/commands/attack/__pycache__/attack_command.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/commands/attack/__pycache__/attack_command.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/commands/attack/__pycache__/attack_resume_command.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/commands/attack/__pycache__/attack_resume_command.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/commands/attack/__pycache__/run_attack_parallel.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/commands/attack/__pycache__/run_attack_parallel.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/commands/attack/__pycache__/run_attack_single_threaded.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/commands/attack/__pycache__/run_attack_single_threaded.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/commands/benchmark_recipe.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | TextAttack benchmark recipe Command 4 | ===================================== 5 | 6 | """ 7 | 8 | from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser 9 | 10 | from textattack.commands import TextAttackCommand 11 | 12 | 13 | class BenchmarkRecipeCommand(TextAttackCommand): 14 | """The TextAttack benchmark recipe module: 15 | 16 | A command line parser to benchmark a recipe from user 17 | specifications. 18 | """ 19 | 20 | def run(self, args): 21 | raise NotImplementedError("Cannot benchmark recipes yet - stay tuned!!") 22 | 23 | @staticmethod 24 | def register_subcommand(main_parser: ArgumentParser): 25 | parser = main_parser.add_parser( 26 | "benchmark-recipe", 27 | help="benchmark a recipe", 28 | formatter_class=ArgumentDefaultsHelpFormatter, 29 | ) 30 | parser.set_defaults(func=BenchmarkRecipeCommand()) 31 | -------------------------------------------------------------------------------- /textattack/commands/eval_model/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | TextAttack Command Package for Evaluation 4 | ------------------------------------------ 5 | 6 | """ 7 | 8 | 9 | from .eval_model_command import EvalModelCommand 10 | -------------------------------------------------------------------------------- /textattack/commands/eval_model/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/commands/eval_model/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/commands/eval_model/__pycache__/eval_model_command.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/commands/eval_model/__pycache__/eval_model_command.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/commands/textattack_cli.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | TextAttack Command Arg Parsing Main Function 4 | ============================================= 5 | """ 6 | 7 | # !/usr/bin/env python 8 | import argparse 9 | 10 | from textattack.commands.attack import AttackCommand, AttackResumeCommand 11 | from textattack.commands.augment import AugmentCommand 12 | from textattack.commands.benchmark_recipe import BenchmarkRecipeCommand 13 | from textattack.commands.eval_model import EvalModelCommand 14 | from textattack.commands.list_things import ListThingsCommand 15 | from textattack.commands.peek_dataset import PeekDatasetCommand 16 | from textattack.commands.train_model import TrainModelCommand 17 | 18 | 19 | def main(): 20 | 21 | """This is the main command line parer and entry function to use TextAttack 22 | via command lines. 23 | 24 | texattack [] 25 | 26 | Args: 27 | command (string): augment, attack, train, eval-model, attack-resume, list, peek-dataset 28 | [] (string): depending on the command string 29 | """ 30 | 31 | parser = argparse.ArgumentParser( 32 | "TextAttack CLI", 33 | usage="[python -m] texattack []", 34 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 35 | ) 36 | subparsers = parser.add_subparsers(help="textattack command helpers") 37 | 38 | # Register commands 39 | AttackCommand.register_subcommand(subparsers) 40 | AttackResumeCommand.register_subcommand(subparsers) 41 | AugmentCommand.register_subcommand(subparsers) 42 | BenchmarkRecipeCommand.register_subcommand(subparsers) 43 | EvalModelCommand.register_subcommand(subparsers) 44 | ListThingsCommand.register_subcommand(subparsers) 45 | TrainModelCommand.register_subcommand(subparsers) 46 | PeekDatasetCommand.register_subcommand(subparsers) 47 | 48 | # Let's go 49 | args = parser.parse_args() 50 | 51 | if not hasattr(args, "func"): 52 | parser.print_help() 53 | exit(1) 54 | 55 | # Run 56 | args.func.run(args) 57 | 58 | 59 | if __name__ == "__main__": 60 | main() 61 | -------------------------------------------------------------------------------- /textattack/commands/textattack_command.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | TextAttack Command Class 4 | ===================================== 5 | """ 6 | 7 | from abc import ABC, abstractmethod 8 | 9 | 10 | class TextAttackCommand(ABC): 11 | @staticmethod 12 | @abstractmethod 13 | def register_subcommand(parser): 14 | raise NotImplementedError() 15 | 16 | @abstractmethod 17 | def run(self): 18 | raise NotImplementedError() 19 | -------------------------------------------------------------------------------- /textattack/commands/train_model/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | TextAttack Command Package for Training 4 | ---------------------------------------- 5 | 6 | 7 | """ 8 | 9 | from .train_model_command import TrainModelCommand 10 | -------------------------------------------------------------------------------- /textattack/commands/train_model/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/commands/train_model/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/commands/train_model/__pycache__/train_model_command.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/commands/train_model/__pycache__/train_model_command.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _constraint: 2 | 3 | Constraint Package 4 | =================== 5 | 6 | Constraints determine whether a given transformation is valid. Since transformations do not perfectly preserve semantics semantics or grammaticality, constraints can increase the likelihood that the resulting transformation preserves these qualities. All constraints are subclasses of the ``Constraint`` abstract class, and must implement at least one of ``__call__`` or ``call_many``. 7 | 8 | We split constraints into three main categories. 9 | 10 | :ref:`Semantics `: Based on the meaning of the input and perturbation. 11 | 12 | :ref:`Grammaticality `: Based on syntactic properties like part-of-speech and grammar. 13 | 14 | :ref:`Overlap `: Based on character-based properties, like edit distance. 15 | 16 | A fourth type of constraint restricts the search method from exploring certain parts of the search space: 17 | 18 | :ref:`pre_transformation `: Based on the input and index of word replacement. 19 | """ 20 | 21 | from .pre_transformation_constraint import PreTransformationConstraint 22 | from .constraint import Constraint 23 | 24 | from . import grammaticality 25 | from . import semantics 26 | from . import overlap 27 | from . import pre_transformation 28 | -------------------------------------------------------------------------------- /textattack/constraints/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/__pycache__/constraint.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/__pycache__/constraint.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/__pycache__/pre_transformation_constraint.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/__pycache__/pre_transformation_constraint.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _grammaticality: 2 | 3 | Grammaticality: 4 | -------------------------- 5 | 6 | Grammaticality constraints determine if a transformation is valid based on 7 | syntactic properties of the perturbation. 8 | """ 9 | 10 | from . import language_models 11 | 12 | from .language_tool import LanguageTool 13 | from .part_of_speech import PartOfSpeech 14 | from .cola import COLA 15 | -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/grammaticality/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/__pycache__/cola.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/grammaticality/__pycache__/cola.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/__pycache__/language_tool.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/grammaticality/__pycache__/language_tool.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/__pycache__/part_of_speech.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/grammaticality/__pycache__/part_of_speech.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | non-pre Language Models: 3 | ----------------------------- 4 | 5 | """ 6 | 7 | 8 | from .language_model_constraint import LanguageModelConstraint 9 | 10 | from .google_language_model import Google1BillionWordsLanguageModel 11 | from .gpt2 import GPT2 12 | from .learning_to_write import LearningToWriteLanguageModel 13 | -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/grammaticality/language_models/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/__pycache__/gpt2.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/grammaticality/language_models/__pycache__/gpt2.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/__pycache__/language_model_constraint.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/grammaticality/language_models/__pycache__/language_model_constraint.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/google_language_model/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Google Language Models: 3 | ----------------------------- 4 | 5 | """ 6 | 7 | 8 | from .google_language_model import ( 9 | GoogleLanguageModel as Google1BillionWordsLanguageModel, 10 | ) 11 | -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/google_language_model/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/grammaticality/language_models/google_language_model/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/google_language_model/__pycache__/alzantot_goog_lm.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/grammaticality/language_models/google_language_model/__pycache__/alzantot_goog_lm.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/google_language_model/__pycache__/google_language_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/grammaticality/language_models/google_language_model/__pycache__/google_language_model.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/google_language_model/__pycache__/lm_data_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/grammaticality/language_models/google_language_model/__pycache__/lm_data_utils.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/google_language_model/__pycache__/lm_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/grammaticality/language_models/google_language_model/__pycache__/lm_utils.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/gpt2.py: -------------------------------------------------------------------------------- 1 | """ 2 | GPT2 Language Models: 3 | -------------------------- 4 | 5 | """ 6 | 7 | 8 | import os 9 | 10 | import torch 11 | 12 | from textattack.shared import utils 13 | 14 | from .language_model_constraint import LanguageModelConstraint 15 | 16 | # temporarily silence W&B to ignore log-in warning 17 | os.environ["WANDB_SILENT"] = "1" 18 | 19 | 20 | class GPT2(LanguageModelConstraint): 21 | """A constraint based on the GPT-2 language model. 22 | 23 | from "Better Language Models and Their Implications" 24 | (openai.com/blog/better-language-models/) 25 | """ 26 | 27 | def __init__(self, **kwargs): 28 | import transformers 29 | 30 | # re-enable notifications 31 | os.environ["WANDB_SILENT"] = "0" 32 | self.model = transformers.GPT2LMHeadModel.from_pretrained("gpt2") 33 | self.model.to(utils.device) 34 | self.tokenizer = transformers.GPT2Tokenizer.from_pretrained("gpt2") 35 | super().__init__(**kwargs) 36 | 37 | def get_log_probs_at_index(self, text_list, word_index): 38 | """Gets the probability of the word at index `word_index` according to 39 | GPT-2. 40 | 41 | Assumes that all items in `text_list` have the same prefix up 42 | until `word_index`. 43 | """ 44 | prefix = text_list[0].text_until_word_index(word_index) 45 | 46 | if not utils.has_letter(prefix): 47 | # This language model perplexity is not defined with respect to 48 | # a word without a prefix. If the prefix is null, just return the 49 | # log-probability 0.0. 50 | return torch.zeros(len(text_list), dtype=torch.float) 51 | 52 | token_ids = self.tokenizer.encode(prefix) 53 | tokens_tensor = torch.tensor([token_ids]) 54 | tokens_tensor = tokens_tensor.to(utils.device) 55 | 56 | with torch.no_grad(): 57 | outputs = self.model(tokens_tensor) 58 | predictions = outputs[0] 59 | 60 | probs = [] 61 | for attacked_text in text_list: 62 | next_word_ids = self.tokenizer.encode(attacked_text.words[word_index]) 63 | next_word_prob = predictions[0, -1, next_word_ids[0]] 64 | probs.append(next_word_prob) 65 | 66 | return probs 67 | -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/language_model_constraint.py: -------------------------------------------------------------------------------- 1 | """ 2 | Language Models Constraint 3 | --------------------------- 4 | 5 | """ 6 | 7 | from abc import ABC, abstractmethod 8 | 9 | from textattack.constraints import Constraint 10 | 11 | 12 | class LanguageModelConstraint(Constraint, ABC): 13 | """Determines if two sentences have a swapped word that has a similar 14 | probability according to a language model. 15 | 16 | Args: 17 | max_log_prob_diff (float): the maximum decrease in log-probability 18 | in swapped words from `x` to `x_adv` 19 | compare_against_original (bool): If `True`, compare new `x_adv` against the original `x`. 20 | Otherwise, compare it against the previous `x_adv`. 21 | """ 22 | 23 | def __init__(self, max_log_prob_diff=None, compare_against_original=True): 24 | if max_log_prob_diff is None: 25 | raise ValueError("Must set max_log_prob_diff") 26 | self.max_log_prob_diff = max_log_prob_diff 27 | super().__init__(compare_against_original) 28 | 29 | @abstractmethod 30 | def get_log_probs_at_index(self, text_list, word_index): 31 | """Gets the log-probability of items in `text_list` at index 32 | `word_index` according to a language model.""" 33 | raise NotImplementedError() 34 | 35 | def _check_constraint(self, transformed_text, reference_text): 36 | try: 37 | indices = transformed_text.attack_attrs["newly_modified_indices"] 38 | except KeyError: 39 | raise KeyError( 40 | "Cannot apply language model constraint without `newly_modified_indices`" 41 | ) 42 | 43 | for i in indices: 44 | probs = self.get_log_probs_at_index((reference_text, transformed_text), i) 45 | if len(probs) != 2: 46 | raise ValueError( 47 | f"Error: get_log_probs_at_index returned {len(probs)} values for 2 inputs" 48 | ) 49 | ref_prob, transformed_prob = probs 50 | if transformed_prob <= ref_prob - self.max_log_prob_diff: 51 | return False 52 | 53 | return True 54 | 55 | def extra_repr_keys(self): 56 | return ["max_log_prob_diff"] + super().extra_repr_keys() 57 | -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/learning_to_write/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | "Learning To Write" 3 | -------------------------- 4 | 5 | """ 6 | from .learning_to_write import LearningToWriteLanguageModel 7 | -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/learning_to_write/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/grammaticality/language_models/learning_to_write/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/learning_to_write/__pycache__/adaptive_softmax.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/grammaticality/language_models/learning_to_write/__pycache__/adaptive_softmax.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/learning_to_write/__pycache__/language_model_helpers.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/grammaticality/language_models/learning_to_write/__pycache__/language_model_helpers.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/learning_to_write/__pycache__/learning_to_write.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/grammaticality/language_models/learning_to_write/__pycache__/learning_to_write.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/learning_to_write/__pycache__/rnn_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/grammaticality/language_models/learning_to_write/__pycache__/rnn_model.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/learning_to_write/learning_to_write.py: -------------------------------------------------------------------------------- 1 | """ 2 | "Learning To Write" Language Model 3 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | 5 | """ 6 | 7 | import torch 8 | 9 | import textattack 10 | from textattack.constraints.grammaticality.language_models import ( 11 | LanguageModelConstraint, 12 | ) 13 | 14 | from .language_model_helpers import QueryHandler 15 | 16 | 17 | class LearningToWriteLanguageModel(LanguageModelConstraint): 18 | """A constraint based on the L2W language model. 19 | 20 | The RNN-based language model from "Learning to Write With Cooperative 21 | Discriminators" (Holtzman et al, 2018). 22 | 23 | https://arxiv.org/pdf/1805.06087.pdf 24 | 25 | https://github.com/windweller/l2w 26 | 27 | 28 | Reused by Jia et al., 2019, as a substitution for the Google 1-billion 29 | words language model (in a revised version the attack of Alzantot et 30 | al., 2018). 31 | 32 | https://worksheets.codalab.org/worksheets/0x79feda5f1998497db75422eca8fcd689 33 | """ 34 | 35 | CACHE_PATH = "constraints/grammaticality/language-models/learning-to-write" 36 | 37 | def __init__(self, window_size=5, **kwargs): 38 | self.window_size = window_size 39 | lm_folder_path = textattack.shared.utils.download_if_needed( 40 | LearningToWriteLanguageModel.CACHE_PATH 41 | ) 42 | self.query_handler = QueryHandler.load_model( 43 | lm_folder_path, textattack.shared.utils.device 44 | ) 45 | super().__init__(**kwargs) 46 | 47 | def get_log_probs_at_index(self, text_list, word_index): 48 | """Gets the probability of the word at index `word_index` according to 49 | the language model.""" 50 | queries = [] 51 | query_words = [] 52 | for attacked_text in text_list: 53 | word = attacked_text.words[word_index] 54 | window_text = attacked_text.text_window_around_index( 55 | word_index, self.window_size 56 | ) 57 | query = textattack.shared.utils.words_from_text(window_text) 58 | queries.append(query) 59 | query_words.append(word) 60 | log_probs = self.query_handler.query(queries, query_words) 61 | return torch.tensor(log_probs) 62 | -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_tool.py: -------------------------------------------------------------------------------- 1 | """ 2 | LanguageTool Grammar Checker 3 | ------------------------------ 4 | """ 5 | import language_tool_python 6 | 7 | from textattack.constraints import Constraint 8 | 9 | 10 | class LanguageTool(Constraint): 11 | """Uses languagetool to determine if two sentences have the same number of 12 | grammatical erors. (https://languagetool.org/) 13 | 14 | Args: 15 | grammar_error_threshold (int): the number of additional errors permitted in `x_adv` 16 | relative to `x` 17 | compare_against_original (bool): If `True`, compare against the original text. 18 | Otherwise, compare against the most recent text. 19 | """ 20 | 21 | def __init__(self, grammar_error_threshold=0, compare_against_original=True): 22 | super().__init__(compare_against_original) 23 | self.lang_tool = language_tool_python.LanguageTool("en-US") 24 | self.grammar_error_threshold = grammar_error_threshold 25 | self.grammar_error_cache = {} 26 | 27 | def get_errors(self, attacked_text, use_cache=False): 28 | text = attacked_text.text 29 | if use_cache: 30 | if text not in self.grammar_error_cache: 31 | self.grammar_error_cache[text] = len(self.lang_tool.check(text)) 32 | return self.grammar_error_cache[text] 33 | else: 34 | return len(self.lang_tool.check(text)) 35 | 36 | def _check_constraint(self, transformed_text, reference_text): 37 | original_num_errors = self.get_errors(reference_text, use_cache=True) 38 | errors_added = self.get_errors(transformed_text) - original_num_errors 39 | return errors_added <= self.grammar_error_threshold 40 | 41 | def extra_repr_keys(self): 42 | return ["grammar_error_threshold"] + super().extra_repr_keys() 43 | -------------------------------------------------------------------------------- /textattack/constraints/overlap/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _overlap: 2 | 3 | Overlap Constraints 4 | -------------------------- 5 | 6 | Overlap constraints determine if a transformation is valid based on character-level analysis. 7 | """ 8 | 9 | from .bleu_score import BLEU 10 | from .chrf_score import chrF 11 | from .levenshtein_edit_distance import LevenshteinEditDistance 12 | from .meteor_score import METEOR 13 | from .max_words_perturbed import MaxWordsPerturbed 14 | -------------------------------------------------------------------------------- /textattack/constraints/overlap/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/overlap/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/overlap/__pycache__/bleu_score.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/overlap/__pycache__/bleu_score.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/overlap/__pycache__/chrf_score.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/overlap/__pycache__/chrf_score.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/overlap/__pycache__/levenshtein_edit_distance.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/overlap/__pycache__/levenshtein_edit_distance.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/overlap/__pycache__/max_words_perturbed.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/overlap/__pycache__/max_words_perturbed.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/overlap/__pycache__/meteor_score.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/overlap/__pycache__/meteor_score.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/overlap/bleu_score.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | BLEU Constraints 4 | -------------------------- 5 | 6 | 7 | """ 8 | 9 | import nltk 10 | 11 | from textattack.constraints import Constraint 12 | 13 | 14 | class BLEU(Constraint): 15 | """A constraint on BLEU score difference. 16 | 17 | Args: 18 | max_bleu_score (int): Maximum BLEU score allowed. 19 | compare_against_original (bool): If `True`, compare new `x_adv` against the original `x`. 20 | Otherwise, compare it against the previous `x_adv`. 21 | """ 22 | 23 | def __init__(self, max_bleu_score, compare_against_original=True): 24 | super().__init__(compare_against_original) 25 | if not isinstance(max_bleu_score, int): 26 | raise TypeError("max_bleu_score must be an int") 27 | self.max_bleu_score = max_bleu_score 28 | 29 | def _check_constraint(self, transformed_text, reference_text): 30 | ref = reference_text.words 31 | hyp = transformed_text.words 32 | bleu_score = nltk.translate.bleu_score.sentence_bleu([ref], hyp) 33 | return bleu_score <= self.max_bleu_score 34 | 35 | def extra_repr_keys(self): 36 | return ["max_bleu_score"] + super().extra_repr_keys() 37 | -------------------------------------------------------------------------------- /textattack/constraints/overlap/chrf_score.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | chrF Constraints 4 | -------------------------- 5 | 6 | 7 | """ 8 | 9 | import nltk.translate.chrf_score 10 | 11 | from textattack.constraints import Constraint 12 | 13 | 14 | class chrF(Constraint): 15 | """A constraint on chrF (n-gram F-score) difference. 16 | 17 | Args: 18 | max_chrf (int): Max n-gram F-score allowed. 19 | compare_against_original (bool): If `True`, compare new `x_adv` against the original `x`. 20 | Otherwise, compare it against the previous `x_adv`. 21 | """ 22 | 23 | def __init__(self, max_chrf, compare_against_original=True): 24 | super().__init__(compare_against_original) 25 | if not isinstance(max_chrf, int): 26 | raise TypeError("max_chrf must be an int") 27 | self.max_chrf = max_chrf 28 | 29 | def _check_constraint(self, transformed_text, reference_text): 30 | ref = reference_text.words 31 | hyp = transformed_text.words 32 | chrf = nltk.translate.chrf_score.sentence_chrf(ref, hyp) 33 | return chrf <= self.max_chrf 34 | 35 | def extra_repr_keys(self): 36 | return ["max_chrf"] + super().extra_repr_keys() 37 | -------------------------------------------------------------------------------- /textattack/constraints/overlap/levenshtein_edit_distance.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Edit Distance Constraints 4 | -------------------------- 5 | 6 | 7 | """ 8 | 9 | import editdistance 10 | 11 | from textattack.constraints import Constraint 12 | 13 | 14 | class LevenshteinEditDistance(Constraint): 15 | """A constraint on edit distance (Levenshtein Distance). 16 | 17 | Args: 18 | max_edit_distance (int): Maximum edit distance allowed. 19 | compare_against_original (bool): If `True`, compare new `x_adv` against the original `x`. 20 | Otherwise, compare it against the previous `x_adv`. 21 | """ 22 | 23 | def __init__(self, max_edit_distance, compare_against_original=True): 24 | super().__init__(compare_against_original) 25 | if not isinstance(max_edit_distance, int): 26 | raise TypeError("max_edit_distance must be an int") 27 | self.max_edit_distance = max_edit_distance 28 | 29 | def _check_constraint(self, transformed_text, reference_text): 30 | edit_distance = editdistance.eval(reference_text.text, transformed_text.text) 31 | return edit_distance <= self.max_edit_distance 32 | 33 | def extra_repr_keys(self): 34 | return ["max_edit_distance"] + super().extra_repr_keys() 35 | -------------------------------------------------------------------------------- /textattack/constraints/overlap/meteor_score.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | METEOR Constraints 4 | -------------------------- 5 | 6 | 7 | """ 8 | 9 | 10 | import nltk 11 | 12 | from textattack.constraints import Constraint 13 | 14 | 15 | class METEOR(Constraint): 16 | """A constraint on METEOR score difference. 17 | 18 | Args: 19 | max_meteor (int): Max METEOR score allowed. 20 | compare_against_original (bool): If `True`, compare new `x_adv` against the original `x`. 21 | Otherwise, compare it against the previous `x_adv`. 22 | """ 23 | 24 | def __init__(self, max_meteor, compare_against_original=True): 25 | super().__init__(compare_against_original) 26 | if not isinstance(max_meteor, int): 27 | raise TypeError("max_meteor must be an int") 28 | self.max_meteor = max_meteor 29 | 30 | def _check_constraint(self, transformed_text, reference_text): 31 | meteor = nltk.translate.meteor([reference_text], transformed_text) 32 | return meteor <= self.max_meteor 33 | 34 | def extra_repr_keys(self): 35 | return ["max_meteor"] + super().extra_repr_keys() 36 | -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _pre_transformation: 2 | 3 | Pre-Transformation: 4 | --------------------- 5 | 6 | Pre-transformation constraints determine if a transformation is valid based on only the original input and the position of the replacement. These constraints are applied before the transformation is even called. For example, these constraints can prevent search methods from swapping words at the same index twice, or from replacing stopwords. 7 | """ 8 | from .stopword_modification import StopwordModification 9 | from .repeat_modification import RepeatModification 10 | from .input_column_modification import InputColumnModification 11 | from .max_word_index_modification import MaxWordIndexModification 12 | from .min_word_length import MinWordLength 13 | from .sentiment_word_modification import SentimentWordModification 14 | -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/pre_transformation/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/__pycache__/input_column_modification.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/pre_transformation/__pycache__/input_column_modification.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/__pycache__/max_word_index_modification.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/pre_transformation/__pycache__/max_word_index_modification.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/__pycache__/min_word_length.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/pre_transformation/__pycache__/min_word_length.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/__pycache__/repeat_modification.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/pre_transformation/__pycache__/repeat_modification.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/__pycache__/sentiment_word_modification.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/pre_transformation/__pycache__/sentiment_word_modification.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/__pycache__/stopword_modification.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/pre_transformation/__pycache__/stopword_modification.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/input_column_modification.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Input Column Modification 4 | -------------------------- 5 | 6 | """ 7 | 8 | from textattack.constraints import PreTransformationConstraint 9 | 10 | 11 | class InputColumnModification(PreTransformationConstraint): 12 | """A constraint disallowing the modification of words within a specific 13 | input column. 14 | 15 | For example, can prevent modification of 'premise' during 16 | entailment. 17 | """ 18 | 19 | def __init__(self, matching_column_labels, columns_to_ignore): 20 | self.matching_column_labels = matching_column_labels 21 | self.columns_to_ignore = columns_to_ignore 22 | 23 | def _get_modifiable_indices(self, current_text): 24 | """Returns the word indices in current_text which are able to be 25 | deleted. 26 | 27 | If ``current_text.column_labels`` doesn't match 28 | ``self.matching_column_labels``, do nothing, and allow all words 29 | to be modified. 30 | 31 | If it does match, only allow words to be modified if they are not 32 | in columns from ``columns_to_ignore``. 33 | """ 34 | if current_text.column_labels != self.matching_column_labels: 35 | return set(range(len(current_text.words))) 36 | 37 | idx = 0 38 | indices_to_modify = set() 39 | for column, words in zip( 40 | current_text.column_labels, current_text.words_per_input 41 | ): 42 | num_words = len(words) 43 | if column not in self.columns_to_ignore: 44 | indices_to_modify |= set(range(idx, idx + num_words)) 45 | idx += num_words 46 | return indices_to_modify 47 | 48 | def extra_repr_keys(self): 49 | return ["matching_column_labels", "columns_to_ignore"] 50 | -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/max_word_index_modification.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Max Word Index Modification 4 | ----------------------------- 5 | 6 | """ 7 | from textattack.constraints import PreTransformationConstraint 8 | 9 | # from textattack.shared.utils import default_class_repr 10 | 11 | 12 | class MaxWordIndexModification(PreTransformationConstraint): 13 | """A constraint disallowing the modification of words which are past some 14 | maximum length limit.""" 15 | 16 | def __init__(self, max_length): 17 | self.max_length = max_length 18 | 19 | def _get_modifiable_indices(self, current_text): 20 | """Returns the word indices in current_text which are able to be 21 | deleted.""" 22 | return set(range(min(self.max_length, len(current_text.words)))) 23 | 24 | def extra_repr_keys(self): 25 | return ["max_length"] 26 | -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/min_word_length.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Min Word Lenth 4 | -------------------------- 5 | 6 | """ 7 | 8 | from textattack.constraints import PreTransformationConstraint 9 | 10 | 11 | class MinWordLength(PreTransformationConstraint): 12 | """A constraint that prevents modifications to words less than a certain 13 | length. 14 | 15 | :param min_length: Minimum length needed for changes to be made to a word. 16 | """ 17 | 18 | def __init__(self, min_length): 19 | self.min_length = min_length 20 | 21 | def _get_modifiable_indices(self, current_text): 22 | idxs = [] 23 | for i, word in enumerate(current_text.words): 24 | if len(word) >= self.min_length: 25 | idxs.append(i) 26 | return set(idxs) 27 | -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/repeat_modification.py: -------------------------------------------------------------------------------- 1 | """ 2 | Repeat Modification 3 | -------------------------- 4 | 5 | """ 6 | 7 | from textattack.constraints import PreTransformationConstraint 8 | 9 | # from textattack.shared.utils import default_class_repr 10 | 11 | 12 | class RepeatModification(PreTransformationConstraint): 13 | """A constraint disallowing the modification of words which have already 14 | been modified.""" 15 | 16 | def _get_modifiable_indices(self, current_text): 17 | """Returns the word indices in current_text which are able to be 18 | deleted.""" 19 | try: 20 | return ( 21 | set(range(len(current_text.words))) 22 | - current_text.attack_attrs["modified_indices"] 23 | ) 24 | except KeyError: 25 | raise KeyError( 26 | "`modified_indices` in attack_attrs required for RepeatModification constraint." 27 | ) 28 | -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/sentiment_word_modification.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Stopword Modification 4 | -------------------------- 5 | 6 | """ 7 | 8 | import nltk 9 | 10 | from typing import Set 11 | from textattack.constraints import PreTransformationConstraint 12 | from textattack.shared.validators import transformation_consists_of_word_swaps 13 | 14 | def build_sentiment_word_set(file_path: str) -> Set[str]: 15 | sentiment_words_set = set() 16 | with open(file_path, 'r', encoding='utf8') as file: 17 | for line in file.readlines(): 18 | sentiment_words_set.add(line.strip()) 19 | return sentiment_words_set 20 | 21 | class SentimentWordModification(PreTransformationConstraint): 22 | """A constraint disallowing the modification of stopwords.""" 23 | 24 | def __init__(self, sentiment_word_path:str=None): 25 | if sentiment_word_path is None: 26 | self.sentiments = set() 27 | else: 28 | self.sentiments = build_sentiment_word_set(sentiment_word_path) 29 | 30 | def _get_modifiable_indices(self, current_text): 31 | """Returns the word indices in ``current_text`` which are able to be 32 | modified.""" 33 | non_sentiment_indices = set() 34 | for i, word in enumerate(current_text.words): 35 | if word not in self.sentiments: 36 | non_sentiment_indices.add(i) 37 | return non_sentiment_indices 38 | 39 | def check_compatibility(self, transformation): 40 | """The stopword constraint only is concerned with word swaps since 41 | paraphrasing phrases containing stopwords is OK. 42 | 43 | Args: 44 | transformation: The ``Transformation`` to check compatibility with. 45 | """ 46 | return transformation_consists_of_word_swaps(transformation) 47 | -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/stopword_modification.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Stopword Modification 4 | -------------------------- 5 | 6 | """ 7 | 8 | import nltk 9 | 10 | from textattack.constraints import PreTransformationConstraint 11 | from textattack.shared.validators import transformation_consists_of_word_swaps 12 | 13 | 14 | class StopwordModification(PreTransformationConstraint): 15 | """A constraint disallowing the modification of stopwords.""" 16 | 17 | def __init__(self, stopwords=None): 18 | if stopwords is not None: 19 | self.stopwords = set(stopwords) 20 | else: 21 | self.stopwords = set(nltk.corpus.stopwords.words("english")) 22 | 23 | def _get_modifiable_indices(self, current_text): 24 | """Returns the word indices in ``current_text`` which are able to be 25 | modified.""" 26 | non_stopword_indices = set() 27 | for i, word in enumerate(current_text.words): 28 | if word not in self.stopwords: 29 | non_stopword_indices.add(i) 30 | return non_stopword_indices 31 | 32 | def check_compatibility(self, transformation): 33 | """The stopword constraint only is concerned with word swaps since 34 | paraphrasing phrases containing stopwords is OK. 35 | 36 | Args: 37 | transformation: The ``Transformation`` to check compatibility with. 38 | """ 39 | return transformation_consists_of_word_swaps(transformation) 40 | -------------------------------------------------------------------------------- /textattack/constraints/semantics/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _semantics: 2 | 3 | Semantic Constraints 4 | --------------------- 5 | Semantic constraints determine if a transformation is valid based on similarity of the semantics of the orignal input and the transformed input. 6 | """ 7 | from . import sentence_encoders 8 | 9 | from .word_embedding_distance import WordEmbeddingDistance 10 | from .bert_score import BERTScore 11 | -------------------------------------------------------------------------------- /textattack/constraints/semantics/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/semantics/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/semantics/__pycache__/bert_score.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/semantics/__pycache__/bert_score.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/semantics/__pycache__/word_embedding_distance.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/semantics/__pycache__/word_embedding_distance.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Sentence Encoder 3 | --------------------- 4 | """ 5 | 6 | 7 | from .sentence_encoder import SentenceEncoder 8 | 9 | from .bert import BERT 10 | from .infer_sent import InferSent 11 | from .thought_vector import ThoughtVector 12 | from .universal_sentence_encoder import ( 13 | UniversalSentenceEncoder, 14 | MultilingualUniversalSentenceEncoder, 15 | ) 16 | -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/semantics/sentence_encoders/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/__pycache__/sentence_encoder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/semantics/sentence_encoders/__pycache__/sentence_encoder.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/__pycache__/thought_vector.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/semantics/sentence_encoders/__pycache__/thought_vector.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/bert/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | BERT 3 | ^^^^^^^ 4 | """ 5 | 6 | from .bert import BERT 7 | -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/bert/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/semantics/sentence_encoders/bert/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/bert/__pycache__/bert.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/semantics/sentence_encoders/bert/__pycache__/bert.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/bert/bert.py: -------------------------------------------------------------------------------- 1 | """ 2 | BERT for Sentence Similarity 3 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | """ 5 | 6 | from textattack.constraints.semantics.sentence_encoders import SentenceEncoder 7 | from textattack.shared import utils 8 | 9 | sentence_transformers = utils.LazyLoader( 10 | "sentence_transformers", globals(), "sentence_transformers" 11 | ) 12 | 13 | 14 | class BERT(SentenceEncoder): 15 | """Constraint using similarity between sentence encodings of x and x_adv 16 | where the text embeddings are created using BERT, trained on NLI data, and 17 | fine- tuned on the STS benchmark dataset.""" 18 | 19 | def __init__(self, threshold=0.7, metric="cosine", **kwargs): 20 | super().__init__(threshold=threshold, metric=metric, **kwargs) 21 | self.model = sentence_transformers.SentenceTransformer( 22 | "bert-base-nli-stsb-mean-tokens" 23 | ) 24 | self.model.to(utils.device) 25 | 26 | def encode(self, sentences): 27 | return self.model.encode(sentences) 28 | -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/infer_sent/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | infer sent 3 | ^^^^^^^^^^^^ 4 | """ 5 | 6 | 7 | from .infer_sent import InferSent 8 | -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/infer_sent/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/semantics/sentence_encoders/infer_sent/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/infer_sent/__pycache__/infer_sent.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/semantics/sentence_encoders/infer_sent/__pycache__/infer_sent.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/infer_sent/__pycache__/infer_sent_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/semantics/sentence_encoders/infer_sent/__pycache__/infer_sent_model.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/infer_sent/infer_sent.py: -------------------------------------------------------------------------------- 1 | """ 2 | infer sent for sentence similarity 3 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | """ 5 | 6 | import os 7 | 8 | import torch 9 | 10 | from textattack.constraints.semantics.sentence_encoders import SentenceEncoder 11 | from textattack.shared import utils 12 | 13 | from .infer_sent_model import InferSentModel 14 | 15 | 16 | class InferSent(SentenceEncoder): 17 | """Constraint using similarity between sentence encodings of x and x_adv 18 | where the text embeddings are created using InferSent.""" 19 | 20 | MODEL_PATH = "constraints/semantics/sentence-encoders/infersent-encoder" 21 | WORD_EMBEDDING_PATH = "word_embeddings" 22 | 23 | def __init__(self, *args, **kwargs): 24 | super().__init__(*args, **kwargs) 25 | self.model = self.get_infersent_model() 26 | self.model.to(utils.device) 27 | 28 | def get_infersent_model(self): 29 | """Retrieves the InferSent model. 30 | 31 | Returns: 32 | The pretrained InferSent model. 33 | """ 34 | infersent_version = 2 35 | model_folder_path = utils.download_if_needed(InferSent.MODEL_PATH) 36 | model_path = os.path.join( 37 | model_folder_path, f"infersent{infersent_version}.pkl" 38 | ) 39 | params_model = { 40 | "bsize": 64, 41 | "word_emb_dim": 300, 42 | "enc_lstm_dim": 2048, 43 | "pool_type": "max", 44 | "dpout_model": 0.0, 45 | "version": infersent_version, 46 | } 47 | infersent = InferSentModel(params_model) 48 | infersent.load_state_dict(torch.load(model_path)) 49 | word_embedding_path = utils.download_if_needed(InferSent.WORD_EMBEDDING_PATH) 50 | w2v_path = os.path.join(word_embedding_path, "fastText", "crawl-300d-2M.vec") 51 | infersent.set_w2v_path(w2v_path) 52 | infersent.build_vocab_k_words(K=100000) 53 | return infersent 54 | 55 | def encode(self, sentences): 56 | return self.model.encode(sentences, tokenize=True) 57 | -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/thought_vector.py: -------------------------------------------------------------------------------- 1 | """ 2 | Thought Vector 3 | --------------------- 4 | """ 5 | 6 | import functools 7 | 8 | import torch 9 | 10 | from textattack.shared import AbstractWordEmbedding, WordEmbedding, utils 11 | 12 | from .sentence_encoder import SentenceEncoder 13 | 14 | 15 | class ThoughtVector(SentenceEncoder): 16 | """A constraint on the distance between two sentences' thought vectors. 17 | 18 | Args: 19 | word_embedding (textattack.shared.AbstractWordEmbedding): The word embedding to use 20 | """ 21 | 22 | def __init__( 23 | self, embedding=WordEmbedding.counterfitted_GLOVE_embedding(), **kwargs 24 | ): 25 | if not isinstance(embedding, AbstractWordEmbedding): 26 | raise ValueError( 27 | "`embedding` object must be of type `textattack.shared.AbstractWordEmbedding`." 28 | ) 29 | self.word_embedding = embedding 30 | super().__init__(**kwargs) 31 | 32 | def clear_cache(self): 33 | self._get_thought_vector.cache_clear() 34 | 35 | @functools.lru_cache(maxsize=2 ** 10) 36 | def _get_thought_vector(self, text): 37 | """Sums the embeddings of all the words in ``text`` into a "thought 38 | vector".""" 39 | embeddings = [] 40 | for word in utils.words_from_text(text): 41 | embedding = self.word_embedding[word] 42 | if embedding is not None: # out-of-vocab words do not have embeddings 43 | embeddings.append(embedding) 44 | embeddings = torch.tensor(embeddings) 45 | return torch.mean(embeddings, dim=0) 46 | 47 | def encode(self, raw_text_list): 48 | return torch.stack([self._get_thought_vector(text) for text in raw_text_list]) 49 | 50 | def extra_repr_keys(self): 51 | """Set the extra representation of the constraint using these keys.""" 52 | return ["word_embedding"] + super().extra_repr_keys() 53 | -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Universal sentence encoder 3 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | """ 5 | 6 | 7 | from .universal_sentence_encoder import UniversalSentenceEncoder 8 | from .multilingual_universal_sentence_encoder import ( 9 | MultilingualUniversalSentenceEncoder, 10 | ) 11 | -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/__pycache__/multilingual_universal_sentence_encoder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/__pycache__/multilingual_universal_sentence_encoder.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/__pycache__/universal_sentence_encoder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/__pycache__/universal_sentence_encoder.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py: -------------------------------------------------------------------------------- 1 | """ 2 | multilingual universal sentence encoder 3 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | """ 5 | 6 | from textattack.constraints.semantics.sentence_encoders import SentenceEncoder 7 | from textattack.shared.utils import LazyLoader 8 | 9 | hub = LazyLoader("tensorflow_hub", globals(), "tensorflow_hub") 10 | tensorflow_text = LazyLoader( 11 | "tensorflow_text", globals(), "tensorflow_text" 12 | ) # noqa: F401 13 | 14 | 15 | class MultilingualUniversalSentenceEncoder(SentenceEncoder): 16 | """Constraint using similarity between sentence encodings of x and x_adv 17 | where the text embeddings are created using the Multilingual Universal 18 | Sentence Encoder.""" 19 | 20 | def __init__(self, threshold=0.8, large=False, metric="angular", **kwargs): 21 | super().__init__(threshold=threshold, metric=metric, **kwargs) 22 | if large: 23 | tfhub_url = "https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/3" 24 | else: 25 | tfhub_url = ( 26 | "https://tfhub.dev/google/universal-sentence-encoder-multilingual/3" 27 | ) 28 | 29 | # TODO add QA SET. Details at: https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3 30 | 31 | self.model = hub.load(tfhub_url) 32 | 33 | def encode(self, sentences): 34 | return self.model(sentences).numpy() 35 | -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py: -------------------------------------------------------------------------------- 1 | """ 2 | universal sentence encoder class 3 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | """ 5 | 6 | from textattack.constraints.semantics.sentence_encoders import SentenceEncoder 7 | from textattack.shared.utils import LazyLoader 8 | 9 | hub = LazyLoader("tensorflow_hub", globals(), "tensorflow_hub") 10 | 11 | 12 | class UniversalSentenceEncoder(SentenceEncoder): 13 | """Constraint using similarity between sentence encodings of x and x_adv 14 | where the text embeddings are created using the Universal Sentence 15 | Encoder.""" 16 | 17 | def __init__(self, threshold=0.8, large=False, metric="angular", **kwargs): 18 | super().__init__(threshold=threshold, metric=metric, **kwargs) 19 | if large: 20 | tfhub_url = "https://tfhub.dev/google/universal-sentence-encoder-large/5" 21 | else: 22 | tfhub_url = "https://tfhub.dev/google/universal-sentence-encoder/4" 23 | 24 | self.model = hub.load(tfhub_url) 25 | 26 | def encode(self, sentences): 27 | return self.model(sentences).numpy() 28 | -------------------------------------------------------------------------------- /textattack/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | datasets: 4 | ====================== 5 | 6 | TextAttack allows users to provide their own dataset or load from HuggingFace. 7 | 8 | 9 | """ 10 | 11 | from .dataset import TextAttackDataset 12 | from .huggingface_dataset import HuggingFaceDataset 13 | 14 | from . import translation 15 | -------------------------------------------------------------------------------- /textattack/datasets/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/datasets/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/datasets/__pycache__/dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/datasets/__pycache__/dataset.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/datasets/__pycache__/huggingface_dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/datasets/__pycache__/huggingface_dataset.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/datasets/translation/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Multi TranslationDataset 4 | ============================= 5 | """ 6 | 7 | from .ted_multi import TedMultiTranslationDataset 8 | -------------------------------------------------------------------------------- /textattack/datasets/translation/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/datasets/translation/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/datasets/translation/__pycache__/ted_multi.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/datasets/translation/__pycache__/ted_multi.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/datasets/translation/ted_multi.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Ted Multi TranslationDataset Class 4 | ------------------------------------ 5 | """ 6 | 7 | 8 | import collections 9 | 10 | import datasets 11 | import numpy as np 12 | 13 | from textattack.datasets import HuggingFaceDataset 14 | 15 | 16 | class TedMultiTranslationDataset(HuggingFaceDataset): 17 | """Loads examples from the Ted Talk translation dataset using the 18 | `datasets` package. 19 | 20 | dataset source: http://www.cs.jhu.edu/~kevinduh/a/multitarget-tedtalks/ 21 | """ 22 | 23 | def __init__(self, source_lang="en", target_lang="de", split="test"): 24 | self._dataset = datasets.load_dataset("ted_multi")[split] 25 | self.examples = self._dataset["translations"] 26 | language_options = set(self.examples[0]["language"]) 27 | if source_lang not in language_options: 28 | raise ValueError( 29 | f"Source language {source_lang} invalid. Choices: {sorted(language_options)}" 30 | ) 31 | if target_lang not in language_options: 32 | raise ValueError( 33 | f"Target language {target_lang} invalid. Choices: {sorted(language_options)}" 34 | ) 35 | self.source_lang = source_lang 36 | self.target_lang = target_lang 37 | self.label_names = ("Translation",) 38 | self._i = 0 39 | 40 | def _format_raw_example(self, raw_example): 41 | translations = np.array(raw_example["translation"]) 42 | languages = np.array(raw_example["language"]) 43 | source = translations[languages == self.source_lang][0] 44 | target = translations[languages == self.target_lang][0] 45 | source_dict = collections.OrderedDict([("Source", source)]) 46 | return (source_dict, target) 47 | -------------------------------------------------------------------------------- /textattack/goal_function_results/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Goal Function Result: 3 | ======================== 4 | 5 | Goal function results report the result of a goal function evaluation, indicating whether an attack succeeded for a given example. 6 | 7 | """ 8 | from .goal_function_result import GoalFunctionResult, GoalFunctionResultStatus 9 | 10 | from .classification_goal_function_result import ClassificationGoalFunctionResult 11 | from .text_to_text_goal_function_result import TextToTextGoalFunctionResult 12 | -------------------------------------------------------------------------------- /textattack/goal_function_results/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/goal_function_results/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/goal_function_results/__pycache__/classification_goal_function_result.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/goal_function_results/__pycache__/classification_goal_function_result.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/goal_function_results/__pycache__/goal_function_result.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/goal_function_results/__pycache__/goal_function_result.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/goal_function_results/__pycache__/text_to_text_goal_function_result.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/goal_function_results/__pycache__/text_to_text_goal_function_result.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/goal_function_results/text_to_text_goal_function_result.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | text2text goal function Result 4 | ==================================== 5 | 6 | """ 7 | 8 | from .goal_function_result import GoalFunctionResult 9 | 10 | 11 | class TextToTextGoalFunctionResult(GoalFunctionResult): 12 | """Represents the result of a text-to-text goal function.""" 13 | 14 | def get_text_color_input(self): 15 | """A string representing the color this result's changed portion should 16 | be if it represents the original input.""" 17 | return "red" 18 | 19 | def get_text_color_perturbed(self): 20 | """A string representing the color this result's changed portion should 21 | be if it represents the perturbed input.""" 22 | return "blue" 23 | 24 | def get_colored_output(self, color_method=None): 25 | """Returns a string representation of this result's output, colored 26 | according to `color_method`.""" 27 | return str(self.output) 28 | -------------------------------------------------------------------------------- /textattack/goal_functions/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _goal_function: 2 | 3 | Goal functions determine if an attack has been successful. 4 | =========================================================== 5 | """ 6 | 7 | from .goal_function import GoalFunction 8 | 9 | from .classification import * 10 | from .text import * 11 | -------------------------------------------------------------------------------- /textattack/goal_functions/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/goal_functions/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/goal_functions/__pycache__/goal_function.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/goal_functions/__pycache__/goal_function.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/goal_functions/classification/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Goal fucntion for Classification 4 | ======================================= 5 | 6 | """ 7 | 8 | from .input_reduction import InputReduction 9 | from .untargeted_classification import UntargetedClassification 10 | from .targeted_classification import TargetedClassification 11 | -------------------------------------------------------------------------------- /textattack/goal_functions/classification/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/goal_functions/classification/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/goal_functions/classification/__pycache__/classification_goal_function.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/goal_functions/classification/__pycache__/classification_goal_function.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/goal_functions/classification/__pycache__/input_reduction.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/goal_functions/classification/__pycache__/input_reduction.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/goal_functions/classification/__pycache__/targeted_classification.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/goal_functions/classification/__pycache__/targeted_classification.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/goal_functions/classification/__pycache__/untargeted_classification.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/goal_functions/classification/__pycache__/untargeted_classification.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/goal_functions/classification/input_reduction.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Determine if maintaining the same predicted label 4 | --------------------------------------------------------------------- 5 | """ 6 | 7 | 8 | from .classification_goal_function import ClassificationGoalFunction 9 | 10 | 11 | class InputReduction(ClassificationGoalFunction): 12 | """Attempts to reduce the input down to as few words as possible while 13 | maintaining the same predicted label. 14 | 15 | From Feng, Wallace, Grissom, Iyyer, Rodriguez, Boyd-Graber. (2018). 16 | Pathologies of Neural Models Make Interpretations Difficult. ArXiv, 17 | abs/1804.07781. 18 | """ 19 | 20 | def __init__(self, *args, target_num_words=1, **kwargs): 21 | self.target_num_words = target_num_words 22 | super().__init__(*args, **kwargs) 23 | 24 | def _is_goal_complete(self, model_output, attacked_text): 25 | return ( 26 | self.ground_truth_output == model_output.argmax() 27 | and attacked_text.num_words <= self.target_num_words 28 | ) 29 | 30 | def _should_skip(self, model_output, attacked_text): 31 | return self.ground_truth_output != model_output.argmax() 32 | 33 | def _get_score(self, model_output, attacked_text): 34 | # Give the lowest score possible to inputs which don't maintain the ground truth label. 35 | if self.ground_truth_output != model_output.argmax(): 36 | return 0 37 | 38 | cur_num_words = attacked_text.num_words 39 | initial_num_words = self.initial_attacked_text.num_words 40 | 41 | # The main goal is to reduce the number of words (num_words_score) 42 | # Higher model score for the ground truth label is used as a tiebreaker (model_score) 43 | num_words_score = max( 44 | (initial_num_words - cur_num_words) / initial_num_words, 0 45 | ) 46 | model_score = model_output[self.ground_truth_output] 47 | return min(num_words_score + model_score / initial_num_words, 1) 48 | 49 | def extra_repr_keys(self): 50 | if self.maximizable: 51 | return ["maximizable"] 52 | else: 53 | return ["maximizable", "target_num_words"] 54 | -------------------------------------------------------------------------------- /textattack/goal_functions/classification/targeted_classification.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Determine if an attack has been successful in targeted Classification 4 | ----------------------------------------------------------------------- 5 | """ 6 | 7 | 8 | from .classification_goal_function import ClassificationGoalFunction 9 | 10 | 11 | class TargetedClassification(ClassificationGoalFunction): 12 | """A targeted attack on classification models which attempts to maximize 13 | the score of the target label. 14 | 15 | Complete when the arget label is the predicted label. 16 | """ 17 | 18 | def __init__(self, *args, target_class=0, **kwargs): 19 | super().__init__(*args, **kwargs) 20 | self.target_class = target_class 21 | 22 | def _is_goal_complete(self, model_output, _): 23 | return ( 24 | self.target_class == model_output.argmax() 25 | ) or self.ground_truth_output == self.target_class 26 | 27 | def _get_score(self, model_output, _): 28 | if self.target_class < 0 or self.target_class >= len(model_output): 29 | raise ValueError( 30 | f"target class set to {self.target_class} with {len(model_output)} classes." 31 | ) 32 | else: 33 | return model_output[self.target_class] 34 | 35 | def extra_repr_keys(self): 36 | if self.maximizable: 37 | return ["maximizable", "target_class"] 38 | else: 39 | return ["target_class"] 40 | -------------------------------------------------------------------------------- /textattack/goal_functions/classification/untargeted_classification.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Determine successful in untargeted Classification 4 | ---------------------------------------------------- 5 | """ 6 | 7 | 8 | from .classification_goal_function import ClassificationGoalFunction 9 | 10 | 11 | class UntargetedClassification(ClassificationGoalFunction): 12 | """An untargeted attack on classification models which attempts to minimize 13 | the score of the correct label until it is no longer the predicted label. 14 | 15 | Args: 16 | target_max_score (float): If set, goal is to reduce model output to 17 | below this score. Otherwise, goal is to change the overall predicted 18 | class. 19 | """ 20 | 21 | def __init__(self, *args, target_max_score=None, **kwargs): 22 | self.target_max_score = target_max_score 23 | super().__init__(*args, **kwargs) 24 | 25 | def _is_goal_complete(self, model_output, _): 26 | if self.target_max_score: 27 | return model_output[self.ground_truth_output] < self.target_max_score 28 | elif (model_output.numel() == 1) and isinstance( 29 | self.ground_truth_output, float 30 | ): 31 | return abs(self.ground_truth_output - model_output.item()) >= ( 32 | self.target_max_score or 0.5 33 | ) 34 | else: 35 | return model_output.argmax() != self.ground_truth_output 36 | 37 | def _get_score(self, model_output, _): 38 | # If the model outputs a single number and the ground truth output is 39 | # a float, we assume that this is a regression task. 40 | if (model_output.numel() == 1) and isinstance(self.ground_truth_output, float): 41 | return abs(model_output.item() - self.ground_truth_output) 42 | else: 43 | return 1 - model_output[self.ground_truth_output] 44 | -------------------------------------------------------------------------------- /textattack/goal_functions/text/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Goal Function for Text to Text case 4 | ======================================= 5 | 6 | """ 7 | 8 | from .minimize_bleu import MinimizeBleu 9 | from .non_overlapping_output import NonOverlappingOutput 10 | -------------------------------------------------------------------------------- /textattack/goal_functions/text/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/goal_functions/text/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/goal_functions/text/__pycache__/minimize_bleu.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/goal_functions/text/__pycache__/minimize_bleu.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/goal_functions/text/__pycache__/non_overlapping_output.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/goal_functions/text/__pycache__/non_overlapping_output.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/goal_functions/text/__pycache__/text_to_text_goal_function.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/goal_functions/text/__pycache__/text_to_text_goal_function.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/goal_functions/text/minimize_bleu.py: -------------------------------------------------------------------------------- 1 | """ 2 | Goal Function for Attempts to minimize the BLEU score 3 | ------------------------------------------------------- 4 | 5 | 6 | """ 7 | 8 | import functools 9 | 10 | import nltk 11 | 12 | import textattack 13 | 14 | from .text_to_text_goal_function import TextToTextGoalFunction 15 | 16 | 17 | class MinimizeBleu(TextToTextGoalFunction): 18 | """Attempts to minimize the BLEU score between the current output 19 | translation and the reference translation. 20 | 21 | BLEU score was defined in (BLEU: a Method for Automatic Evaluation of Machine Translation). 22 | 23 | `ArxivURL`_ 24 | 25 | .. _ArxivURL: https://www.aclweb.org/anthology/P02-1040.pdf 26 | 27 | This goal function is defined in (It’s Morphin’ Time! Combating Linguistic Discrimination with Inflectional Perturbations). 28 | 29 | `ArxivURL2`_ 30 | 31 | .. _ArxivURL2: https://www.aclweb.org/anthology/2020.acl-main.263 32 | """ 33 | 34 | EPS = 1e-10 35 | 36 | def __init__(self, *args, target_bleu=0.0, **kwargs): 37 | self.target_bleu = target_bleu 38 | super().__init__(*args, **kwargs) 39 | 40 | def clear_cache(self): 41 | if self.use_cache: 42 | self._call_model_cache.clear() 43 | get_bleu.cache_clear() 44 | 45 | def _is_goal_complete(self, model_output, _): 46 | bleu_score = 1.0 - self._get_score(model_output, _) 47 | return bleu_score <= (self.target_bleu + MinimizeBleu.EPS) 48 | 49 | def _get_score(self, model_output, _): 50 | model_output_at = textattack.shared.AttackedText(model_output) 51 | ground_truth_at = textattack.shared.AttackedText(self.ground_truth_output) 52 | bleu_score = get_bleu(model_output_at, ground_truth_at) 53 | return 1.0 - bleu_score 54 | 55 | def extra_repr_keys(self): 56 | if self.maximizable: 57 | return ["maximizable"] 58 | else: 59 | return ["maximizable", "target_bleu"] 60 | 61 | 62 | @functools.lru_cache(maxsize=2 ** 12) 63 | def get_bleu(a, b): 64 | ref = a.words 65 | hyp = b.words 66 | bleu_score = nltk.translate.bleu_score.sentence_bleu([ref], hyp) 67 | return bleu_score 68 | -------------------------------------------------------------------------------- /textattack/goal_functions/text/non_overlapping_output.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Goal Function for seq2sick 4 | ------------------------------------------------------- 5 | """ 6 | 7 | 8 | import functools 9 | 10 | import numpy as np 11 | 12 | from textattack.shared.utils import words_from_text 13 | 14 | from .text_to_text_goal_function import TextToTextGoalFunction 15 | 16 | 17 | class NonOverlappingOutput(TextToTextGoalFunction): 18 | """Ensures that none of the words at a position are equal. 19 | 20 | Defined in seq2sick (https://arxiv.org/pdf/1803.01128.pdf), equation 21 | (3). 22 | """ 23 | 24 | def clear_cache(self): 25 | if self.use_cache: 26 | self._call_model_cache.clear() 27 | get_words_cached.cache_clear() 28 | word_difference_score.cache_clear() 29 | 30 | def _is_goal_complete(self, model_output, _): 31 | return self._get_score(model_output, self.ground_truth_output) == 1.0 32 | 33 | def _get_score(self, model_output, _): 34 | num_words_diff = word_difference_score(model_output, self.ground_truth_output) 35 | if num_words_diff == 0: 36 | return 0.0 37 | else: 38 | return num_words_diff / len(get_words_cached(self.ground_truth_output)) 39 | 40 | 41 | @functools.lru_cache(maxsize=2 ** 12) 42 | def get_words_cached(s): 43 | return np.array(words_from_text(s)) 44 | 45 | 46 | @functools.lru_cache(maxsize=2 ** 12) 47 | def word_difference_score(s1, s2): 48 | """Returns the number of words that are non-overlapping between s1 and 49 | s2.""" 50 | s1_words = get_words_cached(s1) 51 | s2_words = get_words_cached(s2) 52 | min_length = min(len(s1_words), len(s2_words)) 53 | if min_length == 0: 54 | return 0 55 | s1_words = s1_words[:min_length] 56 | s2_words = s2_words[:min_length] 57 | return (s1_words != s2_words).sum() 58 | -------------------------------------------------------------------------------- /textattack/goal_functions/text/text_to_text_goal_function.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Goal Function for TextToText 4 | ------------------------------------------------------- 5 | """ 6 | 7 | 8 | from textattack.goal_function_results import TextToTextGoalFunctionResult 9 | from textattack.goal_functions import GoalFunction 10 | 11 | 12 | class TextToTextGoalFunction(GoalFunction): 13 | """A goal function defined on a model that outputs text. 14 | 15 | model: The PyTorch or TensorFlow model used for evaluation. 16 | original_output: the original output of the model 17 | """ 18 | 19 | def _goal_function_result_type(self): 20 | """Returns the class of this goal function's results.""" 21 | return TextToTextGoalFunctionResult 22 | 23 | def _process_model_outputs(self, _, outputs): 24 | """Processes and validates a list of model outputs.""" 25 | return outputs.flatten() 26 | 27 | def _get_displayed_output(self, raw_output): 28 | return raw_output 29 | -------------------------------------------------------------------------------- /textattack/loggers/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _loggers: 2 | 3 | Misc Loggers: Loggers track, visualize, and export attack results. 4 | =================================================================== 5 | """ 6 | 7 | from .csv_logger import CSVLogger 8 | from .file_logger import FileLogger 9 | from .logger import Logger 10 | from .visdom_logger import VisdomLogger 11 | from .weights_and_biases_logger import WeightsAndBiasesLogger 12 | 13 | # AttackLogManager must be imported last, since it imports the other loggers. 14 | from .attack_log_manager import AttackLogManager 15 | -------------------------------------------------------------------------------- /textattack/loggers/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/loggers/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/loggers/__pycache__/attack_log_manager.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/loggers/__pycache__/attack_log_manager.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/loggers/__pycache__/csv_logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/loggers/__pycache__/csv_logger.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/loggers/__pycache__/file_logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/loggers/__pycache__/file_logger.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/loggers/__pycache__/logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/loggers/__pycache__/logger.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/loggers/__pycache__/visdom_logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/loggers/__pycache__/visdom_logger.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/loggers/__pycache__/weights_and_biases_logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/loggers/__pycache__/weights_and_biases_logger.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/loggers/csv_logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | Attack Logs to CSV 3 | ======================== 4 | """ 5 | 6 | import csv 7 | 8 | import pandas as pd 9 | 10 | from textattack.shared import AttackedText, logger 11 | 12 | from .logger import Logger 13 | 14 | 15 | class CSVLogger(Logger): 16 | """Logs attack results to a CSV.""" 17 | 18 | def __init__(self, filename="results.csv", color_method="file"): 19 | self.filename = filename 20 | self.color_method = color_method 21 | self.df = pd.DataFrame() 22 | self._flushed = True 23 | 24 | def log_attack_result(self, result): 25 | original_text, perturbed_text = result.diff_color(self.color_method) 26 | original_text = original_text.replace("\n", AttackedText.SPLIT_TOKEN) 27 | perturbed_text = perturbed_text.replace("\n", AttackedText.SPLIT_TOKEN) 28 | result_type = result.__class__.__name__.replace("AttackResult", "") 29 | row = { 30 | "original_text": original_text, 31 | "perturbed_text": perturbed_text, 32 | "original_score": result.original_result.score, 33 | "perturbed_score": result.perturbed_result.score, 34 | "original_output": result.original_result.output, 35 | "perturbed_output": result.perturbed_result.output, 36 | "ground_truth_output": result.original_result.ground_truth_output, 37 | "num_queries": result.num_queries, 38 | "result_type": result_type, 39 | } 40 | self.df = self.df.append(row, ignore_index=True) 41 | self._flushed = False 42 | 43 | def flush(self): 44 | self.df.to_csv(self.filename, quoting=csv.QUOTE_NONNUMERIC, index=False) 45 | self._flushed = True 46 | 47 | def __del__(self): 48 | if not self._flushed: 49 | logger.warning("CSVLogger exiting without calling flush().") 50 | -------------------------------------------------------------------------------- /textattack/loggers/file_logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | Attack Logs to file 3 | ======================== 4 | """ 5 | 6 | import os 7 | import sys 8 | 9 | import terminaltables 10 | 11 | from .logger import Logger 12 | 13 | 14 | class FileLogger(Logger): 15 | """Logs the results of an attack to a file, or `stdout`.""" 16 | 17 | def __init__(self, filename="", stdout=False): 18 | self.stdout = stdout 19 | self.filename = filename 20 | if stdout: 21 | self.fout = sys.stdout 22 | elif isinstance(filename, str): 23 | directory = os.path.dirname(filename) 24 | if not os.path.exists(directory): 25 | os.makedirs(directory) 26 | self.fout = open(filename, "w") 27 | else: 28 | self.fout = filename 29 | self.num_results = 0 30 | 31 | def __getstate__(self): 32 | # Temporarily save file handle b/c we can't copy it 33 | state = {i: self.__dict__[i] for i in self.__dict__ if i != "fout"} 34 | return state 35 | 36 | def __setstate__(self, state): 37 | self.__dict__ = state 38 | if self.stdout: 39 | self.fout = sys.stdout 40 | else: 41 | self.fout = open(self.filename, "a") 42 | 43 | def log_attack_result(self, result): 44 | self.num_results += 1 45 | color_method = "ansi" if self.stdout else "file" 46 | self.fout.write( 47 | "-" * 45 + " Result " + str(self.num_results) + " " + "-" * 45 + "\n" 48 | ) 49 | self.fout.write(result.__str__(color_method=color_method)) 50 | self.fout.write("\n") 51 | 52 | def log_summary_rows(self, rows, title, window_id): 53 | if self.stdout: 54 | table_rows = [[title, ""]] + rows 55 | table = terminaltables.AsciiTable(table_rows) 56 | self.fout.write(table.table) 57 | else: 58 | for row in rows: 59 | self.fout.write(f"{row[0]} {row[1]}\n") 60 | 61 | def log_sep(self): 62 | self.fout.write("-" * 90 + "\n") 63 | 64 | def flush(self): 65 | self.fout.flush() 66 | -------------------------------------------------------------------------------- /textattack/loggers/logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | Attack Logger Wrapper 3 | ======================== 4 | """ 5 | 6 | 7 | from abc import ABC 8 | 9 | 10 | class Logger(ABC): 11 | """An abstract class for different methods of logging attack results.""" 12 | 13 | def __init__(self): 14 | pass 15 | 16 | def log_attack_result(self, result, examples_completed): 17 | pass 18 | 19 | def log_summary_rows(self, rows, title, window_id): 20 | pass 21 | 22 | def log_hist(self, arr, numbins, title, window_id): 23 | pass 24 | 25 | def log_sep(self): 26 | pass 27 | 28 | def flush(self): 29 | pass 30 | -------------------------------------------------------------------------------- /textattack/models/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _models: 2 | 3 | Models 4 | ========= 5 | 6 | TextAttack can attack any model that takes a list of strings as input and outputs a list of predictions. This is the idea behind *model wrappers*: to help your model conform to this API, we've provided the ``textattack.models.wrappers.ModelWrapper`` abstract class. 7 | 8 | We've also provided implementations of model wrappers for common patterns in some popular machine learning frameworks: 9 | 10 | 11 | Models User-specified 12 | -------------------------- 13 | 14 | TextAttack allows users to provide their own models for testing. Models can be loaded in three ways: 15 | 16 | 1. ``--model`` for pre-trained models and models trained with TextAttack 17 | 2. ``--model-from-huggingface`` which will attempt to load any model from the ``HuggingFace model hub `` 18 | 3. ``--model-from-file`` which will dynamically load a Python file and look for the ``model`` variable 19 | 20 | 21 | 22 | Models Pre-trained 23 | -------------------------- 24 | 25 | TextAttack also provides lots of pre-trained models for common tasks. Testing different attacks on the same model ensures attack comparisons are fair. 26 | 27 | Any of these models can be provided to ``textattack attack`` via ``--model``, for example, ``--model bert-base-uncased-mr``. For a full list of pre-trained models, see the `pre-trained models README `_. 28 | 29 | 30 | Model Wrappers 31 | -------------------------- 32 | TextAttack can attack any model that takes a list of strings as input and outputs a list of predictions. This is the idea behind *model wrappers*: to help your model conform to this API, we've provided the ``textattack.models.wrappers.ModelWrapper`` abstract class. 33 | 34 | 35 | We've also provided implementations of model wrappers for common patterns in some popular machine learning frameworks: including pytorch / sklearn / tensorflow. 36 | """ 37 | 38 | 39 | from . import helpers 40 | from . import tokenizers 41 | from . import wrappers 42 | -------------------------------------------------------------------------------- /textattack/models/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/models/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/models/helpers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Moderl Helpers 3 | ------------------ 4 | """ 5 | 6 | 7 | # Helper stuff, like embeddings. 8 | from . import utils 9 | from .glove_embedding_layer import GloveEmbeddingLayer 10 | 11 | # Helper modules. 12 | from .lstm_for_classification import LSTMForClassification 13 | from .t5_for_text_to_text import T5ForTextToText 14 | from .word_cnn_for_classification import WordCNNForClassification 15 | -------------------------------------------------------------------------------- /textattack/models/helpers/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/models/helpers/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/models/helpers/__pycache__/glove_embedding_layer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/models/helpers/__pycache__/glove_embedding_layer.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/models/helpers/__pycache__/lstm_for_classification.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/models/helpers/__pycache__/lstm_for_classification.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/models/helpers/__pycache__/t5_for_text_to_text.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/models/helpers/__pycache__/t5_for_text_to_text.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/models/helpers/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/models/helpers/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/models/helpers/__pycache__/word_cnn_for_classification.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/models/helpers/__pycache__/word_cnn_for_classification.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/models/helpers/bert_for_classification.py: -------------------------------------------------------------------------------- 1 | """ 2 | BERT Classification 3 | ^^^^^^^^^^^^^^^^^^^^^ 4 | 5 | """ 6 | 7 | 8 | import torch 9 | from transformers.modeling_bert import BertForSequenceClassification 10 | 11 | from textattack.models.tokenizers import AutoTokenizer 12 | from textattack.shared import utils 13 | 14 | 15 | class BERTForClassification: 16 | """BERT fine-tuned for textual classification. 17 | 18 | Args: 19 | model_path(:obj:`string`): Path to the pre-trained model. 20 | num_labels(:obj:`int`, optional): Number of class labels for 21 | prediction, if different than 2. 22 | """ 23 | 24 | def __init__(self, model_path, num_labels=2): 25 | model_file_path = utils.download_if_needed(model_path) 26 | self.model = BertForSequenceClassification.from_pretrained( 27 | model_file_path, num_labels=num_labels 28 | ) 29 | 30 | self.model.to(utils.device) 31 | self.model.eval() 32 | self.tokenizer = AutoTokenizer(model_file_path) 33 | 34 | def __call__(self, input_ids=None, **kwargs): 35 | # The tokenizer will return ``input_ids`` along with ``token_type_ids`` 36 | # and an ``attention_mask``. Our pre-trained models only need the input 37 | # IDs. 38 | pred = self.model(input_ids=input_ids)[0] 39 | return torch.nn.functional.softmax(pred, dim=-1) 40 | -------------------------------------------------------------------------------- /textattack/models/helpers/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Util function for Model Wrapper 3 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | 5 | """ 6 | 7 | 8 | import glob 9 | import os 10 | 11 | import torch 12 | 13 | from textattack.shared import utils 14 | 15 | 16 | def load_cached_state_dict(model_folder_path): 17 | model_folder_path = utils.download_if_needed(model_folder_path) 18 | # Take the first model matching the pattern *model.bin. 19 | model_path_list = glob.glob(os.path.join(model_folder_path, "*model.bin")) 20 | if not model_path_list: 21 | raise FileNotFoundError( 22 | f"model.bin not found in model folder {model_folder_path}." 23 | ) 24 | model_path = model_path_list[0] 25 | state_dict = torch.load(model_path, map_location=utils.device) 26 | return state_dict 27 | -------------------------------------------------------------------------------- /textattack/models/tokenizers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tokenizers for Model Wrapper 3 | ------------------------------- 4 | """ 5 | 6 | 7 | from .auto_tokenizer import AutoTokenizer 8 | from .glove_tokenizer import GloveTokenizer 9 | from .t5_tokenizer import T5Tokenizer 10 | -------------------------------------------------------------------------------- /textattack/models/tokenizers/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/models/tokenizers/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/models/tokenizers/__pycache__/auto_tokenizer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/models/tokenizers/__pycache__/auto_tokenizer.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/models/tokenizers/__pycache__/glove_tokenizer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/models/tokenizers/__pycache__/glove_tokenizer.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/models/tokenizers/__pycache__/t5_tokenizer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/models/tokenizers/__pycache__/t5_tokenizer.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/models/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Model Wrappers 3 | -------------------------- 4 | TextAttack can attack any model that takes a list of strings as input and outputs a list of predictions. This is the idea behind *model wrappers*: to help your model conform to this API, we've provided the ``textattack.models.wrappers.ModelWrapper`` abstract class. 5 | 6 | 7 | We've also provided implementations of model wrappers for common patterns in some popular machine learning frameworks: 8 | 9 | """ 10 | 11 | from .model_wrapper import ModelWrapper 12 | 13 | from .huggingface_model_wrapper import HuggingFaceModelWrapper 14 | # from .huggingface_model_mask_ensemble_wrapper import HuggingFaceModelMaskEnsembleWrapper 15 | # from .huggingface_model_safer_wrapper import HuggingFaceModelSaferEnsembleWrapper 16 | from .pytorch_model_wrapper import PyTorchModelWrapper 17 | from .sklearn_model_wrapper import SklearnModelWrapper 18 | from .tensorflow_model_wrapper import TensorFlowModelWrapper 19 | from .huggingface_model_ensemble_wrapper import HuggingFaceModelEnsembleWrapper 20 | -------------------------------------------------------------------------------- /textattack/models/wrappers/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/models/wrappers/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/models/wrappers/__pycache__/huggingface_model_ensemble_wrapper.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/models/wrappers/__pycache__/huggingface_model_ensemble_wrapper.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/models/wrappers/__pycache__/huggingface_model_wrapper.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/models/wrappers/__pycache__/huggingface_model_wrapper.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/models/wrappers/__pycache__/model_wrapper.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/models/wrappers/__pycache__/model_wrapper.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/models/wrappers/__pycache__/pytorch_model_wrapper.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/models/wrappers/__pycache__/pytorch_model_wrapper.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/models/wrappers/__pycache__/sklearn_model_wrapper.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/models/wrappers/__pycache__/sklearn_model_wrapper.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/models/wrappers/__pycache__/tensorflow_model_wrapper.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/models/wrappers/__pycache__/tensorflow_model_wrapper.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/models/wrappers/sklearn_model_wrapper.py: -------------------------------------------------------------------------------- 1 | """ 2 | scikit-learn Model Wrapper 3 | -------------------------- 4 | """ 5 | 6 | 7 | import pandas as pd 8 | 9 | from .model_wrapper import ModelWrapper 10 | 11 | 12 | class SklearnModelWrapper(ModelWrapper): 13 | """Loads a scikit-learn model and tokenizer (tokenizer implements 14 | `transform` and model implements `predict_proba`). 15 | 16 | May need to be extended and modified for different types of 17 | tokenizers. 18 | """ 19 | 20 | def __init__(self, model, tokenizer): 21 | self.model = model 22 | self.tokenizer = tokenizer 23 | 24 | def __call__(self, text_input_list): 25 | encoded_text_matrix = self.tokenizer.transform(text_input_list).toarray() 26 | tokenized_text_df = pd.DataFrame( 27 | encoded_text_matrix, columns=self.tokenizer.get_feature_names() 28 | ) 29 | return self.model.predict_proba(tokenized_text_df) 30 | 31 | def get_grad(self, text_input): 32 | raise NotImplementedError() 33 | -------------------------------------------------------------------------------- /textattack/models/wrappers/tensorflow_model_wrapper.py: -------------------------------------------------------------------------------- 1 | """ 2 | TensorFlow Model Wrapper 3 | -------------------------- 4 | """ 5 | 6 | 7 | import numpy as np 8 | 9 | from .model_wrapper import ModelWrapper 10 | 11 | 12 | class TensorFlowModelWrapper(ModelWrapper): 13 | """Loads a TensorFlow model and tokenizer. 14 | 15 | TensorFlow models can use many different architectures and 16 | tokenization strategies. This assumes that the model takes an 17 | np.array of strings as input and returns a tf.Tensor of outputs, as 18 | is typical with Keras modules. You may need to subclass this for 19 | models that have dedicated tokenizers or otherwise take input 20 | differently. 21 | """ 22 | 23 | def __init__(self, model): 24 | self.model = model 25 | 26 | def __call__(self, text_input_list): 27 | text_array = np.array(text_input_list) 28 | preds = self.model(text_array) 29 | return preds.numpy() 30 | 31 | def get_grad(self, text_input): 32 | raise NotImplementedError() 33 | -------------------------------------------------------------------------------- /textattack/search_methods/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _search_methods: 2 | 3 | Search Methods: 4 | =================== 5 | 6 | Search methods explore the transformation space in an attempt to find a successful attack as determined by a :ref:`Goal Functions ` and list of :ref:`Constraints ` 7 | """ 8 | from .search_method import SearchMethod 9 | from .beam_search import BeamSearch 10 | from .greedy_search import GreedySearch 11 | from .greedy_word_swap_wir import GreedyWordSwapWIR 12 | from .population_based_search import PopulationBasedSearch, PopulationMember 13 | from .genetic_algorithm import GeneticAlgorithm 14 | from .alzantot_genetic_algorithm import AlzantotGeneticAlgorithm 15 | from .improved_genetic_algorithm import ImprovedGeneticAlgorithm 16 | from .particle_swarm_optimization import ParticleSwarmOptimization 17 | from .greedy_word_swap_wir_pwws import GreedyWordSwapWIRPWWS 18 | -------------------------------------------------------------------------------- /textattack/search_methods/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/search_methods/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/search_methods/__pycache__/alzantot_genetic_algorithm.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/search_methods/__pycache__/alzantot_genetic_algorithm.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/search_methods/__pycache__/beam_search.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/search_methods/__pycache__/beam_search.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/search_methods/__pycache__/genetic_algorithm.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/search_methods/__pycache__/genetic_algorithm.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/search_methods/__pycache__/greedy_search.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/search_methods/__pycache__/greedy_search.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/search_methods/__pycache__/greedy_word_swap_wir.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/search_methods/__pycache__/greedy_word_swap_wir.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/search_methods/__pycache__/greedy_word_swap_wir_pwws.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/search_methods/__pycache__/greedy_word_swap_wir_pwws.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/search_methods/__pycache__/improved_genetic_algorithm.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/search_methods/__pycache__/improved_genetic_algorithm.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/search_methods/__pycache__/particle_swarm_optimization.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/search_methods/__pycache__/particle_swarm_optimization.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/search_methods/__pycache__/population_based_search.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/search_methods/__pycache__/population_based_search.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/search_methods/__pycache__/search_method.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/search_methods/__pycache__/search_method.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/search_methods/greedy_search.py: -------------------------------------------------------------------------------- 1 | """ 2 | Greedy Search 3 | ================= 4 | """ 5 | from .beam_search import BeamSearch 6 | 7 | 8 | class GreedySearch(BeamSearch): 9 | """A search method that greedily chooses from a list of possible 10 | perturbations. 11 | 12 | Implemented by calling ``BeamSearch`` with beam_width set to 1. 13 | """ 14 | 15 | def __init__(self): 16 | super().__init__(beam_width=1) 17 | 18 | def extra_repr_keys(self): 19 | return [] 20 | -------------------------------------------------------------------------------- /textattack/shared/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Shared TextAttack Functions 3 | ============================= 4 | 5 | This package includes functions shared across packages. 6 | 7 | """ 8 | 9 | 10 | from . import data 11 | from . import utils 12 | from .utils import logger 13 | from . import validators 14 | 15 | from .attacked_text import AttackedText 16 | from .word_embedding import * 17 | from .attack import Attack 18 | from .checkpoint import Checkpoint 19 | -------------------------------------------------------------------------------- /textattack/shared/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/shared/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/shared/__pycache__/attack.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/shared/__pycache__/attack.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/shared/__pycache__/attacked_text.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/shared/__pycache__/attacked_text.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/shared/__pycache__/checkpoint.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/shared/__pycache__/checkpoint.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/shared/__pycache__/data.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/shared/__pycache__/data.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/shared/__pycache__/validators.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/shared/__pycache__/validators.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/shared/__pycache__/word_embedding.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/shared/__pycache__/word_embedding.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/shared/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .install import * 2 | from .misc import * 3 | from .strings import * 4 | from .tensor import * 5 | from .importing import * 6 | -------------------------------------------------------------------------------- /textattack/shared/utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/shared/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/shared/utils/__pycache__/importing.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/shared/utils/__pycache__/importing.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/shared/utils/__pycache__/install.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/shared/utils/__pycache__/install.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/shared/utils/__pycache__/misc.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/shared/utils/__pycache__/misc.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/shared/utils/__pycache__/strings.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/shared/utils/__pycache__/strings.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/shared/utils/__pycache__/tensor.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/shared/utils/__pycache__/tensor.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/shared/utils/importing.py: -------------------------------------------------------------------------------- 1 | # Code copied from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/util/lazy_loader.py 2 | 3 | import importlib 4 | import types 5 | 6 | 7 | class LazyLoader(types.ModuleType): 8 | """Lazily import a module, mainly to avoid pulling in large dependencies. 9 | 10 | This allows them to only be loaded when they are used. 11 | """ 12 | 13 | def __init__(self, local_name, parent_module_globals, name): 14 | self._local_name = local_name 15 | self._parent_module_globals = parent_module_globals 16 | 17 | super(LazyLoader, self).__init__(name) 18 | 19 | def _load(self): 20 | """Load the module and insert it into the parent's globals.""" 21 | # Import the target module and insert it into the parent's namespace 22 | try: 23 | module = importlib.import_module(self.__name__) 24 | except ModuleNotFoundError as e: 25 | raise ModuleNotFoundError( 26 | f"Lazy module loader cannot find module named `{self.__name__}`. " 27 | f"This might be because TextAttack does not automatically install some optional dependencies. " 28 | f"Please run `pip install {self.__name__}` to install the package." 29 | ) from e 30 | self._parent_module_globals[self._local_name] = module 31 | 32 | # Update this object's dict so that if someone keeps a reference to the 33 | # LazyLoader, lookups are efficient (__getattr__ is only called on lookups 34 | # that fail). 35 | self.__dict__.update(module.__dict__) 36 | 37 | return module 38 | 39 | def __getattr__(self, item): 40 | module = self._load() 41 | return getattr(module, item) 42 | 43 | def __dir__(self): 44 | module = self._load() 45 | return dir(module) 46 | -------------------------------------------------------------------------------- /textattack/shared/utils/tensor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def batch_model_predict(model_predict, inputs, batch_size=32): 6 | """Runs prediction on iterable ``inputs`` using batch size ``batch_size``. 7 | 8 | Aggregates all predictions into an ``np.ndarray``. 9 | """ 10 | outputs = [] 11 | i = 0 12 | while i < len(inputs): 13 | batch = inputs[i : i + batch_size] 14 | batch_preds = model_predict(batch) 15 | 16 | # Some seq-to-seq models will return a single string as a prediction 17 | # for a single-string list. Wrap these in a list. 18 | if isinstance(batch_preds, str): 19 | batch_preds = [batch_preds] 20 | 21 | # Get PyTorch tensors off of other devices. 22 | if isinstance(batch_preds, torch.Tensor): 23 | batch_preds = batch_preds.cpu() 24 | 25 | # Cast all predictions iterables to ``np.ndarray`` types. 26 | if not isinstance(batch_preds, np.ndarray): 27 | batch_preds = np.array(batch_preds) 28 | outputs.append(batch_preds) 29 | i += batch_size 30 | 31 | return np.concatenate(outputs, axis=0) 32 | -------------------------------------------------------------------------------- /textattack/transformations/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _transformations: 2 | 3 | Transformations 4 | ========================== 5 | 6 | A transformation is a method which perturbs a text input through the insertion, deletion and substiution of words, characters, and phrases. All transformations take a ``TokenizedText`` as input and return a list of ``TokenizedText`` that contains possible transformations. Every transformation is a subclass of the abstract ``Transformation`` class. 7 | """ 8 | 9 | from .transformation import Transformation 10 | from .composite_transformation import CompositeTransformation 11 | from .random_composite_transformation import RandomCompositeTransformation 12 | from .word_swap import WordSwap 13 | 14 | # Black-box transformations 15 | from .word_deletion import WordDeletion 16 | from .word_swap_embedding import WordSwapEmbedding 17 | from .word_swap_hownet import WordSwapHowNet 18 | from .word_swap_homoglyph_swap import WordSwapHomoglyphSwap 19 | from .word_swap_inflections import WordSwapInflections 20 | from .word_swap_neighboring_character_swap import WordSwapNeighboringCharacterSwap 21 | from .word_swap_random_character_deletion import WordSwapRandomCharacterDeletion 22 | from .word_swap_random_character_insertion import WordSwapRandomCharacterInsertion 23 | from .word_swap_random_character_substitution import WordSwapRandomCharacterSubstitution 24 | from .word_swap_wordnet import WordSwapWordNet 25 | from .word_swap_masked_lm import WordSwapMaskedLM 26 | from .word_swap_random_word import RandomSwap 27 | from .random_synonym_insertion import RandomSynonymInsertion 28 | from .word_swap_qwerty import WordSwapQWERTY 29 | from .word_swap_contract import WordSwapContract 30 | from .word_swap_extend import WordSwapExtend 31 | from .word_swap_change_number import WordSwapChangeNumber 32 | from .word_swap_change_location import WordSwapChangeLocation 33 | from .word_swap_change_name import WordSwapChangeName 34 | 35 | # White-box transformations 36 | from .word_swap_gradient_based import WordSwapGradientBased 37 | -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/composite_transformation.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/composite_transformation.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/random_composite_transformation.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/random_composite_transformation.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/random_synonym_insertion.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/random_synonym_insertion.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/transformation.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/transformation.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/word_deletion.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/word_deletion.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/word_swap.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/word_swap.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/word_swap_change_location.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/word_swap_change_location.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/word_swap_change_name.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/word_swap_change_name.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/word_swap_change_number.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/word_swap_change_number.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/word_swap_contract.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/word_swap_contract.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/word_swap_embedding.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/word_swap_embedding.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/word_swap_extend.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/word_swap_extend.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/word_swap_gradient_based.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/word_swap_gradient_based.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/word_swap_homoglyph_swap.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/word_swap_homoglyph_swap.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/word_swap_hownet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/word_swap_hownet.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/word_swap_inflections.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/word_swap_inflections.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/word_swap_masked_lm.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/word_swap_masked_lm.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/word_swap_neighboring_character_swap.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/word_swap_neighboring_character_swap.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/word_swap_qwerty.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/word_swap_qwerty.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/word_swap_random_character_deletion.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/word_swap_random_character_deletion.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/word_swap_random_character_insertion.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/word_swap_random_character_insertion.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/word_swap_random_character_substitution.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/word_swap_random_character_substitution.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/word_swap_random_word.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/word_swap_random_word.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/__pycache__/word_swap_wordnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/textattack/transformations/__pycache__/word_swap_wordnet.cpython-37.pyc -------------------------------------------------------------------------------- /textattack/transformations/composite_transformation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Composite Transformation 3 | ============================================ 4 | Multiple transformations can be used by providing a list of ``Transformation`` to ``CompositeTransformation`` 5 | 6 | """ 7 | 8 | from textattack.shared import utils 9 | from textattack.transformations import Transformation 10 | 11 | 12 | class CompositeTransformation(Transformation): 13 | """A transformation which applies each of a list of transformations, 14 | returning a set of all optoins. 15 | 16 | Args: 17 | transformations: The list of ``Transformation`` to apply. 18 | """ 19 | 20 | def __init__(self, transformations): 21 | if not ( 22 | isinstance(transformations, list) or isinstance(transformations, tuple) 23 | ): 24 | raise TypeError("transformations must be list or tuple") 25 | elif not len(transformations): 26 | raise ValueError("transformations cannot be empty") 27 | self.transformations = transformations 28 | 29 | def _get_transformations(self, *_): 30 | """Placeholder method that would throw an error if a user tried to 31 | treat the CompositeTransformation as a 'normal' transformation.""" 32 | raise RuntimeError( 33 | "CompositeTransformation does not support _get_transformations()." 34 | ) 35 | 36 | def __call__(self, *args, **kwargs): 37 | new_attacked_texts = set() 38 | for transformation in self.transformations: 39 | new_attacked_texts.update(transformation(*args, **kwargs)) 40 | return list(new_attacked_texts) 41 | 42 | def __repr__(self): 43 | main_str = "CompositeTransformation" + "(" 44 | transformation_lines = [] 45 | for i, transformation in enumerate(self.transformations): 46 | transformation_lines.append(utils.add_indent(f"({i}): {transformation}", 2)) 47 | transformation_lines.append(")") 48 | main_str += utils.add_indent("\n" + "\n".join(transformation_lines), 2) 49 | return main_str 50 | 51 | __str__ = __repr__ 52 | -------------------------------------------------------------------------------- /textattack/transformations/random_composite_transformation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Composite Transformation 3 | ============================================ 4 | Multiple transformations can be used by providing a list of ``Transformation`` to ``CompositeTransformation`` 5 | 6 | add count & random one every time 7 | 8 | """ 9 | 10 | from textattack.shared import utils 11 | from textattack.transformations import CompositeTransformation 12 | import numpy as np 13 | 14 | 15 | class RandomCompositeTransformation(CompositeTransformation): 16 | """A transformation which applies each of a list of transformations, 17 | returning a set of all optoins. 18 | 19 | Args: 20 | transformations: The list of ``Transformation`` to apply. 21 | """ 22 | 23 | def __init__(self, transformations, total_count=20): 24 | super().__init__(transformations) 25 | self.total_count = total_count 26 | 27 | def __call__(self, *args, **kwargs): 28 | new_attacked_texts = set() 29 | transformation_num = len(self.transformations) 30 | if transformation_num <= 0: 31 | raise ValueError 32 | index = np.random.choice(transformation_num, self.total_count, replace=True) 33 | 34 | for i in index: 35 | new_attacked_texts.update(self.transformations[i](*args, **kwargs)) 36 | return list(new_attacked_texts) 37 | 38 | -------------------------------------------------------------------------------- /textattack/transformations/random_synonym_insertion.py: -------------------------------------------------------------------------------- 1 | """ 2 | random synonym insertation Transformation 3 | ============================================ 4 | 5 | """ 6 | 7 | import random 8 | 9 | from nltk.corpus import wordnet 10 | 11 | from textattack.transformations import Transformation 12 | 13 | 14 | class RandomSynonymInsertion(Transformation): 15 | """Transformation that inserts synonyms of words that are already in the 16 | sequence.""" 17 | 18 | def _get_synonyms(self, word): 19 | synonyms = set() 20 | for syn in wordnet.synsets(word): 21 | for lemma in syn.lemmas(): 22 | if lemma.name() != word and check_if_one_word(lemma.name()): 23 | synonyms.add(lemma.name()) 24 | return list(synonyms) 25 | 26 | def _get_transformations(self, current_text, indices_to_modify): 27 | transformed_texts = [] 28 | for idx in indices_to_modify: 29 | synonyms = [] 30 | # try to find a word with synonyms, and deal with edge case where there aren't any 31 | for attempt in range(7): 32 | synonyms = self._get_synonyms(random.choice(current_text.words)) 33 | if synonyms: 34 | break 35 | elif attempt == 6: 36 | return [current_text] 37 | random_synonym = random.choice(synonyms) 38 | transformed_texts.append( 39 | current_text.insert_text_after_word_index(idx, random_synonym) 40 | ) 41 | return transformed_texts 42 | 43 | @property 44 | def deterministic(self): 45 | return False 46 | 47 | 48 | def check_if_one_word(word): 49 | for c in word: 50 | if not c.isalpha(): 51 | return False 52 | return True 53 | -------------------------------------------------------------------------------- /textattack/transformations/word_deletion.py: -------------------------------------------------------------------------------- 1 | """ 2 | word deletion Transformation 3 | ============================================ 4 | 5 | """ 6 | 7 | from .transformation import Transformation 8 | 9 | 10 | class WordDeletion(Transformation): 11 | """An abstract class that takes a sentence and transforms it by deleting a 12 | single word. 13 | 14 | letters_to_insert (string): letters allowed for insertion into words 15 | """ 16 | 17 | def _get_transformations(self, current_text, indices_to_modify): 18 | # words = current_text.words 19 | transformed_texts = [] 20 | if len(current_text.words) > 1: 21 | for i in indices_to_modify: 22 | transformed_texts.append(current_text.delete_word_at_index(i)) 23 | return transformed_texts 24 | -------------------------------------------------------------------------------- /textattack/transformations/word_swap.py: -------------------------------------------------------------------------------- 1 | """ 2 | Word Swap 3 | ============================================ 4 | Word swap transformations act by replacing some words in the input. Subclasses can implement the abstract ``WordSwap`` class by overriding ``self._get_replacement_words`` 5 | 6 | """ 7 | import random 8 | import string 9 | 10 | from .transformation import Transformation 11 | 12 | 13 | class WordSwap(Transformation): 14 | """An abstract class that takes a sentence and transforms it by replacing 15 | some of its words. 16 | 17 | letters_to_insert (string): letters allowed for insertion into words 18 | (used by some char-based transformations) 19 | """ 20 | 21 | def __init__(self, letters_to_insert=None): 22 | self.letters_to_insert = letters_to_insert 23 | if not self.letters_to_insert: 24 | self.letters_to_insert = string.ascii_letters 25 | 26 | def _get_replacement_words(self, word): 27 | """Returns a set of replacements given an input word. Must be overriden 28 | by specific word swap transformations. 29 | 30 | Args: 31 | word: The input word to find replacements for. 32 | """ 33 | raise NotImplementedError() 34 | 35 | def _get_random_letter(self): 36 | """Helper function that returns a random single letter from the English 37 | alphabet that could be lowercase or uppercase.""" 38 | return random.choice(self.letters_to_insert) 39 | 40 | def _get_transformations(self, current_text, indices_to_modify): 41 | words = current_text.words 42 | transformed_texts = [] 43 | 44 | for i in indices_to_modify: 45 | word_to_replace = words[i] 46 | replacement_words = self._get_replacement_words(word_to_replace) 47 | transformed_texts_idx = [] 48 | for r in replacement_words: 49 | if r == word_to_replace: 50 | continue 51 | transformed_texts_idx.append(current_text.replace_word_at_index(i, r)) 52 | transformed_texts.extend(transformed_texts_idx) 53 | 54 | return transformed_texts 55 | -------------------------------------------------------------------------------- /textattack/transformations/word_swap_contract.py: -------------------------------------------------------------------------------- 1 | """ 2 | Word Swap by Contraction 3 | ============================================ 4 | """ 5 | 6 | from textattack.shared.data import EXTENSION_MAP 7 | from textattack.transformations import Transformation 8 | 9 | 10 | class WordSwapContract(Transformation): 11 | """Transforms an input by performing contraction on recognized 12 | combinations.""" 13 | 14 | reverse_contraction_map = {v: k for k, v in EXTENSION_MAP.items()} 15 | 16 | def _get_transformations(self, current_text, indices_to_modify): 17 | """Return all possible transformed sentences, each with one 18 | contraction.""" 19 | transformed_texts = [] 20 | 21 | words = current_text.words 22 | indices_to_modify = sorted(indices_to_modify) 23 | 24 | # search for every 2-words combination in reverse_contraction_map 25 | for idx, word_idx in enumerate(indices_to_modify[:-1]): 26 | next_idx = indices_to_modify[idx + 1] 27 | if (idx + 1) != next_idx: 28 | continue 29 | word = words[word_idx] 30 | next_word = words[next_idx] 31 | 32 | # generating the words to search for 33 | key = " ".join([word, next_word]) 34 | 35 | # when a possible contraction is found in map, contract the current text 36 | if key in self.reverse_contraction_map: 37 | transformed_text = current_text.replace_word_at_index( 38 | idx, self.reverse_contraction_map[key] 39 | ) 40 | transformed_text = transformed_text.delete_word_at_index(next_idx) 41 | transformed_texts.append(transformed_text) 42 | 43 | return transformed_texts 44 | -------------------------------------------------------------------------------- /textattack/transformations/word_swap_extend.py: -------------------------------------------------------------------------------- 1 | """ 2 | Word Swap by Extension 3 | ============================================ 4 | """ 5 | 6 | from textattack.shared.data import EXTENSION_MAP 7 | from textattack.transformations import Transformation 8 | 9 | 10 | class WordSwapExtend(Transformation): 11 | """Transforms an input by performing extension on recognized 12 | combinations.""" 13 | 14 | def _get_transformations(self, current_text, indices_to_modify): 15 | """Return all possible transformed sentences, each with one 16 | extension.""" 17 | transformed_texts = [] 18 | words = current_text.words 19 | for idx in indices_to_modify: 20 | word = words[idx] 21 | # expend when word in map 22 | if word in EXTENSION_MAP: 23 | expanded = EXTENSION_MAP[word] 24 | transformed_text = current_text.replace_word_at_index(idx, expanded) 25 | transformed_texts.append(transformed_text) 26 | 27 | return transformed_texts 28 | -------------------------------------------------------------------------------- /textattack/transformations/word_swap_neighboring_character_swap.py: -------------------------------------------------------------------------------- 1 | """ 2 | Word Swap by Neighboring Character Swap 3 | ============================================ 4 | """ 5 | 6 | import numpy as np 7 | 8 | # from textattack.shared import utils 9 | from textattack.transformations.word_swap import WordSwap 10 | 11 | 12 | class WordSwapNeighboringCharacterSwap(WordSwap): 13 | """Transforms an input by replacing its words with a neighboring character 14 | swap. 15 | 16 | Args: 17 | random_one (bool): Whether to return a single word with two characters 18 | swapped. If not, returns all possible options. 19 | skip_first_char (bool): Whether to disregard perturbing the first 20 | character. 21 | skip_last_char (bool): Whether to disregard perturbing the last 22 | character. 23 | """ 24 | 25 | def __init__( 26 | self, random_one=True, skip_first_char=False, skip_last_char=False, **kwargs 27 | ): 28 | super().__init__(**kwargs) 29 | self.random_one = random_one 30 | self.skip_first_char = skip_first_char 31 | self.skip_last_char = skip_last_char 32 | 33 | def _get_replacement_words(self, word): 34 | """Returns a list containing all possible words with 1 pair of 35 | neighboring characters swapped.""" 36 | 37 | if len(word) <= 1: 38 | return [] 39 | 40 | candidate_words = [] 41 | 42 | start_idx = 1 if self.skip_first_char else 0 43 | end_idx = (len(word) - 2) if self.skip_last_char else (len(word) - 1) 44 | 45 | if start_idx >= end_idx: 46 | return [] 47 | 48 | if self.random_one: 49 | i = np.random.randint(start_idx, end_idx) 50 | candidate_word = word[:i] + word[i + 1] + word[i] + word[i + 2 :] 51 | candidate_words.append(candidate_word) 52 | else: 53 | for i in range(start_idx, end_idx): 54 | candidate_word = word[:i] + word[i + 1] + word[i] + word[i + 2 :] 55 | candidate_words.append(candidate_word) 56 | 57 | return candidate_words 58 | 59 | @property 60 | def deterministic(self): 61 | return not self.random_one 62 | 63 | def extra_repr_keys(self): 64 | return super().extra_repr_keys() + ["random_one"] 65 | -------------------------------------------------------------------------------- /textattack/transformations/word_swap_random_character_deletion.py: -------------------------------------------------------------------------------- 1 | """ 2 | Word Swap by Random Character Deletion 3 | ========================================================== 4 | """ 5 | 6 | import numpy as np 7 | 8 | # from textattack.shared import utils 9 | from textattack.transformations.word_swap import WordSwap 10 | 11 | 12 | class WordSwapRandomCharacterDeletion(WordSwap): 13 | """Transforms an input by deleting its characters. 14 | 15 | Args: 16 | random_one (bool): Whether to return a single word with a random 17 | character deleted. If not, returns all possible options. 18 | skip_first_char (bool): Whether to disregard deleting the first 19 | character. 20 | skip_last_char (bool): Whether to disregard deleting the last 21 | character. 22 | """ 23 | 24 | def __init__( 25 | self, random_one=True, skip_first_char=False, skip_last_char=False, **kwargs 26 | ): 27 | super().__init__(**kwargs) 28 | self.random_one = random_one 29 | self.skip_first_char = skip_first_char 30 | self.skip_last_char = skip_last_char 31 | 32 | def _get_replacement_words(self, word): 33 | """Returns returns a list containing all possible words with 1 letter 34 | deleted.""" 35 | if len(word) <= 1: 36 | return [] 37 | 38 | candidate_words = [] 39 | 40 | start_idx = 1 if self.skip_first_char else 0 41 | end_idx = (len(word) - 1) if self.skip_last_char else len(word) 42 | 43 | if start_idx >= end_idx: 44 | return [] 45 | 46 | if self.random_one: 47 | i = np.random.randint(start_idx, end_idx) 48 | candidate_word = word[:i] + word[i + 1 :] 49 | candidate_words.append(candidate_word) 50 | else: 51 | for i in range(start_idx, end_idx): 52 | candidate_word = word[:i] + word[i + 1 :] 53 | candidate_words.append(candidate_word) 54 | 55 | return candidate_words 56 | 57 | @property 58 | def deterministic(self): 59 | return not self.random_one 60 | 61 | def extra_repr_keys(self): 62 | return super().extra_repr_keys() + ["random_one"] 63 | -------------------------------------------------------------------------------- /textattack/transformations/word_swap_random_character_insertion.py: -------------------------------------------------------------------------------- 1 | """ 2 | Word Swap by Random Character Insertion 3 | ========================================================== 4 | 5 | """ 6 | import numpy as np 7 | 8 | # from textattack.shared import utils 9 | from textattack.transformations.word_swap import WordSwap 10 | 11 | 12 | class WordSwapRandomCharacterInsertion(WordSwap): 13 | """Transforms an input by inserting a random character. 14 | 15 | random_one (bool): Whether to return a single word with a random 16 | character deleted. If not, returns all possible options. 17 | skip_first_char (bool): Whether to disregard inserting as the first 18 | character. skip_last_char (bool): Whether to disregard inserting as 19 | the last character. 20 | """ 21 | 22 | def __init__( 23 | self, random_one=True, skip_first_char=False, skip_last_char=False, **kwargs 24 | ): 25 | super().__init__(**kwargs) 26 | self.random_one = random_one 27 | self.skip_first_char = skip_first_char 28 | self.skip_last_char = skip_last_char 29 | 30 | def _get_replacement_words(self, word): 31 | """Returns returns a list containing all possible words with 1 random 32 | character inserted.""" 33 | if len(word) <= 1: 34 | return [] 35 | 36 | candidate_words = [] 37 | 38 | start_idx = 1 if self.skip_first_char else 0 39 | end_idx = (len(word) - 1) if self.skip_last_char else len(word) 40 | 41 | if start_idx >= end_idx: 42 | return [] 43 | 44 | if self.random_one: 45 | i = np.random.randint(start_idx, end_idx) 46 | candidate_word = word[:i] + self._get_random_letter() + word[i:] 47 | candidate_words.append(candidate_word) 48 | else: 49 | for i in range(start_idx, end_idx): 50 | candidate_word = word[:i] + self._get_random_letter() + word[i:] 51 | candidate_words.append(candidate_word) 52 | 53 | return candidate_words 54 | 55 | @property 56 | def deterministic(self): 57 | return not self.random_one 58 | 59 | def extra_repr_keys(self): 60 | return super().extra_repr_keys() + ["random_one"] 61 | -------------------------------------------------------------------------------- /textattack/transformations/word_swap_random_character_substitution.py: -------------------------------------------------------------------------------- 1 | """ 2 | Word Swap by Random Character Substitution 3 | ========================================================== 4 | """ 5 | import numpy as np 6 | 7 | # from textattack.shared import utils 8 | from textattack.transformations.word_swap import WordSwap 9 | 10 | 11 | class WordSwapRandomCharacterSubstitution(WordSwap): 12 | """Transforms an input by replacing one character in a word with a random 13 | new character. 14 | 15 | Args: 16 | random_one (bool): Whether to return a single word with a random 17 | character deleted. If not set, returns all possible options. 18 | """ 19 | 20 | def __init__(self, random_one=True, **kwargs): 21 | super().__init__(**kwargs) 22 | self.random_one = random_one 23 | 24 | def _get_replacement_words(self, word): 25 | """Returns returns a list containing all possible words with 1 letter 26 | substituted for a random letter.""" 27 | if len(word) <= 1: 28 | return [] 29 | 30 | candidate_words = [] 31 | 32 | if self.random_one: 33 | i = np.random.randint(0, len(word)) 34 | candidate_word = word[:i] + self._get_random_letter() + word[i + 1 :] 35 | candidate_words.append(candidate_word) 36 | else: 37 | for i in range(len(word)): 38 | candidate_word = word[:i] + self._get_random_letter() + word[i + 1 :] 39 | candidate_words.append(candidate_word) 40 | 41 | return candidate_words 42 | 43 | @property 44 | def deterministic(self): 45 | return not self.random_one 46 | 47 | def extra_repr_keys(self): 48 | return super().extra_repr_keys() + ["random_one"] 49 | -------------------------------------------------------------------------------- /textattack/transformations/word_swap_random_word.py: -------------------------------------------------------------------------------- 1 | """ 2 | Word Swap by swaping the order of words 3 | ========================================================== 4 | """ 5 | 6 | 7 | import random 8 | 9 | from textattack.transformations import Transformation 10 | 11 | 12 | class RandomSwap(Transformation): 13 | """Transformation that swaps the order of words in a sequence.""" 14 | 15 | def _get_transformations(self, current_text, indices_to_modify): 16 | transformed_texts = [] 17 | words = current_text.words 18 | for idx in indices_to_modify: 19 | word = words[idx] 20 | swap_idxs = list(set(range(len(words))) - {idx}) 21 | if swap_idxs: 22 | swap_idx = random.choice(swap_idxs) 23 | swapped_text = current_text.replace_word_at_index( 24 | idx, words[swap_idx] 25 | ).replace_word_at_index(swap_idx, word) 26 | transformed_texts.append(swapped_text) 27 | return transformed_texts 28 | 29 | @property 30 | def deterministic(self): 31 | return False 32 | -------------------------------------------------------------------------------- /textattack/transformations/word_swap_wordnet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Word Swap by swaping synonyms in WordNet 3 | ========================================================== 4 | """ 5 | 6 | 7 | from nltk.corpus import wordnet 8 | 9 | import textattack 10 | from textattack.transformations.word_swap import WordSwap 11 | 12 | 13 | class WordSwapWordNet(WordSwap): 14 | """Transforms an input by replacing its words with synonyms provided by 15 | WordNet.""" 16 | 17 | def __init__(self, language="eng"): 18 | if language not in wordnet.langs(): 19 | raise ValueError(f"Language {language} not one of {wordnet.langs()}") 20 | self.language = language 21 | 22 | def _get_replacement_words(self, word, random=False): 23 | """Returns a list containing all possible words with 1 character 24 | replaced by a homoglyph.""" 25 | synonyms = set() 26 | for syn in wordnet.synsets(word, lang=self.language): 27 | for syn_word in syn.lemma_names(lang=self.language): 28 | if ( 29 | (syn_word != word) 30 | and ("_" not in syn_word) 31 | and (textattack.shared.utils.is_one_word(syn_word)) 32 | ): 33 | # WordNet can suggest phrases that are joined by '_' but we ignore phrases. 34 | synonyms.add(syn_word) 35 | return list(synonyms) 36 | -------------------------------------------------------------------------------- /trainer/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import importlib 3 | from .base import BaseTrainer 4 | 5 | import argparse 6 | 7 | TRAINER_REGISTRY = {'base':BaseTrainer} 8 | 9 | __all__ = [ 10 | "BaseTrainer" 11 | ] 12 | 13 | def register_trainer(name): 14 | def register_trainer_cls(cls): 15 | if name in TRAINER_REGISTRY: 16 | raise ValueError("Cannot register duplicate trainer ({})".format(name)) 17 | if not issubclass(cls, BaseTrainer): 18 | raise ValueError( 19 | "Model ({}: {}) must extend Trainer".format(name, cls.__name__) 20 | ) 21 | TRAINER_REGISTRY[name] = cls 22 | 23 | return cls 24 | return register_trainer_cls 25 | 26 | 27 | trainer_dir = os.path.dirname(__file__) 28 | for file in os.listdir(trainer_dir): 29 | path = os.path.join(trainer_dir, file) 30 | if ( 31 | not file.startswith("_") 32 | and not file.startswith(".") 33 | and (file.endswith(".py") or os.path.isdir(path)) 34 | ): 35 | model_name = file[: file.find(".py")] if file.endswith(".py") else file 36 | module = importlib.import_module("trainer." + model_name) 37 | 38 | if model_name in TRAINER_REGISTRY: 39 | parser = argparse.ArgumentParser(add_help=False) 40 | group_archs = parser.add_argument_group("Named Trainer") 41 | group_archs.add_argument( 42 | "--trainer", choices=TRAINER_REGISTRY[model_name] 43 | ) 44 | group_args = parser.add_argument_group("Additional command-line arguments") 45 | TRAINER_REGISTRY[model_name].add_args(group_args) 46 | globals()[model_name + "_parser"] = parser 47 | 48 | from .freelb import FreeLBTrainer 49 | from .pgd import PGDTrainer 50 | from .hotflip import HotflipTrainer 51 | from .ibp import IBPTrainer 52 | from .tavat import TokenAwareVirtualAdversarialTrainer 53 | from .infobert import InfoBertTrainer 54 | from .dne import DNETrainer 55 | from .mixup import MixUpTrainer 56 | # from .metric import EmbeddingLevelMetricTrainer 57 | # from .metric_token import TokenLevelMetricTrainer 58 | # from .mask import MaskTrainer 59 | from .safer import SAFERTrainer 60 | from .mask import MaskTrainer 61 | from .ascc import ASCCTrainer -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/__init__.py -------------------------------------------------------------------------------- /utils/certified/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/certified/__init__.py -------------------------------------------------------------------------------- /utils/certified/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/certified/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /utils/certified/__pycache__/attacks.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/certified/__pycache__/attacks.cpython-37.pyc -------------------------------------------------------------------------------- /utils/certified/__pycache__/data_util.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/certified/__pycache__/data_util.cpython-37.pyc -------------------------------------------------------------------------------- /utils/certified/__pycache__/ibp_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/certified/__pycache__/ibp_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/certified/__pycache__/vocabulary.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/certified/__pycache__/vocabulary.cpython-37.pyc -------------------------------------------------------------------------------- /utils/config.py: -------------------------------------------------------------------------------- 1 | from torch.nn import MSELoss, CrossEntropyLoss 2 | from transformers import BertConfig, BertForSequenceClassification, BertTokenizer 3 | from transformers import XLNetConfig, XLNetForSequenceClassification, XLNetTokenizer 4 | from transformers import ElectraConfig, ElectraForSequenceClassification, ElectraTokenizer 5 | from transformers import AlbertConfig, AlbertForSequenceClassification, AlbertTokenizer 6 | from transformers import RobertaConfig, RobertaForSequenceClassification, RobertaTokenizer 7 | 8 | 9 | MODEL_CLASSES = { 10 | # Note: there may be some bug in `dcnn` modeling, if you want to pretraining. 11 | 'bert': (BertConfig, BertForSequenceClassification, BertTokenizer), 12 | 'xlnet': (XLNetConfig, XLNetForSequenceClassification, XLNetTokenizer), 13 | 'roberta': (RobertaConfig, RobertaForSequenceClassification, RobertaTokenizer), 14 | 'albert': (AlbertConfig, AlbertForSequenceClassification, AlbertTokenizer), 15 | 'electra': (ElectraConfig, ElectraForSequenceClassification, ElectraTokenizer), 16 | } 17 | 18 | DATASET_LABEL_NUM = { 19 | 'sst2': 2, 20 | 'agnews': 4, 21 | 'imdb': 2, 22 | 'mr': 2, 23 | 'onlineshopping': 2, 24 | 'snli': 3, 25 | } 26 | 27 | LABEL_MAP = { 28 | 'nli': {'entailment': 0, 'contradiction': 1, 'neutral': 2}, 29 | 'agnews': {'0': 0, '1': 1, '2': 2, '3': 3}, 30 | 'binary': {'0': 0, '1': 1} 31 | } 32 | 33 | GLOVE_CONFIGS = { 34 | '6B.50d': {'size': 50, 'lines': 400000}, 35 | '840B.300d': {'size': 300, 'lines': 2196017} 36 | } 37 | -------------------------------------------------------------------------------- /utils/hook.py: -------------------------------------------------------------------------------- 1 | from torch import nn as nn 2 | 3 | class EmbeddingHook: 4 | forward_value = None 5 | backward_gradient = None 6 | 7 | @classmethod 8 | def fw_hook_layers(cls, module, inputs, outputs): 9 | cls.forward_value = outputs 10 | 11 | @classmethod 12 | def bw_hook_layers(cls, module, grad_in, grad_out): 13 | cls.backward_gradient = grad_out[0] 14 | 15 | @classmethod 16 | def register_embedding_hook(cls, embedding: nn.Embedding): 17 | fw_hook = embedding.register_forward_hook(cls.fw_hook_layers) 18 | bw_hook = embedding.register_backward_hook(cls.bw_hook_layers) 19 | return [fw_hook, bw_hook] 20 | 21 | @classmethod 22 | def reading_embedding_hook(cls): 23 | return cls.forward_value, cls.backward_gradient 24 | -------------------------------------------------------------------------------- /utils/luna/__init__.py: -------------------------------------------------------------------------------- 1 | from .public import * 2 | from .pytorch import * 3 | from .sequence import * 4 | from .logging import * 5 | from .tables import * 6 | from .ckpt_utils import * 7 | from .pretty_printing import * 8 | from .ram import * 9 | from .program_args import * 10 | -------------------------------------------------------------------------------- /utils/luna/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/luna/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /utils/luna/__pycache__/adv_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/luna/__pycache__/adv_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/luna/__pycache__/cached_searcher.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/luna/__pycache__/cached_searcher.cpython-37.pyc -------------------------------------------------------------------------------- /utils/luna/__pycache__/ckpt_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/luna/__pycache__/ckpt_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/luna/__pycache__/logging.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/luna/__pycache__/logging.cpython-37.pyc -------------------------------------------------------------------------------- /utils/luna/__pycache__/pretty_printing.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/luna/__pycache__/pretty_printing.cpython-37.pyc -------------------------------------------------------------------------------- /utils/luna/__pycache__/program_args.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/luna/__pycache__/program_args.cpython-37.pyc -------------------------------------------------------------------------------- /utils/luna/__pycache__/public.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/luna/__pycache__/public.cpython-37.pyc -------------------------------------------------------------------------------- /utils/luna/__pycache__/pytorch.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/luna/__pycache__/pytorch.cpython-37.pyc -------------------------------------------------------------------------------- /utils/luna/__pycache__/ram.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/luna/__pycache__/ram.cpython-37.pyc -------------------------------------------------------------------------------- /utils/luna/__pycache__/searcher.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/luna/__pycache__/searcher.cpython-37.pyc -------------------------------------------------------------------------------- /utils/luna/__pycache__/sequence.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/luna/__pycache__/sequence.cpython-37.pyc -------------------------------------------------------------------------------- /utils/luna/__pycache__/tables.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/luna/__pycache__/tables.cpython-37.pyc -------------------------------------------------------------------------------- /utils/luna/__pycache__/word_index_searcher.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RockyLzy/TextDefender/06a50688225d1bd618f2a8efaa3334b732eaa527/utils/luna/__pycache__/word_index_searcher.cpython-37.pyc -------------------------------------------------------------------------------- /utils/luna/cached_searcher.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from typing import Union, Callable, Dict 3 | import json 4 | from collections import defaultdict, Counter 5 | from functools import lru_cache 6 | from .searcher import Searcher 7 | import numpy as np 8 | 9 | 10 | class CachedWordSearcher(Searcher): 11 | """ 12 | Load words from a json file 13 | """ 14 | def __init__( 15 | self, 16 | file_name: str, 17 | vocab_list, 18 | second_order: bool = False, 19 | ): 20 | super().__init__() 21 | loaded = json.load(open(file_name)) 22 | # filter by a given vocabulary 23 | if vocab_list: 24 | filtered = defaultdict(lambda: [], {}) 25 | for k in loaded: 26 | if k in vocab_list: 27 | for v in loaded[k]: 28 | if v in vocab_list: 29 | filtered[k].append(v) 30 | filtered = dict(filtered) 31 | else: 32 | filtered = loaded 33 | # add second order words 34 | if second_order: 35 | nbrs = defaultdict(lambda: [], {}) 36 | for k in filtered: 37 | for v in filtered[k]: 38 | nbrs[k].append(v) 39 | # some neighbours have no neighbours 40 | if v not in filtered: 41 | continue 42 | for vv in filtered[v]: 43 | if vv != k and vv not in nbrs[k]: 44 | nbrs[k].append(vv) 45 | nbrs = dict(nbrs) 46 | else: 47 | nbrs = filtered 48 | self.nbrs = nbrs 49 | 50 | def show_verbose(self): 51 | nbr_num = list(map(len, list(self.nbrs.values()))) 52 | print(f"total word: {len(self.nbrs)}, ", 53 | f"mean: {round(np.mean(nbr_num), 2)}, ", 54 | f"median: {round(np.median(nbr_num), 2)}, " 55 | f"max: {np.max(nbr_num)}, ") 56 | print(Counter(nbr_num)) 57 | 58 | def search(self, word): 59 | if word in self.nbrs: 60 | return self.nbrs[word] 61 | else: 62 | return [] 63 | 64 | -------------------------------------------------------------------------------- /utils/luna/ckpt_utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | 4 | 5 | def checkpoint_paths(path, pattern=r'checkpoint@(\d+)\.pt'): 6 | """Retrieves all checkpoints found in `path` directory. 7 | Checkpoints are identified by matching filename to the specified pattern. If 8 | the pattern contains groups, the result will be sorted by the first group in 9 | descending order. 10 | """ 11 | pt_regexp = re.compile(pattern) 12 | files = os.listdir(path) 13 | 14 | entries = [] 15 | for i, f in enumerate(files): 16 | m = pt_regexp.fullmatch(f) 17 | if m is not None: 18 | idx = int(m.group(1)) if len(m.groups()) > 0 else i 19 | entries.append((idx, m.group(0))) 20 | return [os.path.join(path, x[1]) for x in sorted(entries, reverse=True)] 21 | 22 | 23 | # model_name = /aaa/bbb/ccc/model 24 | # find: 25 | # /aaa/bbb/ccc/model.1 26 | # /aaa/bbb/ccc/model.2 27 | # * /aaa/bbb/ccc/model.best 28 | 29 | def fetch_best_ckpt_name(model_path): 30 | model_name = model_path + '.best' 31 | if os.path.exists(model_name): 32 | print("Found checkpoint {}".format(model_name)) 33 | else: 34 | model_name = fetch_last_ckpt_name(model_path) 35 | print("Best checkpoint not found, use latest {} instead".format(model_name)) 36 | return model_name 37 | 38 | 39 | def fetch_last_ckpt_name(model_path): 40 | splash_index = model_path.rindex('/') 41 | model_folder = model_path[:splash_index] 42 | model_file = model_path[splash_index+1:] 43 | files = checkpoint_paths(model_folder,r'{}.(\d+)'.format(model_file)) 44 | return files[0] 45 | 46 | 47 | 48 | # print(fetch_last_ckpt_name("/disks/sdb/zjiehang/zhou_data/saved_models/word_tag/lzynb")) 49 | -------------------------------------------------------------------------------- /utils/luna/logging.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | import arrow 4 | from inspect import isfunction 5 | 6 | __log_path__ = "logs" 7 | globals()["__default_target__"] = 'c' 8 | 9 | 10 | def log_config(filename, 11 | default_target, 12 | log_path=__log_path__, 13 | append=False, 14 | ): 15 | if not os.path.exists(log_path): 16 | os.makedirs(log_path, exist_ok=True) 17 | log_time = arrow.now().format('MMMDD_HH-mm-ss') 18 | 19 | def __lazy(): 20 | return open("{}/{}.{}.txt".format(log_path, filename, log_time), 21 | "a" if append else "w") 22 | logger = __lazy 23 | globals()["__logger__"] = logger 24 | globals()["__default_target__"] = default_target 25 | 26 | 27 | def log(*info, target=None, color=None): 28 | if target is None: 29 | target = globals()["__default_target__"] 30 | assert target in ['c', 'f', 'cf', 'fc'] 31 | if len(info) == 1: 32 | info_str = str(info[0]) 33 | else: 34 | info = list(map(str, info)) 35 | info_str = " ".join(info) 36 | if 'c' in target: 37 | if isfunction(color): 38 | print(color(info_str)) 39 | else: 40 | print(info_str) 41 | if 'f' in target: 42 | logger = globals()["__logger__"] 43 | if isfunction(logger): 44 | logger = logger() 45 | globals()["__logger__"] = logger 46 | logger.write("{}\n".format(info_str)) 47 | logger.flush() 48 | 49 | 50 | log_buffer = [] # type:List 51 | 52 | 53 | def log_to_buffer(*info): 54 | for ele in info: 55 | log_buffer.append(ele) 56 | 57 | 58 | def log_flush_buffer(target=None): 59 | log("\n".join(log_buffer), target=target) 60 | log_buffer.clear() 61 | 62 | 63 | -------------------------------------------------------------------------------- /utils/luna/pretty_printing.py: -------------------------------------------------------------------------------- 1 | from colorama import Fore, Back 2 | 3 | 4 | class Color(object): 5 | @staticmethod 6 | def red(s): 7 | return Fore.RED + str(s) + Fore.RESET 8 | 9 | @staticmethod 10 | def green(s): 11 | return Fore.GREEN + str(s) + Fore.RESET 12 | 13 | @staticmethod 14 | def yellow(s): 15 | return Fore.YELLOW + str(s) + Fore.RESET 16 | 17 | @staticmethod 18 | def blue(s): 19 | return Fore.BLUE + str(s) + Fore.RESET 20 | 21 | @staticmethod 22 | def magenta(s): 23 | return Fore.MAGENTA + str(s) + Fore.RESET 24 | 25 | @staticmethod 26 | def cyan(s): 27 | return Fore.CYAN + str(s) + Fore.RESET 28 | 29 | @staticmethod 30 | def white(s): 31 | return Fore.WHITE + str(s) + Fore.RESET 32 | 33 | @staticmethod 34 | def white_green(s): 35 | return Fore.WHITE + Back.GREEN + str(s) + Fore.RESET + Back.RESET 36 | -------------------------------------------------------------------------------- /utils/luna/program_args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | class ProgramArgs: 5 | # Never use True/False ! 6 | def __init__(self): 7 | pass 8 | 9 | def _check_args(self): 10 | assert True 11 | 12 | def __repr__(self): 13 | basic_ret = "" 14 | for key, value in self.__dict__.items(): 15 | basic_ret += "\t--{}={}\n".format(key, value) 16 | 17 | deduced_ret = "" 18 | deduced_args = [ele for ele in dir(self) if ele[0] != '_' and ele not in self.__dict__] 19 | for key in deduced_args: 20 | deduced_ret += "\t--{}={}\n".format(key, getattr(self, key)) 21 | 22 | ret = "Basic Args:\n" + basic_ret 23 | if deduced_ret != "": 24 | ret += "Deduced Args:\n" + deduced_ret 25 | return ret 26 | 27 | def _parse_args(self): 28 | parser = argparse.ArgumentParser() 29 | bool_keys = [] 30 | for key, value in self.__dict__.items(): 31 | # Hack support for true/false 32 | if isinstance(value, bool): 33 | bool_keys.append(key) 34 | value = str(value) 35 | parser.add_argument('--{}'.format(key), 36 | action='store', 37 | default=value, 38 | type=type(value), 39 | dest=str(key)) 40 | parsed_args = parser.parse_args().__dict__ 41 | for ele in bool_keys: 42 | if parsed_args[ele] in ['True', 'true', 'on', '1', 'yes']: 43 | parsed_args[ele] = True 44 | elif parsed_args[ele] in ['False', 'false', 'off', '0', 'no']: 45 | parsed_args[ele] = False 46 | else: 47 | raise Exception('You must pass a boolean value for arg {}'.format(ele)) 48 | self.__dict__.update(parsed_args) 49 | self._check_args() 50 | return self 51 | -------------------------------------------------------------------------------- /utils/luna/registry.py: -------------------------------------------------------------------------------- 1 | REGISTRIES = {} 2 | 3 | def setup_registry(registry_name): 4 | if registry_name in REGISTRIES: 5 | raise ValueError(f'Cannot register duplicate registry {name}') 6 | REGISTRY = {} 7 | REGISTRIES[registry_name] = REGISTRY 8 | def register(name): 9 | 10 | def register_cls(cls): 11 | for k, v in REGISTRY.items(): 12 | if k == v.__name__: 13 | raise ValueError(f'Cannot register duplicate key {name}') 14 | if cls.__name__ == v: 15 | raise ValueError(f'Cannot register duplicate class name {cls.__name__}') 16 | REGISTRY[name] = cls 17 | return cls 18 | 19 | return register_cls 20 | 21 | return register, REGISTRY 22 | 23 | def get_registry(registry_name): 24 | return REGISTRIES[registry_name] -------------------------------------------------------------------------------- /utils/luna/requirements.txt: -------------------------------------------------------------------------------- 1 | colorama 2 | arrow 3 | psutil 4 | sklearn 5 | tabulate -------------------------------------------------------------------------------- /utils/luna/searcher.py: -------------------------------------------------------------------------------- 1 | class Searcher: 2 | def search(self, element): 3 | raise NotImplementedError 4 | 5 | def batch_search(self, elements): 6 | return [self.search(ele) for ele in elements] -------------------------------------------------------------------------------- /utils/luna/word_index_searcher.py: -------------------------------------------------------------------------------- 1 | from typing import Union, Callable, Dict 2 | from functools import lru_cache 3 | from .searcher import Searcher 4 | 5 | 6 | class WordIndexSearcher(Searcher): 7 | def __init__( 8 | self, 9 | word_searcher, 10 | word2idx: Union[Callable, Dict], 11 | idx2word: Union[Callable, Dict] 12 | ): 13 | self._word_searcher = word_searcher 14 | if isinstance(word2idx, dict): 15 | self.word2idx = word2idx.__getitem__ 16 | else: 17 | self.word2idx = word2idx 18 | if isinstance(idx2word, dict): 19 | self.idx2word = idx2word.__getitem__ 20 | else: 21 | self.idx2word = idx2word 22 | 23 | @lru_cache(maxsize=None) 24 | def search(self, idx): 25 | words = self._word_searcher.search(self.idx2word(idx)) 26 | idxes = [self.word2idx(ele) for ele in words] 27 | assert 0 not in idxes, "Something must be wrong" 28 | return idxes 29 | 30 | -------------------------------------------------------------------------------- /utils/perturbation_creater.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import json 4 | import numpy as np 5 | 6 | from textattack.transformations import WordSwapEmbedding, WordSwapMaskedLM 7 | 8 | 9 | def create_perturbation_set_from_attacker(output_file_path, max_candidate): 10 | perturb = {} 11 | 12 | transformation = WordSwapEmbedding(max_candidates=max_candidate) 13 | 14 | word_list_file = '/home/lizongyi/.cache/textattack/word_embeddings/paragramcf/wordlist.pickle' 15 | word2index = np.load(word_list_file, allow_pickle=True) 16 | 17 | for word in word2index.keys(): 18 | perturb[word] = transformation._get_replacement_words(word) 19 | 20 | with open(f'{output_file_path}/textfooler_{max_candidate}.json', 'w', encoding='utf-8') as fout: 21 | json.dump(perturb, fout) 22 | 23 | 24 | if __name__ == '__main__': 25 | create_perturbation_set_from_attacker('/disks/sdb/lzy/adversarialBenchmark/dne_external_data', 50) 26 | -------------------------------------------------------------------------------- /utils/safer.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import torch 4 | import string 5 | import pickle 6 | from typing import Union, Dict 7 | 8 | 9 | class WordSubstitude: 10 | def __init__(self, table: Union[str, Dict]): 11 | ''' 12 | table: str or Dict, when type(table) is string, mean path, using pickle to load. 13 | ''' 14 | if isinstance(table, str): 15 | pkl_file = open(table, 'rb') 16 | self.table = pickle.load(pkl_file) 17 | pkl_file.close() 18 | else: 19 | self.table = table 20 | self.table_key = set(list(self.table.keys())) 21 | self.exclude = set(string.punctuation) 22 | 23 | def get_perturbed_batch(self, sentence: str, rep:int = 1): 24 | out_batch = [] 25 | sentence_in_list = sentence.split() 26 | for k in range(rep): 27 | tmp_sentence = [] 28 | for i in range(len(sentence_in_list)): 29 | token = sentence_in_list[i] 30 | if token[-1] in self.exclude: 31 | tmp_sentence.append(self.sample_from_table(token[0:-1]) + token[-1]) 32 | else: 33 | tmp_sentence.append(self.sample_from_table(token)) 34 | out_batch.append(' '.join(tmp_sentence)) 35 | return out_batch 36 | 37 | def sample_from_table(self, word): 38 | if word in self.table_key: 39 | tem_words = self.table[word]['set'] 40 | num_words = len(tem_words) 41 | index = np.random.randint(0, num_words) 42 | return tem_words[index] 43 | else: 44 | return word --------------------------------------------------------------------------------