├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── custom.md │ └── feature_request.md ├── pull_request_template.md └── workflows │ ├── check-formatting.yml │ ├── codeql-analysis.yml │ ├── make-docs.yml │ ├── publish-to-pypi.yml │ └── run-pytest.yml ├── .gitignore ├── .readthedocs.yaml ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── README_ZH.md ├── docs ├── 0_get_started │ ├── basic-Intro.rst │ ├── command_line_usage.md │ ├── installation.md │ └── quick_api_tour.rst ├── 1start │ ├── A2TforVanillaAT.md │ ├── FAQ.md │ ├── api-design-tips.md │ ├── attacks4Components.md │ ├── benchmark-search.md │ ├── multilingual-visualization.md │ ├── quality-SOTA-recipes.md │ ├── references.md │ ├── support.md │ └── what_is_an_adversarial_attack.md ├── 2notebook │ ├── .gitignore │ ├── 0_End_to_End.ipynb │ ├── 1_Introduction_and_Transformations.ipynb │ ├── 2_Constraints.ipynb │ ├── 3_Augmentations.ipynb │ ├── 4_Custom_Datasets_Word_Embedding.ipynb │ ├── Example_0_tensorflow.ipynb │ ├── Example_1_sklearn.ipynb │ ├── Example_2_allennlp.ipynb │ ├── Example_3_Keras.ipynb │ ├── Example_4_CamemBERT.ipynb │ ├── Example_5_Explain_BERT.ipynb │ └── Example_6_Chinese_Attack.ipynb ├── 3recipes │ ├── attack_recipes.rst │ ├── attack_recipes_cmd.md │ ├── augmenter_recipes.rst │ ├── augmenter_recipes_cmd.md │ └── models.md ├── Makefile ├── __init__.py ├── _static │ ├── css │ │ └── custom.css │ └── imgs │ │ ├── benchmark │ │ ├── search-example.pdf │ │ ├── search-fig1.png │ │ ├── search-fig2.png │ │ ├── search-table1.png │ │ ├── search-table2.png │ │ ├── search-table31.png │ │ ├── search-table32.png │ │ ├── table3.png │ │ ├── table4.png │ │ ├── table5-main.png │ │ ├── table7.png │ │ └── table9.png │ │ ├── intro │ │ ├── 01-categorized-attacks.png │ │ ├── ae_papers.png │ │ ├── mr_aes.png │ │ ├── mr_aes_table.png │ │ ├── pig_airliner.png │ │ ├── textattack_components.png │ │ └── textattack_ecosystem.png │ │ └── overview.png ├── api │ ├── attack.rst │ ├── attack_results.rst │ ├── attacker.rst │ ├── constraints.rst │ ├── datasets.rst │ ├── goal_functions.rst │ ├── search_methods.rst │ ├── trainer.rst │ └── transformations.rst ├── apidoc │ ├── textattack.attack_recipes.rst │ ├── textattack.attack_results.rst │ ├── textattack.augmentation.rst │ ├── textattack.commands.rst │ ├── textattack.constraints.grammaticality.language_models.google_language_model.rst │ ├── textattack.constraints.grammaticality.language_models.learning_to_write.rst │ ├── textattack.constraints.grammaticality.language_models.rst │ ├── textattack.constraints.grammaticality.rst │ ├── textattack.constraints.overlap.rst │ ├── textattack.constraints.pre_transformation.rst │ ├── textattack.constraints.rst │ ├── textattack.constraints.semantics.rst │ ├── textattack.constraints.semantics.sentence_encoders.infer_sent.rst │ ├── textattack.constraints.semantics.sentence_encoders.rst │ ├── textattack.constraints.semantics.sentence_encoders.sentence_bert.rst │ ├── textattack.constraints.semantics.sentence_encoders.universal_sentence_encoder.rst │ ├── textattack.datasets.helpers.rst │ ├── textattack.datasets.rst │ ├── textattack.goal_function_results.rst │ ├── textattack.goal_functions.classification.rst │ ├── textattack.goal_functions.rst │ ├── textattack.goal_functions.text.rst │ ├── textattack.llms.rst │ ├── textattack.loggers.rst │ ├── textattack.metrics.attack_metrics.rst │ ├── textattack.metrics.quality_metrics.rst │ ├── textattack.metrics.rst │ ├── textattack.models.helpers.rst │ ├── textattack.models.rst │ ├── textattack.models.tokenizers.rst │ 
├── textattack.models.wrappers.rst │ ├── textattack.prompt_augmentation.rst │ ├── textattack.rst │ ├── textattack.search_methods.rst │ ├── textattack.shared.rst │ ├── textattack.shared.utils.rst │ ├── textattack.transformations.rst │ ├── textattack.transformations.sentence_transformations.rst │ ├── textattack.transformations.word_insertions.rst │ ├── textattack.transformations.word_merges.rst │ └── textattack.transformations.word_swaps.rst ├── conf.py ├── environment.yml ├── favicon.png ├── index.rst ├── make.bat └── requirements.txt ├── examples ├── attack │ ├── attack_camembert.py │ ├── attack_from_components.sh │ ├── attack_huggingface_deepwordbug.sh │ ├── attack_keras_parallel.py │ └── attack_roberta_sst2_textfooler.sh ├── augmentation │ ├── .gitignore │ ├── augment.csv │ ├── augment.sh │ └── examples.csv ├── dataset │ └── chinese_data_demo.tsv └── train │ ├── train_albert_snli_entailment.sh │ ├── train_bert_stsb_similarity.sh │ ├── train_lstm_imdb_sentiment_classification.sh │ └── train_lstm_rotten_tomatoes_sentiment_classification.sh ├── pytest.ini ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests ├── sample_inputs │ ├── __init__.py │ ├── attack_from_file.py │ ├── augment.csv.txt │ └── sst_model_and_dataset.py ├── sample_outputs │ ├── augment_test.csv.txt │ ├── csv_attack_log.csv │ ├── eval_model_hub_rt.txt │ ├── eval_snli.txt │ ├── interactive_mode.txt │ ├── json_attack_summary.json │ ├── kuleshov_cnn_sst_2.txt │ ├── list_augmentation_recipes.txt │ ├── run_attack_cnn_cola.txt │ ├── run_attack_deepwordbug_bert_snli_10.txt │ ├── run_attack_deepwordbug_lstm_mr_2.txt │ ├── run_attack_faster_alzantot_recipe.txt │ ├── run_attack_flair_pos_tagger_bert_score.txt │ ├── run_attack_from_file.txt │ ├── run_attack_gradient_greedy_word_wir.txt │ ├── run_attack_hotflip_lstm_mr_4.txt │ ├── run_attack_hotflip_lstm_mr_4_adv_metrics.txt │ ├── run_attack_nonoverlapping_t5ende_editdistance_bleu.txt │ ├── run_attack_stanza_pos_tagger.txt │ ├── run_attack_targetedclassification2_wordnet_langtool_log-to-csv_beamsearch2_attack_n.txt │ ├── run_attack_transformers_datasets.txt │ ├── run_attack_transformers_datasets_adv_metrics.txt │ └── txt_attack_log.txt ├── test_attacked_text.py ├── test_augment_api.py ├── test_command_line │ ├── helpers.py │ ├── test_attack.py │ ├── test_augment.py │ ├── test_eval.py │ ├── test_list.py │ ├── test_loggers.py │ ├── test_train.py │ └── update_test_outputs.py ├── test_constraints │ └── test_pretransformation_constraints.py ├── test_metric_api.py ├── test_prompt_augmentation.py ├── test_tokenizers.py ├── test_transformations.py └── test_word_embedding.py └── textattack ├── __init__.py ├── __main__.py ├── attack.py ├── attack_args.py ├── attack_recipes ├── __init__.py ├── a2t_yoo_2021.py ├── attack_recipe.py ├── bae_garg_2019.py ├── bert_attack_li_2020.py ├── checklist_ribeiro_2020.py ├── chinese_recipe.py ├── clare_li_2020.py ├── deepwordbug_gao_2018.py ├── faster_genetic_algorithm_jia_2019.py ├── french_recipe.py ├── genetic_algorithm_alzantot_2018.py ├── hotflip_ebrahimi_2017.py ├── iga_wang_2019.py ├── input_reduction_feng_2018.py ├── kuleshov_2017.py ├── morpheus_tan_2020.py ├── pruthi_2019.py ├── pso_zang_2020.py ├── pwws_ren_2019.py ├── seq2sick_cheng_2018_blackbox.py ├── spanish_recipe.py ├── textbugger_li_2018.py └── textfooler_jin_2019.py ├── attack_results ├── __init__.py ├── attack_result.py ├── failed_attack_result.py ├── maximized_attack_result.py ├── skipped_attack_result.py └── successful_attack_result.py ├── attacker.py ├── augment_args.py ├── 
augmentation ├── __init__.py ├── augmenter.py └── recipes.py ├── commands ├── __init__.py ├── attack_command.py ├── attack_resume_command.py ├── augment_command.py ├── benchmark_recipe_command.py ├── eval_model_command.py ├── list_things_command.py ├── peek_dataset_command.py ├── textattack_cli.py ├── textattack_command.py └── train_model_command.py ├── constraints ├── __init__.py ├── constraint.py ├── grammaticality │ ├── __init__.py │ ├── cola.py │ ├── language_models │ │ ├── __init__.py │ │ ├── google_language_model │ │ │ ├── __init__.py │ │ │ ├── alzantot_goog_lm.py │ │ │ ├── google_language_model.py │ │ │ ├── lm_data_utils.py │ │ │ └── lm_utils.py │ │ ├── gpt2.py │ │ ├── language_model_constraint.py │ │ └── learning_to_write │ │ │ ├── __init__.py │ │ │ ├── adaptive_softmax.py │ │ │ ├── language_model_helpers.py │ │ │ ├── learning_to_write.py │ │ │ └── rnn_model.py │ ├── language_tool.py │ └── part_of_speech.py ├── overlap │ ├── __init__.py │ ├── bleu_score.py │ ├── chrf_score.py │ ├── levenshtein_edit_distance.py │ ├── max_words_perturbed.py │ └── meteor_score.py ├── pre_transformation │ ├── __init__.py │ ├── input_column_modification.py │ ├── max_modification_rate.py │ ├── max_num_words_modified.py │ ├── max_word_index_modification.py │ ├── min_word_length.py │ ├── repeat_modification.py │ ├── stopword_modification.py │ ├── unmodifiable_indices.py │ └── unmodifiable_phrases.py ├── pre_transformation_constraint.py └── semantics │ ├── __init__.py │ ├── bert_score.py │ ├── sentence_encoders │ ├── __init__.py │ ├── infer_sent │ │ ├── __init__.py │ │ ├── infer_sent.py │ │ └── infer_sent_model.py │ ├── sentence_bert │ │ ├── __init__.py │ │ └── sbert.py │ ├── sentence_encoder.py │ ├── thought_vector.py │ └── universal_sentence_encoder │ │ ├── __init__.py │ │ ├── multilingual_universal_sentence_encoder.py │ │ └── universal_sentence_encoder.py │ └── word_embedding_distance.py ├── dataset_args.py ├── datasets ├── __init__.py ├── dataset.py ├── helpers │ ├── __init__.py │ └── ted_multi.py └── huggingface_dataset.py ├── goal_function_results ├── __init__.py ├── classification_goal_function_result.py ├── goal_function_result.py └── text_to_text_goal_function_result.py ├── goal_functions ├── __init__.py ├── classification │ ├── __init__.py │ ├── classification_goal_function.py │ ├── hardlabel_classification.py │ ├── input_reduction.py │ ├── targeted_classification.py │ └── untargeted_classification.py ├── goal_function.py └── text │ ├── __init__.py │ ├── minimize_bleu.py │ ├── non_overlapping_output.py │ └── text_to_text_goal_function.py ├── llms ├── __init__.py ├── chat_gpt_wrapper.py └── huggingface_llm_wrapper.py ├── loggers ├── __init__.py ├── attack_log_manager.py ├── csv_logger.py ├── file_logger.py ├── json_summary_logger.py ├── logger.py ├── visdom_logger.py └── weights_and_biases_logger.py ├── metrics ├── __init__.py ├── attack_metrics │ ├── __init__.py │ ├── attack_queries.py │ ├── attack_success_rate.py │ └── words_perturbed.py ├── metric.py ├── quality_metrics │ ├── __init__.py │ ├── bert_score.py │ ├── meteor_score.py │ ├── perplexity.py │ ├── sentence_bert.py │ └── use.py └── recipe.py ├── model_args.py ├── models ├── README.md ├── __init__.py ├── helpers │ ├── __init__.py │ ├── glove_embedding_layer.py │ ├── lstm_for_classification.py │ ├── t5_for_text_to_text.py │ ├── utils.py │ └── word_cnn_for_classification.py ├── tokenizers │ ├── __init__.py │ ├── glove_tokenizer.py │ └── t5_tokenizer.py └── wrappers │ ├── __init__.py │ ├── huggingface_model_wrapper.py │ ├── model_wrapper.py │ 
├── pytorch_model_wrapper.py │ ├── sklearn_model_wrapper.py │ └── tensorflow_model_wrapper.py ├── prompt_augmentation ├── __init__.py └── prompt_augmentation_pipeline.py ├── search_methods ├── __init__.py ├── alzantot_genetic_algorithm.py ├── beam_search.py ├── genetic_algorithm.py ├── greedy_search.py ├── greedy_word_swap_wir.py ├── improved_genetic_algorithm.py ├── particle_swarm_optimization.py ├── population_based_search.py └── search_method.py ├── shared ├── __init__.py ├── attacked_text.py ├── checkpoint.py ├── chinese_homophone_char.txt ├── data.py ├── utils │ ├── __init__.py │ ├── importing.py │ ├── install.py │ ├── misc.py │ ├── strings.py │ └── tensor.py ├── validators.py └── word_embeddings.py ├── trainer.py ├── training_args.py └── transformations ├── __init__.py ├── composite_transformation.py ├── sentence_transformations ├── __init__.py ├── back_transcription.py ├── back_translation.py └── sentence_transformation.py ├── transformation.py ├── word_deletion.py ├── word_innerswap_random.py ├── word_insertions ├── __init__.py ├── word_insertion.py ├── word_insertion_masked_lm.py └── word_insertion_random_synonym.py ├── word_merges ├── __init__.py ├── word_merge.py └── word_merge_masked_lm.py └── word_swaps ├── __init__.py ├── chn_transformations ├── __init__.py ├── chinese_homophone_character_swap.py ├── chinese_morphonym_character_swap.py ├── chinese_word_swap_hownet.py └── chinese_word_swap_masked.py ├── word_swap.py ├── word_swap_change_location.py ├── word_swap_change_name.py ├── word_swap_change_number.py ├── word_swap_contract.py ├── word_swap_embedding.py ├── word_swap_extend.py ├── word_swap_gradient_based.py ├── word_swap_homoglyph_swap.py ├── word_swap_hownet.py ├── word_swap_inflections.py ├── word_swap_masked_lm.py ├── word_swap_neighboring_character_swap.py ├── word_swap_qwerty.py ├── word_swap_random_character_deletion.py ├── word_swap_random_character_insertion.py ├── word_swap_random_character_substitution.py └── word_swap_wordnet.py
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Run the following command `textattack ...`
16 | 2. Run the following code ...
17 | 3. See error
18 | 
19 | **Expected behavior**
20 | A clear and concise description of what you expected to happen.
21 | 
22 | **Screenshots or Traceback**
23 | If applicable, add screenshots to help explain your problem. Also, copy and paste any tracebacks produced by the bug.
24 | 
25 | **System Information (please complete the following information):**
26 | - OS: [e.g. macOS, Linux, Windows]
27 | - Library versions (e.g. `torch==1.7.0, transformers==3.3.0`)
28 | - TextAttack version
29 | 
30 | **Additional context**
31 | Add any other context about the problem here.
32 | 
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/custom.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Custom issue template
 3 | about: Describe this issue template's purpose here.
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | 
11 | 
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
 1 | # What does this PR do?
 2 | 
 3 | ## Summary
 4 | *Example: This PR adds the [CLARE](https://arxiv.org/abs/2009.07502) attack, which uses a distilled RoBERTa masked language model to perform word swaps, word insertions, and word merges (combining two adjacent words and replacing them with another word) in a greedy manner.*
 5 | 
 6 | ## Additions
 7 | - *Example: Added `clare` recipe as `textattack.attack_recipes.CLARE2020`.*
 8 | 
 9 | ## Changes
10 | - *Example: `WordSwapMaskedLM` has been updated to have a minimum confidence score cutoff, and batching has been added for faster performance.*
11 | 
12 | ## Deletions
13 | - *Example: Remove unnecessary files under `textattack.models...`*
14 | 
15 | ## Checklist
16 | - [ ] The title of your pull request should be a summary of its contribution.
17 | - [ ] Please write a detailed description of what parts have been newly added and what parts have been modified. Please also explain why certain changes were made.
18 | - [ ] If your pull request addresses an issue, please mention the issue number in the pull request description to make sure they are linked (and so people consulting the issue know you are working on it).
19 | - [ ] To indicate a work in progress, please mark it as a draft on GitHub.
20 | - [ ] Make sure existing tests pass.
21 | - [ ] Add relevant tests. No quality testing = no merge.
22 | - [ ] All public methods must have informative docstrings that work nicely with Sphinx. For new modules/files, please add/modify the appropriate `.rst` file in `TextAttack/docs/apidoc`.
23 | 
--------------------------------------------------------------------------------
/.github/workflows/check-formatting.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will install Python dependencies and check code formatting with black, isort, and flake8
 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 3 | 
 4 | name: Formatting with black & isort
 5 | 
 6 | on:
 7 |   push:
 8 |     branches: [ master ]
 9 |   pull_request:
10 |     branches: [ master ]
11 | 
12 | jobs:
13 |   build:
14 | 
15 |     runs-on: ubuntu-latest
16 |     strategy:
17 |       matrix:
18 |         python-version: [3.9]
19 | 
20 |     steps:
21 |     - uses: actions/checkout@v2
22 |     - name: Set up Python ${{ matrix.python-version }}
23 |       uses: actions/setup-python@v2
24 |       with:
25 |         python-version: ${{ matrix.python-version }}
26 |     - name: Install dependencies
27 |       run: |
28 |         python -m pip install --upgrade pip setuptools wheel
29 |         python setup.py install_egg_info # Workaround https://github.com/pypa/pip/issues/4537
30 |         pip install -e .[dev]
31 |         pip install black flake8 isort --upgrade # Testing packages
32 |     - name: Check code format with black and isort
33 |       run: |
34 |         make lint
35 | 
--------------------------------------------------------------------------------
/.github/workflows/make-docs.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will install Python dependencies and build the documentation with Sphinx
 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 3 | 
 4 | name: Build documentation with Sphinx
 5 | 
 6 | on:
 7 |   push:
 8 |     branches: [ master ]
 9 |   pull_request:
10 |     branches: [ master ]
11 | 
12 | jobs:
13 |   build:
14 | 
15 |     runs-on: ubuntu-latest
16 |     strategy:
17 |       matrix:
18 |         python-version: [3.8]
19 | 
20 |     steps:
21 |     - uses: actions/checkout@v2
22 |     - name: Set up Python ${{ matrix.python-version }}
23 |       uses: actions/setup-python@v2
24 |       with:
25 |         python-version: ${{ matrix.python-version }}
26 |     - name: Install dependencies
27 |       run: |
28 |         sudo sed -i 's/azure\.//' /etc/apt/sources.list # workaround for flaky pandoc install
29 |         sudo apt-get update # from here https://github.com/actions/virtual-environments/issues/675
30 |         sudo apt-get install pandoc -o Acquire::Retries=3 # install pandoc
31 |         python -m pip install --upgrade pip setuptools wheel # update python
32 |         pip install ipython --upgrade # needed for GitHub for whatever reason
33 |         python setup.py install_egg_info # Workaround https://github.com/pypa/pip/issues/4537
34 |         pip install -e .[dev]
35 |         pip install jupyter 'ipykernel<5.0.0' 'ipython<7.0.0' # ipykernel workaround: github.com/jupyter/notebook/issues/4050
36 |     - name: Build docs with Sphinx and check for errors
37 |       run: |
38 |         sphinx-build -b html docs docs/_build/html
39 | 
--------------------------------------------------------------------------------
/.github/workflows/publish-to-pypi.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
 3 | 
 4 | name: Upload Python Package to PyPI
 5 | 
 6 | on:
 7 |   release:
 8 |     types: [published]
 9 | 
10 | jobs:
11 |   deploy:
12 | 
13 |     runs-on: ubuntu-latest
14 | 
15 |     steps:
16 |     - uses: actions/checkout@v2
17 |     - name: Set up Python
18 |       uses: actions/setup-python@v2
19 |       with:
20 |         python-version: '3.x'
21 |     - name: Install dependencies
22 |       run: |
23 |         python -m pip install --upgrade pip setuptools wheel
24 |         pip install setuptools wheel twine
25 |     - name: Build and publish
26 |       env:
27 |         TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
28 |         TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
29 |       run: |
30 |         python setup.py sdist bdist_wheel
31 |         twine upload dist/*
32 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # cache for test output
 2 | .cache
 3 | 
 4 | # compiled python files
 5 | __pycache__/
 6 | 
 7 | # run logs
 8 | outputs/
 9 | 
10 | # IDE files
11 | .c9*
12 | .idea/
13 | 
14 | # Jupyter notebook files
15 | .ipynb_checkpoints/
16 | 
17 | # Sphinx documentation
18 | docs/_build/
19 | 
20 | # Packaging
21 | *.egg-info/
22 | 
23 | # Files from IDEs
24 | .*.py
25 | 
26 | # TF Hub modules
27 | tensorflow-hub
28 | 
29 | # for Macs
30 | .DS_Store
31 | 
32 | # build outputs for PyPI
33 | build/
34 | dist/
35 | 
36 | # Weights & Biases outputs
37 | wandb/
38 | 
39 | # TensorBoard logs
40 | runs/
41 | 
42 | # checkpoints
43 | checkpoints/
44 | 
45 | # vim
46 | *.swp
47 | 
48 | .vscode
49 | *.csv
50 | !tests/sample_outputs/csv_attack_log.csv
51 | tests/test_command_line/attack_log.txt
52 | textattack/=22.3.0
53 | 
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # .readthedocs.yml
 2 | # Read the Docs configuration file
 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 4 | 
 5 | # Required
 6 | version: 2
 7 | 
 8 | # Set the OS, Python version and other tools you might need
 9 | build:
10 |   os: ubuntu-20.04
11 |   tools:
12 |     python: "3.8"
13 |   # You can also specify other tool versions:
14 |   # nodejs: "20"
15 |   # rust: "1.70"
16 |   # golang: "1.20"
17 |   jobs:
18 |     post_checkout:
19 |       - git fetch --unshallow || true
20 | 
21 | # Build documentation in the docs/ directory with Sphinx
22 | sphinx:
23 |   configuration: docs/conf.py
24 | 
25 | # Optionally set the version of Python and requirements required to build your docs
26 | python:
27 |   install:
28 |     - requirements: docs/requirements.txt
29 |     - requirements: requirements.txt
30 |     - method: pip
31 |       path: .
32 |       extra_requirements:
33 |         - docs
34 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 QData
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | PEP_IGNORE_ERRORS="C901 E501 W503 E203 E231 E266 F403"
 2 | 
 3 | format: FORCE ## Run black and isort (rewriting files)
 4 | 	black .
 5 | 	isort --atomic tests textattack
 6 | 	docformatter --in-place --recursive textattack tests
 7 | 
 8 | lint: FORCE ## Run black, isort, flake8 (in check mode)
 9 | 	black . --check
10 | 	isort --check-only tests textattack
11 | 	flake8 . --count --ignore=$(PEP_IGNORE_ERRORS) --show-source --statistics --exclude=./.*,build,dist
12 | 
13 | test: FORCE ## Run tests using pytest
14 | 	python -m pytest --dist=loadfile -n auto
15 | 
16 | docs: FORCE ## Build docs using Sphinx.
17 | 	sphinx-build -b html docs docs/_build/html
18 | 
19 | docs-check: FORCE ## Build docs using Sphinx. If there is an error, exit with an error code (instead of warning & continuing).
20 | 	sphinx-build -b html docs docs/_build/html
21 | 
22 | docs-auto: FORCE ## Build docs using Sphinx and run a hot-reload server using sphinx-autobuild.
23 | 	sphinx-autobuild docs docs/_build/html --port 8765
24 | 
25 | all: format lint docs-check test ## Format, lint, and test.
26 | 
27 | .PHONY: help
28 | 
29 | .DEFAULT_GOAL := help
30 | 
31 | help:
32 | 	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
33 | 
34 | FORCE:
35 | 
--------------------------------------------------------------------------------
/docs/0_get_started/quick_api_tour.rst:
--------------------------------------------------------------------------------
 1 | Quick Tour
 2 | ==========================
 3 | 
 4 | Let us have a quick look at how TextAttack can be used to carry out an adversarial attack.
 5 | 
 6 | Attacking a BERT model
 7 | ------------------------------
 8 | Let us attack a BERT model fine-tuned for a sentiment classification task. We are going to use a model that has already been fine-tuned on the IMDB dataset using the Transformers library.
 9 | 
10 | .. code-block::
11 | 
12 |    >>> import transformers
13 |    >>> model = transformers.AutoModelForSequenceClassification.from_pretrained("textattack/bert-base-uncased-imdb")
14 |    >>> tokenizer = transformers.AutoTokenizer.from_pretrained("textattack/bert-base-uncased-imdb")
15 | 
16 | 
17 | TextAttack requires both the model and the tokenizer to be wrapped by a :class:`~textattack.models.wrappers.ModelWrapper` class that implements the forward pass operation given a list of input texts. For models provided by the Transformers library, we can simply use the :class:`~textattack.models.wrappers.HuggingFaceModelWrapper` class, which implements both the forward pass and tokenization.
18 | 
19 | .. code-block::
20 | 
21 |    >>> import textattack
22 |    >>> model_wrapper = textattack.models.wrappers.HuggingFaceModelWrapper(model, tokenizer)
23 | 
24 | Next, let's build the attack that we want to use. TextAttack provides prebuilt attacks in the form of an :class:`~textattack.attack_recipes.AttackRecipe`. For this example, we will use the TextFooler attack (:class:`~textattack.attack_recipes.TextFoolerJin2019`).
25 | 
26 | 
27 | .. code-block::
28 | 
29 |    >>> dataset = textattack.datasets.HuggingFaceDataset("imdb", split="test")
30 |    >>> attack = textattack.attack_recipes.TextFoolerJin2019.build(model_wrapper)
31 |    >>> # Attack 20 samples with CSV logging and a checkpoint saved every 5 attacks
32 |    >>> attack_args = textattack.AttackArgs(num_examples=20, log_to_csv="log.csv", checkpoint_interval=5, checkpoint_dir="checkpoints", disable_stdout=True)
33 |    >>> attacker = textattack.Attacker(attack, dataset, attack_args)
34 |    >>> attacker.attack_dataset()
35 | 
36 | 
37 | .. image:: ../_static/imgs/overview.png
38 | 
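To make the wrapper interface above concrete, here is a minimal sketch of a custom wrapper for a model that does not come from the Transformers library. Both `model` (a PyTorch classifier) and `tokenizer` (a callable that batches raw strings into model inputs) are illustrative assumptions, not objects defined in this repository:

```python
import torch

import textattack


class CustomModelWrapper(textattack.models.wrappers.ModelWrapper):
    """Sketch of a custom wrapper: TextAttack only requires that the wrapper
    be callable on a list of strings and return one row of scores per string."""

    def __init__(self, model, tokenizer):
        self.model = model          # assumed: a PyTorch text classifier
        self.tokenizer = tokenizer  # assumed: maps list[str] -> model inputs

    def __call__(self, text_input_list):
        # Tokenize the raw strings and run one batched forward pass.
        inputs = self.tokenizer(text_input_list, padding=True, return_tensors="pt")
        with torch.no_grad():
            outputs = self.model(**inputs)
        return outputs  # e.g. a (batch_size, num_classes) tensor of scores
```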
--------------------------------------------------------------------------------
/docs/2notebook/.gitignore:
--------------------------------------------------------------------------------
 1 | .ipynb_checkpoints
 2 | 
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # First run "sphinx-apidoc -f -o apidoc -d 6 -E -T -M ../textattack"
 5 | # Then run "make html"
 6 | 
 7 | # You can set these variables from the command line, and also
 8 | # from the environment for the first two.
 9 | SPHINXOPTS    ?=
10 | SPHINXBUILD   ?= sphinx-build
11 | SOURCEDIR     = .
12 | BUILDDIR      = _build
13 | 
14 | # Put it first so that "make" without argument is like "make help".
15 | help:
16 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
17 | 
18 | # For autobuild
19 | # https://pypi.org/project/sphinx-autobuild/
20 | livehtml:
21 | 	sphinx-autobuild -b html $(SPHINXOPTS) "$(SOURCEDIR)" "$(BUILDDIR)/html"
22 | 
23 | .PHONY: help Makefile
24 | 
25 | # Catch-all target: route all unknown targets to Sphinx using the new
26 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
27 | %: Makefile
28 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
29 | 
30 | 
--------------------------------------------------------------------------------
/docs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/__init__.py
--------------------------------------------------------------------------------
/docs/_static/css/custom.css:
--------------------------------------------------------------------------------
 1 | div.wy-side-nav-search .version {
 2 |   color: #404040;
 3 |   font-weight: bold;
 4 | }
 5 | 
 6 | nav.wy-nav-top {
 7 |   background: #AA2396;
 8 | }
 9 | 
10 | div.wy-nav-content {
11 |   max-width: 1000px;
12 | }
13 | 
14 | span.caption-text {
15 |   color: #cc4878;
16 | }
17 | 
18 | /* Change header fonts to Cambria */
19 | .rst-content .toctree-wrapper>p.caption, h1, h2, h3, h4, h5, h6, legend {
20 |   font-family: 'Cambria', serif;
21 | }
22 | 
23 | /* Change non-header default fonts to Helvetica */
24 | /** {
25 |   font-family: 'Helvetica', sans-serif;
26 | }*/
--------------------------------------------------------------------------------
/docs/_static/imgs/benchmark/search-example.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/_static/imgs/benchmark/search-example.pdf
--------------------------------------------------------------------------------
/docs/_static/imgs/benchmark/search-fig1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/_static/imgs/benchmark/search-fig1.png
--------------------------------------------------------------------------------
/docs/_static/imgs/benchmark/search-fig2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/_static/imgs/benchmark/search-fig2.png
--------------------------------------------------------------------------------
/docs/_static/imgs/benchmark/search-table1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/_static/imgs/benchmark/search-table1.png
--------------------------------------------------------------------------------
/docs/_static/imgs/benchmark/search-table2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/_static/imgs/benchmark/search-table2.png
--------------------------------------------------------------------------------
/docs/_static/imgs/benchmark/search-table31.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/_static/imgs/benchmark/search-table31.png
--------------------------------------------------------------------------------
/docs/_static/imgs/benchmark/search-table32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/_static/imgs/benchmark/search-table32.png -------------------------------------------------------------------------------- /docs/_static/imgs/benchmark/table3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/_static/imgs/benchmark/table3.png -------------------------------------------------------------------------------- /docs/_static/imgs/benchmark/table4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/_static/imgs/benchmark/table4.png -------------------------------------------------------------------------------- /docs/_static/imgs/benchmark/table5-main.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/_static/imgs/benchmark/table5-main.png -------------------------------------------------------------------------------- /docs/_static/imgs/benchmark/table7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/_static/imgs/benchmark/table7.png -------------------------------------------------------------------------------- /docs/_static/imgs/benchmark/table9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/_static/imgs/benchmark/table9.png -------------------------------------------------------------------------------- /docs/_static/imgs/intro/01-categorized-attacks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/_static/imgs/intro/01-categorized-attacks.png -------------------------------------------------------------------------------- /docs/_static/imgs/intro/ae_papers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/_static/imgs/intro/ae_papers.png -------------------------------------------------------------------------------- /docs/_static/imgs/intro/mr_aes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/_static/imgs/intro/mr_aes.png -------------------------------------------------------------------------------- /docs/_static/imgs/intro/mr_aes_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/_static/imgs/intro/mr_aes_table.png -------------------------------------------------------------------------------- /docs/_static/imgs/intro/pig_airliner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/_static/imgs/intro/pig_airliner.png 
--------------------------------------------------------------------------------
/docs/_static/imgs/intro/textattack_components.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/_static/imgs/intro/textattack_components.png
--------------------------------------------------------------------------------
/docs/_static/imgs/intro/textattack_ecosystem.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/_static/imgs/intro/textattack_ecosystem.png
--------------------------------------------------------------------------------
/docs/_static/imgs/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/_static/imgs/overview.png
--------------------------------------------------------------------------------
/docs/api/attack.rst:
--------------------------------------------------------------------------------
 1 | Attack API Reference
 2 | =======================
 3 | 
 4 | Attack
 5 | ------------
 6 | An attack is composed of four components:
 7 | 
 8 | - :ref:`Goal Functions `: stipulate the goal of the attack, such as changing the prediction score of a classification model or changing all of the words in a translation output.
 9 | - :ref:`Constraints `: determine if a potential perturbation is valid with respect to the original input.
10 | - :ref:`Transformations `: take a text input and transform it by inserting and deleting characters, words, and/or phrases.
11 | - :ref:`Search Methods `: explore the space of possible **transformations** within the defined **constraints** and attempt to find a successful perturbation which satisfies the **goal function**.
12 | 
13 | The :class:`~textattack.Attack` class represents an adversarial attack composed of a goal function, search method, transformation, and constraints.
14 | 
15 | .. autoclass:: textattack.Attack
16 |    :members:
17 | 
18 | AttackRecipe
19 | -------------
20 | An attack recipe is a subclass of the :class:`~textattack.Attack` class with a special method :meth:`build` that
21 | returns a pre-built :class:`~textattack.Attack` corresponding to an attack from the literature.
22 | 
23 | 
24 | 
25 | .. autoclass:: textattack.attack_recipes.AttackRecipe
26 |    :members:
27 | 
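The four components above map directly onto the `textattack.Attack` constructor. As a rough sketch (assuming `model_wrapper` is a wrapped model as in the Quick Tour), an attack in the spirit of TextFooler can be assembled by hand:

```python
import textattack
from textattack.constraints.pre_transformation import (
    RepeatModification,
    StopwordModification,
)
from textattack.goal_functions import UntargetedClassification
from textattack.search_methods import GreedyWordSwapWIR
from textattack.transformations import WordSwapEmbedding

# One argument per component: when the attack succeeds, which perturbations
# are valid, how the text is perturbed, and how the space is explored.
goal_function = UntargetedClassification(model_wrapper)
constraints = [RepeatModification(), StopwordModification()]
transformation = WordSwapEmbedding(max_candidates=50)
search_method = GreedyWordSwapWIR(wir_method="delete")

attack = textattack.Attack(goal_function, constraints, transformation, search_method)
```

Prebuilt recipes bundle exactly these kinds of choices; for example, `textattack.attack_recipes.TextFoolerJin2019.build(model_wrapper)` returns a fully configured `Attack` of this shape.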
--------------------------------------------------------------------------------
/docs/api/attack_results.rst:
--------------------------------------------------------------------------------
 1 | Attack Result API Reference
 2 | ============================
 3 | 
 4 | AttackResult
 5 | -------------
 6 | .. autoclass:: textattack.attack_results.AttackResult
 7 |    :members:
 8 | 
 9 | SuccessfulAttackResult
10 | -----------------------
11 | .. autoclass:: textattack.attack_results.SuccessfulAttackResult
12 |    :members:
13 | 
14 | FailedAttackResult
15 | -----------------------
16 | .. autoclass:: textattack.attack_results.FailedAttackResult
17 |    :members:
18 | 
19 | SkippedAttackResult
20 | -----------------------
21 | .. autoclass:: textattack.attack_results.SkippedAttackResult
22 |    :members:
23 | 
24 | MaximizedAttackResult
25 | -----------------------
26 | .. autoclass:: textattack.attack_results.MaximizedAttackResult
27 |    :members:
28 | 
--------------------------------------------------------------------------------
/docs/api/attacker.rst:
--------------------------------------------------------------------------------
 1 | Attacker API Reference
 2 | =======================
 3 | 
 4 | Attacker
 5 | -------------
 6 | While :class:`~textattack.Attack` is the main class used to carry out the adversarial attack, it is only useful for attacking one example at a time.
 7 | It lacks features that support attacking multiple samples in parallel (i.e. multi-GPU), saving checkpoints, or logging results to a text file, a CSV file, or Weights & Biases.
 8 | :class:`~textattack.Attacker` provides these features in an easy-to-use API.
 9 | 
10 | .. autoclass:: textattack.Attacker
11 |    :members:
12 | 
13 | 
14 | AttackArgs
15 | -------------
16 | :class:`~textattack.AttackArgs` represents the arguments to be passed to :class:`~textattack.Attacker`, such as the number of examples to attack, the interval at which to save checkpoints, and logging details.
17 | 
18 | .. autoclass:: textattack.AttackArgs
19 |    :members:
20 | 
--------------------------------------------------------------------------------
/docs/api/constraints.rst:
--------------------------------------------------------------------------------
 1 | Constraints API Reference
 2 | ============================
 3 | 
 4 | Constraint
 5 | ------------
 6 | .. autoclass:: textattack.constraints.Constraint
 7 |    :members:
 8 | 
 9 | PreTransformationConstraint
10 | -----------------------------
11 | .. autoclass:: textattack.constraints.PreTransformationConstraint
12 |    :members:
13 | 
14 | 
--------------------------------------------------------------------------------
/docs/api/datasets.rst:
--------------------------------------------------------------------------------
 1 | Datasets API Reference
 2 | =============================
 3 | The dataset classes define the dataset objects used for carrying out attacks, augmentation, and training.
 4 | The :class:`~textattack.datasets.Dataset` class is the most basic class and can be used to wrap a list of input and output pairs.
 5 | To load datasets from text, CSV, or JSON files, we recommend using the 🤗 Datasets library to first
 6 | load the data as a :obj:`datasets.Dataset` object and then pass it to TextAttack's :class:`~textattack.datasets.HuggingFaceDataset` class.
 7 | 
 8 | Dataset
 9 | ----------
10 | .. autoclass:: textattack.datasets.Dataset
11 |    :members: __getitem__, __len__
12 | 
13 | HuggingFaceDataset
14 | -------------------
15 | .. autoclass:: textattack.datasets.HuggingFaceDataset
16 |    :members: __getitem__, __len__
17 | 
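As a concrete illustration of that recommended path, the sketch below loads a local CSV with the 🤗 Datasets library and wraps it for TextAttack; `reviews.csv` and its `text`/`label` columns are hypothetical placeholders:

```python
import datasets

import textattack

# Load a local CSV with 🤗 Datasets, then wrap it for TextAttack.
raw = datasets.load_dataset("csv", data_files="reviews.csv", split="train")
dataset = textattack.datasets.HuggingFaceDataset(
    raw, dataset_columns=(["text"], "label")  # (input columns, output column)
)

# Alternatively, the plain Dataset class wraps (input, output) pairs directly.
toy_dataset = textattack.datasets.Dataset(
    [("I enjoyed this film.", 1), ("Dull and predictable.", 0)]
)
```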
--------------------------------------------------------------------------------
/docs/api/goal_functions.rst:
--------------------------------------------------------------------------------
 1 | Goal Functions API Reference
 2 | ============================
 3 | 
 4 | :class:`~textattack.goal_functions.GoalFunction` determines both the conditions under which the attack is successful (in terms of the model outputs)
 5 | and the heuristic score that we want to maximize when searching for the solution.
 6 | 
 7 | GoalFunction
 8 | ------------
 9 | .. autoclass:: textattack.goal_functions.GoalFunction
10 |    :members:
11 | 
12 | ClassificationGoalFunction
13 | --------------------------
14 | .. autoclass:: textattack.goal_functions.classification.ClassificationGoalFunction
15 |    :members:
16 | 
17 | TargetedClassification
18 | ----------------------
19 | .. autoclass:: textattack.goal_functions.classification.TargetedClassification
20 |    :members:
21 | 
22 | UntargetedClassification
23 | ------------------------
24 | .. autoclass:: textattack.goal_functions.classification.UntargetedClassification
25 |    :members:
26 | 
27 | InputReduction
28 | --------------
29 | .. autoclass:: textattack.goal_functions.classification.InputReduction
30 |    :members:
31 | 
32 | TextToTextGoalFunction
33 | -----------------------
34 | .. autoclass:: textattack.goal_functions.text.TextToTextGoalFunction
35 |    :members:
36 | 
37 | MinimizeBleu
38 | -------------
39 | .. autoclass:: textattack.goal_functions.text.MinimizeBleu
40 |    :members:
41 | 
42 | NonOverlappingOutput
43 | ----------------------
44 | .. autoclass:: textattack.goal_functions.text.NonOverlappingOutput
45 |    :members:
46 | 
47 | 
--------------------------------------------------------------------------------
/docs/api/search_methods.rst:
--------------------------------------------------------------------------------
 1 | Search Methods API Reference
 2 | ============================
 3 | 
 4 | :class:`~textattack.search_methods.SearchMethod` attempts to find the optimal set of perturbations that will produce an adversarial example.
 5 | Finding such optimal perturbations becomes a combinatorial optimization problem, and search methods are typically heuristic search algorithms designed
 6 | to solve the underlying combinatorial problem.
 7 | 
 8 | A more in-depth study of search algorithms for NLP adversarial attacks can be found in the following work:
 9 | `Searching for a Search Method: Benchmarking Search Algorithms for Generating NLP Adversarial Examples `_
10 | by Jin Yong Yoo, John X. Morris, Eli Lifland, and Yanjun Qi.
11 | 
12 | SearchMethod
13 | ------------
14 | .. autoclass:: textattack.search_methods.SearchMethod
15 |    :members:
16 | 
17 | BeamSearch
18 | ------------
19 | .. autoclass:: textattack.search_methods.BeamSearch
20 |    :members:
21 | 
22 | GreedySearch
23 | ------------
24 | .. autoclass:: textattack.search_methods.GreedySearch
25 |    :members:
26 | 
27 | GreedyWordSwapWIR
28 | ------------------
29 | .. autoclass:: textattack.search_methods.GreedyWordSwapWIR
30 |    :members:
31 | 
32 | AlzantotGeneticAlgorithm
33 | -------------------------
34 | .. autoclass:: textattack.search_methods.AlzantotGeneticAlgorithm
35 |    :members:
36 | 
37 | ImprovedGeneticAlgorithm
38 | -------------------------
39 | .. autoclass:: textattack.search_methods.ImprovedGeneticAlgorithm
40 |    :members:
41 | 
42 | ParticleSwarmOptimization
43 | --------------------------
44 | .. autoclass:: textattack.search_methods.ParticleSwarmOptimization
45 |    :members:
46 | 
47 | 
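To show how a different search method slots into the same component structure, here is a hedged sketch pairing beam search with a composite transformation. It again assumes a `model_wrapper` as in the Quick Tour, and the particular component mix is illustrative rather than a recipe from the literature:

```python
import textattack
from textattack.constraints.pre_transformation import (
    RepeatModification,
    StopwordModification,
)
from textattack.goal_functions import UntargetedClassification
from textattack.search_methods import BeamSearch
from textattack.transformations import (
    CompositeTransformation,
    WordSwapEmbedding,
    WordSwapQWERTY,
)

# CompositeTransformation pools candidates from several transformations;
# BeamSearch keeps the `beam_width` best candidates at every step.
transformation = CompositeTransformation(
    [WordSwapEmbedding(max_candidates=20), WordSwapQWERTY()]
)
attack = textattack.Attack(
    UntargetedClassification(model_wrapper),
    [RepeatModification(), StopwordModification()],
    transformation,
    BeamSearch(beam_width=4),
)
```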
--------------------------------------------------------------------------------
/docs/api/transformations.rst:
--------------------------------------------------------------------------------
 1 | Transformations API Reference
 2 | ===============================
 3 | 
 4 | Transformation
 5 | ----------------
 6 | .. autoclass:: textattack.transformations.Transformation
 7 |    :members:
 8 | 
 9 | Composite Transformation
10 | -----------------------------
11 | .. autoclass:: textattack.transformations.CompositeTransformation
12 |    :members:
13 | 
14 | 
15 | 
16 | .. toctree::
17 |    :maxdepth: 6
18 | 
19 |    textattack.transformations.sentence_transformations
20 |    textattack.transformations.word_insertions
21 |    textattack.transformations.word_merges
22 |    textattack.transformations.word_swaps
23 | 
24 | 
--------------------------------------------------------------------------------
/docs/apidoc/textattack.attack_results.rst:
--------------------------------------------------------------------------------
 1 | textattack.attack\_results package
 2 | ==================================
 3 | 
 4 | .. automodule:: textattack.attack_results
 5 |    :members:
 6 |    :undoc-members:
 7 |    :show-inheritance:
 8 | 
 9 | 
10 | 
11 | 
12 | .. automodule:: textattack.attack_results.attack_result
13 |    :members:
14 |    :undoc-members:
15 |    :show-inheritance:
16 | 
17 | 
18 | .. automodule:: textattack.attack_results.failed_attack_result
19 |    :members:
20 |    :undoc-members:
21 |    :show-inheritance:
22 | 
23 | 
24 | .. automodule:: textattack.attack_results.maximized_attack_result
25 |    :members:
26 |    :undoc-members:
27 |    :show-inheritance:
28 | 
29 | 
30 | .. automodule:: textattack.attack_results.skipped_attack_result
31 |    :members:
32 |    :undoc-members:
33 |    :show-inheritance:
34 | 
35 | 
36 | .. automodule:: textattack.attack_results.successful_attack_result
37 |    :members:
38 |    :undoc-members:
39 |    :show-inheritance:
40 | 
--------------------------------------------------------------------------------
/docs/apidoc/textattack.augmentation.rst:
--------------------------------------------------------------------------------
 1 | textattack.augmentation package
 2 | ===============================
 3 | 
 4 | .. automodule:: textattack.augmentation
 5 |    :members:
 6 |    :undoc-members:
 7 |    :show-inheritance:
 8 | 
 9 | 
10 | 
11 | 
12 | .. automodule:: textattack.augmentation.augmenter
13 |    :members:
14 |    :undoc-members:
15 |    :show-inheritance:
16 | 
17 | 
18 | .. automodule:: textattack.augmentation.recipes
19 |    :members:
20 |    :undoc-members:
21 |    :show-inheritance:
22 | 
--------------------------------------------------------------------------------
/docs/apidoc/textattack.commands.rst:
--------------------------------------------------------------------------------
 1 | textattack.commands package
 2 | ===========================
 3 | 
 4 | .. automodule:: textattack.commands
 5 |    :members:
 6 |    :undoc-members:
 7 |    :show-inheritance:
 8 | 
 9 | 
10 | 
11 | 
12 | .. automodule:: textattack.commands.attack_command
13 |    :members:
14 |    :undoc-members:
15 |    :show-inheritance:
16 | 
17 | 
18 | .. automodule:: textattack.commands.attack_resume_command
19 |    :members:
20 |    :undoc-members:
21 |    :show-inheritance:
22 | 
23 | 
24 | .. automodule:: textattack.commands.augment_command
25 |    :members:
26 |    :undoc-members:
27 |    :show-inheritance:
28 | 
29 | 
30 | .. automodule:: textattack.commands.benchmark_recipe_command
31 |    :members:
32 |    :undoc-members:
33 |    :show-inheritance:
34 | 
35 | 
36 | .. automodule:: textattack.commands.eval_model_command
37 |    :members:
38 |    :undoc-members:
39 |    :show-inheritance:
40 | 
41 | 
42 | .. automodule:: textattack.commands.list_things_command
43 |    :members:
44 |    :undoc-members:
45 |    :show-inheritance:
46 | 
47 | 
48 | .. automodule:: textattack.commands.peek_dataset_command
49 |    :members:
50 |    :undoc-members:
51 |    :show-inheritance:
52 | 
53 | 
54 | .. automodule:: textattack.commands.textattack_cli
55 |    :members:
56 |    :undoc-members:
57 |    :show-inheritance:
58 | 
59 | 
60 | .. automodule:: textattack.commands.textattack_command
61 |    :members:
62 |    :undoc-members:
63 |    :show-inheritance:
64 | 
65 | 
66 | ..
automodule:: textattack.commands.train_model_command 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.constraints.grammaticality.language_models.google_language_model.rst: -------------------------------------------------------------------------------- 1 | textattack.constraints.grammaticality.language\_models.google\_language\_model package 2 | ====================================================================================== 3 | 4 | .. automodule:: textattack.constraints.grammaticality.language_models.google_language_model 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. automodule:: textattack.constraints.grammaticality.language_models.google_language_model.alzantot_goog_lm 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | .. automodule:: textattack.constraints.grammaticality.language_models.google_language_model.google_language_model 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | .. automodule:: textattack.constraints.grammaticality.language_models.google_language_model.lm_data_utils 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | 30 | .. automodule:: textattack.constraints.grammaticality.language_models.google_language_model.lm_utils 31 | :members: 32 | :undoc-members: 33 | :show-inheritance: 34 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.constraints.grammaticality.language_models.learning_to_write.rst: -------------------------------------------------------------------------------- 1 | textattack.constraints.grammaticality.language\_models.learning\_to\_write package 2 | ================================================================================== 3 | 4 | .. automodule:: textattack.constraints.grammaticality.language_models.learning_to_write 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. automodule:: textattack.constraints.grammaticality.language_models.learning_to_write.adaptive_softmax 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | .. automodule:: textattack.constraints.grammaticality.language_models.learning_to_write.language_model_helpers 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | .. automodule:: textattack.constraints.grammaticality.language_models.learning_to_write.learning_to_write 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | 30 | .. automodule:: textattack.constraints.grammaticality.language_models.learning_to_write.rnn_model 31 | :members: 32 | :undoc-members: 33 | :show-inheritance: 34 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.constraints.grammaticality.language_models.rst: -------------------------------------------------------------------------------- 1 | textattack.constraints.grammaticality.language\_models package 2 | ============================================================== 3 | 4 | .. automodule:: textattack.constraints.grammaticality.language_models 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | .. toctree:: 12 | :maxdepth: 6 13 | 14 | textattack.constraints.grammaticality.language_models.google_language_model 15 | textattack.constraints.grammaticality.language_models.learning_to_write 16 | 17 | 18 | 19 | 20 | .. 
automodule:: textattack.constraints.grammaticality.language_models.gpt2 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | 26 | .. automodule:: textattack.constraints.grammaticality.language_models.language_model_constraint 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.constraints.grammaticality.rst: -------------------------------------------------------------------------------- 1 | textattack.constraints.grammaticality package 2 | ============================================= 3 | 4 | .. automodule:: textattack.constraints.grammaticality 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | .. toctree:: 12 | :maxdepth: 6 13 | 14 | textattack.constraints.grammaticality.language_models 15 | 16 | 17 | 18 | 19 | .. automodule:: textattack.constraints.grammaticality.cola 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | 25 | .. automodule:: textattack.constraints.grammaticality.language_tool 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | 31 | .. automodule:: textattack.constraints.grammaticality.part_of_speech 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.constraints.overlap.rst: -------------------------------------------------------------------------------- 1 | textattack.constraints.overlap package 2 | ====================================== 3 | 4 | .. automodule:: textattack.constraints.overlap 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. automodule:: textattack.constraints.overlap.bleu_score 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | .. automodule:: textattack.constraints.overlap.chrf_score 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | .. automodule:: textattack.constraints.overlap.levenshtein_edit_distance 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | 30 | .. automodule:: textattack.constraints.overlap.max_words_perturbed 31 | :members: 32 | :undoc-members: 33 | :show-inheritance: 34 | 35 | 36 | .. automodule:: textattack.constraints.overlap.meteor_score 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.constraints.pre_transformation.rst: -------------------------------------------------------------------------------- 1 | textattack.constraints.pre\_transformation package 2 | ================================================== 3 | 4 | .. automodule:: textattack.constraints.pre_transformation 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. automodule:: textattack.constraints.pre_transformation.input_column_modification 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | .. automodule:: textattack.constraints.pre_transformation.max_modification_rate 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | .. automodule:: textattack.constraints.pre_transformation.max_word_index_modification 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | 30 | .. automodule:: textattack.constraints.pre_transformation.min_word_length 31 | :members: 32 | :undoc-members: 33 | :show-inheritance: 34 | 35 | 36 | .. 
automodule:: textattack.constraints.pre_transformation.repeat_modification 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | 41 | 42 | .. automodule:: textattack.constraints.pre_transformation.stopword_modification 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | 48 | .. automodule:: textattack.constraints.pre_transformation.unmodifiable_indices 49 | :members: 50 | :undoc-members: 51 | :show-inheritance: 52 | 53 | 54 | .. automodule:: textattack.constraints.pre_transformation.unmodifiable_phrases 55 | :members: 56 | :undoc-members: 57 | :show-inheritance: 58 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.constraints.rst: -------------------------------------------------------------------------------- 1 | textattack.constraints package 2 | ============================== 3 | 4 | .. automodule:: textattack.constraints 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | .. toctree:: 12 | :maxdepth: 6 13 | 14 | textattack.constraints.grammaticality 15 | textattack.constraints.overlap 16 | textattack.constraints.pre_transformation 17 | textattack.constraints.semantics 18 | 19 | 20 | 21 | 22 | .. automodule:: textattack.constraints.constraint 23 | :members: 24 | :undoc-members: 25 | :show-inheritance: 26 | 27 | 28 | .. automodule:: textattack.constraints.pre_transformation_constraint 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.constraints.semantics.rst: -------------------------------------------------------------------------------- 1 | textattack.constraints.semantics package 2 | ======================================== 3 | 4 | .. automodule:: textattack.constraints.semantics 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | .. toctree:: 12 | :maxdepth: 6 13 | 14 | textattack.constraints.semantics.sentence_encoders 15 | 16 | 17 | 18 | 19 | .. automodule:: textattack.constraints.semantics.bert_score 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | 25 | .. automodule:: textattack.constraints.semantics.word_embedding_distance 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.constraints.semantics.sentence_encoders.infer_sent.rst: -------------------------------------------------------------------------------- 1 | textattack.constraints.semantics.sentence\_encoders.infer\_sent package 2 | ======================================================================= 3 | 4 | .. automodule:: textattack.constraints.semantics.sentence_encoders.infer_sent 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. automodule:: textattack.constraints.semantics.sentence_encoders.infer_sent.infer_sent 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | .. 
automodule:: textattack.constraints.semantics.sentence_encoders.infer_sent.infer_sent_model 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.constraints.semantics.sentence_encoders.rst: -------------------------------------------------------------------------------- 1 | textattack.constraints.semantics.sentence\_encoders package 2 | =========================================================== 3 | 4 | .. automodule:: textattack.constraints.semantics.sentence_encoders 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | .. toctree:: 12 | :maxdepth: 6 13 | 14 | textattack.constraints.semantics.sentence_encoders.sentence_bert 15 | textattack.constraints.semantics.sentence_encoders.infer_sent 16 | textattack.constraints.semantics.sentence_encoders.universal_sentence_encoder 17 | 18 | 19 | 20 | 21 | .. automodule:: textattack.constraints.semantics.sentence_encoders.sentence_encoder 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | 26 | 27 | .. automodule:: textattack.constraints.semantics.sentence_encoders.thought_vector 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.constraints.semantics.sentence_encoders.sentence_bert.rst: -------------------------------------------------------------------------------- 1 | textattack.constraints.semantics.sentence\_encoders.sentence\_bert package 2 | ========================================================================== 3 | 4 | .. automodule:: textattack.constraints.semantics.sentence_encoders.sentence_bert 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. automodule:: textattack.constraints.semantics.sentence_encoders.sentence_bert.sbert 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.constraints.semantics.sentence_encoders.universal_sentence_encoder.rst: -------------------------------------------------------------------------------- 1 | textattack.constraints.semantics.sentence\_encoders.universal\_sentence\_encoder package 2 | ======================================================================================== 3 | 4 | .. automodule:: textattack.constraints.semantics.sentence_encoders.universal_sentence_encoder 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. automodule:: textattack.constraints.semantics.sentence_encoders.universal_sentence_encoder.multilingual_universal_sentence_encoder 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | .. automodule:: textattack.constraints.semantics.sentence_encoders.universal_sentence_encoder.universal_sentence_encoder 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.datasets.helpers.rst: -------------------------------------------------------------------------------- 1 | textattack.datasets.helpers package 2 | =================================== 3 | 4 | .. automodule:: textattack.datasets.helpers 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | ..
automodule:: textattack.datasets.helpers.ted_multi 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.datasets.rst: -------------------------------------------------------------------------------- 1 | textattack.datasets package 2 | =========================== 3 | 4 | .. automodule:: textattack.datasets 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | .. toctree:: 12 | :maxdepth: 6 13 | 14 | textattack.datasets.helpers 15 | 16 | 17 | 18 | 19 | .. automodule:: textattack.datasets.dataset 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | 25 | .. automodule:: textattack.datasets.huggingface_dataset 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.goal_function_results.rst: -------------------------------------------------------------------------------- 1 | textattack.goal\_function\_results package 2 | ========================================== 3 | 4 | .. automodule:: textattack.goal_function_results 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. automodule:: textattack.goal_function_results.classification_goal_function_result 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | .. automodule:: textattack.goal_function_results.goal_function_result 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | .. automodule:: textattack.goal_function_results.text_to_text_goal_function_result 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.goal_functions.classification.rst: -------------------------------------------------------------------------------- 1 | textattack.goal\_functions.classification package 2 | ================================================= 3 | 4 | .. automodule:: textattack.goal_functions.classification 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. automodule:: textattack.goal_functions.classification.classification_goal_function 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | .. automodule:: textattack.goal_functions.classification.input_reduction 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | .. automodule:: textattack.goal_functions.classification.targeted_classification 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | 30 | .. automodule:: textattack.goal_functions.classification.untargeted_classification 31 | :members: 32 | :undoc-members: 33 | :show-inheritance: 34 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.goal_functions.rst: -------------------------------------------------------------------------------- 1 | textattack.goal\_functions package 2 | ================================== 3 | 4 | .. automodule:: textattack.goal_functions 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | .. toctree:: 12 | :maxdepth: 6 13 | 14 | textattack.goal_functions.classification 15 | textattack.goal_functions.text 16 | 17 | 18 | 19 | 20 | .. 
automodule:: textattack.goal_functions.goal_function 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.goal_functions.text.rst: -------------------------------------------------------------------------------- 1 | textattack.goal\_functions.text package 2 | ======================================= 3 | 4 | .. automodule:: textattack.goal_functions.text 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. automodule:: textattack.goal_functions.text.minimize_bleu 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | .. automodule:: textattack.goal_functions.text.non_overlapping_output 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | .. automodule:: textattack.goal_functions.text.text_to_text_goal_function 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.llms.rst: -------------------------------------------------------------------------------- 1 | textattack.llms package 2 | ========================= 3 | 4 | .. automodule:: textattack.llms 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | .. automodule:: textattack.llms.huggingface_llm_wrapper 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | .. automodule:: textattack.llms.chat_gpt_wrapper 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.loggers.rst: -------------------------------------------------------------------------------- 1 | textattack.loggers package 2 | ========================== 3 | 4 | .. automodule:: textattack.loggers 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. automodule:: textattack.loggers.attack_log_manager 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | .. automodule:: textattack.loggers.csv_logger 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | .. automodule:: textattack.loggers.file_logger 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | 30 | .. automodule:: textattack.loggers.logger 31 | :members: 32 | :undoc-members: 33 | :show-inheritance: 34 | 35 | 36 | .. automodule:: textattack.loggers.visdom_logger 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | 41 | 42 | .. automodule:: textattack.loggers.weights_and_biases_logger 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.metrics.attack_metrics.rst: -------------------------------------------------------------------------------- 1 | textattack.metrics.attack\_metrics package 2 | ========================================== 3 | 4 | .. automodule:: textattack.metrics.attack_metrics 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. automodule:: textattack.metrics.attack_metrics.attack_queries 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | .. automodule:: textattack.metrics.attack_metrics.attack_success_rate 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | .. 
automodule:: textattack.metrics.attack_metrics.words_perturbed 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.metrics.quality_metrics.rst: -------------------------------------------------------------------------------- 1 | textattack.metrics.quality\_metrics package 2 | =========================================== 3 | 4 | .. automodule:: textattack.metrics.quality_metrics 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. automodule:: textattack.metrics.quality_metrics.perplexity 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | .. automodule:: textattack.metrics.quality_metrics.use 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.metrics.rst: -------------------------------------------------------------------------------- 1 | textattack.metrics package 2 | ========================== 3 | 4 | .. automodule:: textattack.metrics 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | .. toctree:: 12 | :maxdepth: 6 13 | 14 | textattack.metrics.attack_metrics 15 | textattack.metrics.quality_metrics 16 | 17 | 18 | 19 | 20 | .. automodule:: textattack.metrics.metric 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.models.helpers.rst: -------------------------------------------------------------------------------- 1 | textattack.models.helpers package 2 | ================================= 3 | 4 | .. automodule:: textattack.models.helpers 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. automodule:: textattack.models.helpers.glove_embedding_layer 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | .. automodule:: textattack.models.helpers.lstm_for_classification 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | .. automodule:: textattack.models.helpers.t5_for_text_to_text 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | 30 | .. automodule:: textattack.models.helpers.utils 31 | :members: 32 | :undoc-members: 33 | :show-inheritance: 34 | 35 | 36 | .. automodule:: textattack.models.helpers.word_cnn_for_classification 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.models.rst: -------------------------------------------------------------------------------- 1 | textattack.models package 2 | ========================= 3 | 4 | .. automodule:: textattack.models 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | .. toctree:: 12 | :maxdepth: 6 13 | 14 | textattack.models.helpers 15 | textattack.models.tokenizers 16 | textattack.models.wrappers 17 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.models.tokenizers.rst: -------------------------------------------------------------------------------- 1 | textattack.models.tokenizers package 2 | ==================================== 3 | 4 | .. automodule:: textattack.models.tokenizers 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. 
automodule:: textattack.models.tokenizers.glove_tokenizer 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | .. automodule:: textattack.models.tokenizers.t5_tokenizer 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.models.wrappers.rst: -------------------------------------------------------------------------------- 1 | textattack.models.wrappers package 2 | ================================== 3 | 4 | .. automodule:: textattack.models.wrappers 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. automodule:: textattack.models.wrappers.huggingface_model_wrapper 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | .. automodule:: textattack.models.wrappers.model_wrapper 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | .. automodule:: textattack.models.wrappers.pytorch_model_wrapper 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | 30 | .. automodule:: textattack.models.wrappers.sklearn_model_wrapper 31 | :members: 32 | :undoc-members: 33 | :show-inheritance: 34 | 35 | 36 | .. automodule:: textattack.models.wrappers.tensorflow_model_wrapper 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.prompt_augmentation.rst: -------------------------------------------------------------------------------- 1 | textattack.prompt_augmentation package 2 | ======================================= 3 | 4 | .. automodule:: textattack.prompt_augmentation 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | .. automodule:: textattack.prompt_augmentation.prompt_augmentation_pipeline 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: -------------------------------------------------------------------------------- /docs/apidoc/textattack.rst: -------------------------------------------------------------------------------- 1 | textattack package 2 | ================== 3 | 4 | .. automodule:: textattack 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | .. toctree:: 12 | :maxdepth: 6 13 | 14 | textattack.attack_recipes 15 | textattack.attack_results 16 | textattack.augmentation 17 | textattack.commands 18 | textattack.constraints 19 | textattack.datasets 20 | textattack.goal_function_results 21 | textattack.goal_functions 22 | textattack.llms 23 | textattack.loggers 24 | textattack.metrics 25 | textattack.models 26 | textattack.prompt_augmentation 27 | textattack.search_methods 28 | textattack.shared 29 | textattack.transformations 30 | 31 | 32 | 33 | 34 | .. automodule:: textattack.attack 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | 40 | .. automodule:: textattack.attack_args 41 | :members: 42 | :undoc-members: 43 | :show-inheritance: 44 | 45 | 46 | .. automodule:: textattack.attacker 47 | :members: 48 | :undoc-members: 49 | :show-inheritance: 50 | 51 | 52 | .. automodule:: textattack.augment_args 53 | :members: 54 | :undoc-members: 55 | :show-inheritance: 56 | 57 | 58 | .. automodule:: textattack.dataset_args 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | 64 | .. automodule:: textattack.model_args 65 | :members: 66 | :undoc-members: 67 | :show-inheritance: 68 | 69 | 70 | .. 
automodule:: textattack.trainer 71 | :members: 72 | :undoc-members: 73 | :show-inheritance: 74 | 75 | 76 | .. automodule:: textattack.training_args 77 | :members: 78 | :undoc-members: 79 | :show-inheritance: 80 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.search_methods.rst: -------------------------------------------------------------------------------- 1 | textattack.search\_methods package 2 | ================================== 3 | 4 | .. automodule:: textattack.search_methods 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. automodule:: textattack.search_methods.alzantot_genetic_algorithm 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | .. automodule:: textattack.search_methods.beam_search 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | .. automodule:: textattack.search_methods.genetic_algorithm 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | 30 | .. automodule:: textattack.search_methods.greedy_search 31 | :members: 32 | :undoc-members: 33 | :show-inheritance: 34 | 35 | 36 | .. automodule:: textattack.search_methods.greedy_word_swap_wir 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | 41 | 42 | .. automodule:: textattack.search_methods.improved_genetic_algorithm 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | 48 | .. automodule:: textattack.search_methods.particle_swarm_optimization 49 | :members: 50 | :undoc-members: 51 | :show-inheritance: 52 | 53 | 54 | .. automodule:: textattack.search_methods.population_based_search 55 | :members: 56 | :undoc-members: 57 | :show-inheritance: 58 | 59 | 60 | .. automodule:: textattack.search_methods.search_method 61 | :members: 62 | :undoc-members: 63 | :show-inheritance: 64 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.shared.rst: -------------------------------------------------------------------------------- 1 | textattack.shared package 2 | ========================= 3 | 4 | .. automodule:: textattack.shared 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | .. toctree:: 12 | :maxdepth: 6 13 | 14 | textattack.shared.utils 15 | 16 | 17 | 18 | 19 | .. automodule:: textattack.shared.attacked_text 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | 25 | .. automodule:: textattack.shared.checkpoint 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | 31 | .. automodule:: textattack.shared.data 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | 37 | .. automodule:: textattack.shared.validators 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | 42 | 43 | .. automodule:: textattack.shared.word_embeddings 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.shared.utils.rst: -------------------------------------------------------------------------------- 1 | textattack.shared.utils package 2 | =============================== 3 | 4 | .. automodule:: textattack.shared.utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. automodule:: textattack.shared.utils.importing 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | .. 
automodule:: textattack.shared.utils.install 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | .. automodule:: textattack.shared.utils.misc 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | 30 | .. automodule:: textattack.shared.utils.strings 31 | :members: 32 | :undoc-members: 33 | :show-inheritance: 34 | 35 | 36 | .. automodule:: textattack.shared.utils.tensor 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.transformations.rst: -------------------------------------------------------------------------------- 1 | textattack.transformations package 2 | ================================== 3 | 4 | .. automodule:: textattack.transformations 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | .. toctree:: 12 | :maxdepth: 6 13 | 14 | textattack.transformations.sentence_transformations 15 | textattack.transformations.word_insertions 16 | textattack.transformations.word_merges 17 | textattack.transformations.word_swaps 18 | 19 | 20 | 21 | 22 | .. automodule:: textattack.transformations.composite_transformation 23 | :members: 24 | :undoc-members: 25 | :show-inheritance: 26 | 27 | 28 | .. automodule:: textattack.transformations.transformation 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | 34 | .. automodule:: textattack.transformations.word_deletion 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | 40 | .. automodule:: textattack.transformations.word_innerswap_random 41 | :members: 42 | :undoc-members: 43 | :show-inheritance: 44 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.transformations.sentence_transformations.rst: -------------------------------------------------------------------------------- 1 | textattack.transformations.sentence\_transformations package 2 | ============================================================ 3 | 4 | .. automodule:: textattack.transformations.sentence_transformations 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. automodule:: textattack.transformations.sentence_transformations.back_translation 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | .. automodule:: textattack.transformations.sentence_transformations.back_transcription 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | .. automodule:: textattack.transformations.sentence_transformations.sentence_transformation 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.transformations.word_insertions.rst: -------------------------------------------------------------------------------- 1 | textattack.transformations.word\_insertions package 2 | =================================================== 3 | 4 | .. automodule:: textattack.transformations.word_insertions 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. automodule:: textattack.transformations.word_insertions.word_insertion 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | .. automodule:: textattack.transformations.word_insertions.word_insertion_masked_lm 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | .. 
automodule:: textattack.transformations.word_insertions.word_insertion_random_synonym 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | -------------------------------------------------------------------------------- /docs/apidoc/textattack.transformations.word_merges.rst: -------------------------------------------------------------------------------- 1 | textattack.transformations.word\_merges package 2 | =============================================== 3 | 4 | .. automodule:: textattack.transformations.word_merges 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | 11 | 12 | .. automodule:: textattack.transformations.word_merges.word_merge 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | .. automodule:: textattack.transformations.word_merges.word_merge_masked_lm 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /docs/environment.yml: -------------------------------------------------------------------------------- 1 | name: textattackenv 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.8 6 | - cudatoolkit 7 | -------------------------------------------------------------------------------- /docs/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/docs/favicon.png -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | recommonmark 2 | Sphinx 3 | sphinx-autobuild 4 | sphinx-copybutton 5 | sphinx-markdown-tables 6 | sphinx-rtd-theme 7 | sphinxcontrib-applehelp 8 | sphinxcontrib-devhelp 9 | sphinxcontrib-htmlhelp 10 | sphinxcontrib-jsmath 11 | sphinxcontrib-qthelp 12 | sphinxcontrib-serializinghtml 13 | nbclient 14 | nbconvert 15 | nbsphinx 16 | widgetsnbextension 17 | ipykernel 18 | ipython 19 | ipython-genutils 20 | ipywidgets 21 | scipy 22 | tensorboard 23 | tensorboard-data-server 24 | tensorboard-plugin-wit 25 | tensorboardX 26 | tensorflow 27 | tensorflow-estimator 28 | tensorflow-hub 29 | tensorflow-text 30 | sentence-transformers 31 | transformers 32 | textattack 33 | sqlitedict 34 | stanza 35 | Cython 36 | -------------------------------------------------------------------------------- /examples/attack/attack_from_components.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Shows how to build an attack from components and use it on a pre-trained 3 | # model on the Yelp dataset. 4 | textattack attack --attack-n --goal-function untargeted-classification \ 5 | --model bert-base-uncased-yelp --num-examples 8 --transformation word-swap-wordnet \ 6 | --constraints edit-distance^12 max-words-perturbed^max_percent=0.75 repeat stopword \ 7 | --search greedy -------------------------------------------------------------------------------- /examples/attack/attack_huggingface_deepwordbug.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Shows how to attack a DistilBERT model fine-tuned on the SST2 dataset *from the 3 | # huggingface model repository* using the DeepWordBug recipe and 10 examples. 4 | textattack attack --model-from-huggingface distilbert-base-uncased-finetuned-sst-2-english --dataset-from-huggingface glue^sst2 --recipe deepwordbug --num-examples 10 5 | -------------------------------------------------------------------------------- /examples/attack/attack_roberta_sst2_textfooler.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Shows how to attack our RoBERTa model fine-tuned on SST2 using the TextFooler 3 | # recipe and 10 examples.
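# (To keep the attack results around, a logging flag can be appended, e.g.
# `--log-to-csv results.csv` -- the output filename here is only an illustration;
# run `textattack attack --help` for the full set of logging options.)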
4 | textattack attack --model roberta-base-sst2 --recipe textfooler --num-examples 10 -------------------------------------------------------------------------------- /examples/augmentation/.gitignore: -------------------------------------------------------------------------------- 1 | augment.csv # Don't commit the output file of this command 2 | -------------------------------------------------------------------------------- /examples/augmentation/augment.csv: -------------------------------------------------------------------------------- 1 | text,label 2 | "the rock is destined to be the new conan and that he's going to make a splash even greater than arnold , jean- claud van damme or steven segal.",1 3 | "the rock is destined to be the 21st century's new conan and that he's going to caravan make a splash even greater than arnold schwarzenegger , jean- claud van damme or steven segal.",1 4 | the gorgeously rarify continuation of 'the lord of the rings' trilogy is so huge that a column of give-and-take cannot adequately describe co-writer/director shaft jackson's expanded vision of j . r . r . tolkien's middle-earth .,1 5 | the gorgeously elaborate of 'the of the rings' trilogy is so huge that a column of words cannot adequately describe co-writer/director peter jackson's expanded of j . r . r . tolkien's middle-earth .,1 6 | take care different my cat offers a refreshingly of slice of asian cinema .,1 7 | take care of my cat offers a different slice of asian cinema .,1 8 | a technically well-made suspenser . . . but its abrupt drop in iq points as it races to the finish IT line proves simply too discouraging to let slide .,0 9 | a technically well-made suspenser . . . but its abrupt drop in iq points as it races to the finish demarcation proves plainly too discouraging to let slide .,0 10 | it's pic a mystery how the movie could be released in this condition .,0 11 | it's a mystery how the movie could in released be this condition .,0 12 | -------------------------------------------------------------------------------- /examples/augmentation/augment.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | textattack augment --input-csv examples.csv --output-csv output.csv --input-column text --recipe eda --pct-words-to-swap .1 --transformations-per-example 2 --exclude-original --overwrite 3 | -------------------------------------------------------------------------------- /examples/augmentation/examples.csv: -------------------------------------------------------------------------------- 1 | "text",label 2 | "the rock is destined to be the 21st century's new conan and that he's going to make a splash even greater than arnold schwarzenegger , jean- claud van damme or steven segal.", 1 3 | "the gorgeously elaborate continuation of 'the lord of the rings' trilogy is so huge that a column of words cannot adequately describe co-writer/director peter jackson's expanded vision of j . r . r . tolkien's middle-earth .", 1 4 | "take care of my cat offers a refreshingly different slice of asian cinema .", 1 5 | "a technically well-made suspenser . . . 
but its abrupt drop in iq points as it races to the finish line proves simply too discouraging to let slide .", 0 6 | "it's a mystery how the movie could be released in this condition .", 0 7 | -------------------------------------------------------------------------------- /examples/dataset/chinese_data_demo.tsv: -------------------------------------------------------------------------------- 1 | text label 2 | 一分都不想给,连个快递都不会送,第二次送到家,要是别人不告诉我几别人百块钱就白花了 0 3 | 优点忒多了,不用多介绍了. 1 4 | 京东东西非常好,物流也非常给力,送货小哥服务很热情,希望京东越来越好,赞一个?! 1 5 | 一半以上都有点小问题,有几个不能吃。 0 6 | 性价比高,毕竟华为也是国内名牌。 1 7 | 物流超级快。快递大哥态度很好的哟,打开快递真的是没有失望,和我想象中的一样,男票穿的很显瘦!牛仔裤控!满意极了,裤子男票穿走了,没办法上图,总之很好评 1 8 | 收到的苹果与图片不符,很小,并且一盒中有5个坏的。 0 9 | 发热量也太大了吧,刚开机没多久,仅上网,机器就很热了,gpu就没有下过50度,cp一直44度以上,不知道是正常的还是我的这台有问题,希望有人指教一下~ 0 10 | 买了两条,这条裤子码数偏大了! 0 11 | 手感冷冰冰的,除了小点好像没问题,蛮好的 1 12 | 面对鸦绿江大桥,附望远镜,还有飞镖,设施没话讲!下楼就是风景区很不错! 1 13 | 紫霞喜欢至尊宝,至尊宝喜欢白晶晶,白晶晶喜欢齐天大圣,齐天大圣喜欢紫霞,你以为齐天大圣和至尊宝是一个人,但是他们相差了500年。一切都对,除了时间不对。想要救紫霞就必须打败牛魔王,想要打败牛魔王就必须要变成孙悟空,想要变成孙悟空就必须忘掉七情六欲从此不能再有半点沾染。 人生就是这样:想自由地和心爱之人在一起必须要事业有成,想要事业有成就必须要抛弃天真戴上面具,当你变得有能力给心爱之人一切时,却发现找不回最初的自己亦失去了爱的能力。 1 14 | 收到货的那瞬间我崩溃了,不知道怎么形容了 0 15 | -------------------------------------------------------------------------------- /examples/train/train_albert_snli_entailment.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Trains `albert-base-v2` on the SNLI entailment task for 5 epochs. This is a 3 | # demonstration of how our training script can handle different `transformers` 4 | # models and customize for different datasets. 5 | textattack train --model-name-or-path albert-base-v2 --dataset snli --per-device-train-batch-size 8 --epochs 5 --learning-rate 1e-5 -------------------------------------------------------------------------------- /examples/train/train_bert_stsb_similarity.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Trains `bert-base-cased` on the STS-B task for 3 epochs. This is a demonstration 3 | # of how our training script handles regression. 4 | textattack train --model-name-or-path bert-base-cased --dataset glue^stsb --epochs 3 --learning-rate 1e-5 -------------------------------------------------------------------------------- /examples/train/train_lstm_imdb_sentiment_classification.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Trains an LSTM on the IMDB sentiment classification task for 50 epochs. This is a basic 3 | # demonstration of our training script and `datasets` integration. 4 | textattack train --model-name-or-path lstm --dataset imdb --epochs 50 --learning-rate 1e-5 -------------------------------------------------------------------------------- /examples/train/train_lstm_rotten_tomatoes_sentiment_classification.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Trains an LSTM on the Rotten Tomatoes sentiment classification task for 50 epochs. This is a basic 3 | # demonstration of our training script and `datasets` integration. 4 | textattack train --model-name-or-path lstm --dataset rotten_tomatoes --epochs 50 --learning-rate 1e-5 -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | # content of pytest.ini 2 | # (or tox.ini or setup.cfg) 3 | [pytest] 4 | addopts = -ra 5 | testpaths = tests 6 | markers = 7 | slow: a test that takes >60s to run. we don't run these on travis.
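# The slow tests can be selected explicitly with `pytest -m slow`, or skipped with `pytest -m "not slow"`.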
8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | bert-score>=0.3.5 2 | editdistance 3 | flair 4 | filelock 5 | language_tool_python 6 | lemminflect 7 | lru-dict 8 | datasets>=2.4.0 9 | nltk 10 | numpy>=1.21.0 11 | pandas>=1.0.1 12 | scipy>=1.4.1 13 | torch>=1.7.0,!=1.8 14 | transformers>=4.30.0 15 | terminaltables 16 | tqdm 17 | word2number 18 | num2words 19 | more-itertools 20 | pinyin>=0.4.0 21 | jieba 22 | OpenHowNet 23 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 88 3 | skip = __init__.py 4 | known_first_party = textattack 5 | sections = FUTURE, STDLIB, THIRDPARTY, FIRSTPARTY, LOCALFOLDER 6 | force_sort_within_sections = True 7 | multi_line_output = 3 8 | include_trailing_comma = True 9 | use_parentheses = True 10 | force_grid_wrap = 0 11 | 12 | [flake8] 13 | exclude = .git,__pycache__,wandb,build,dist 14 | ignore = E203, E266, E501, W503, D203 15 | max-complexity = 10 16 | max-line-length = 120 17 | mypy_config = mypy.ini 18 | per-file-ignores = __init__.py:F401 19 | -------------------------------------------------------------------------------- /tests/sample_inputs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QData/TextAttack/5fbb0762dffb841af8a360dcf42e962489d3a83b/tests/sample_inputs/__init__.py -------------------------------------------------------------------------------- /tests/sample_inputs/attack_from_file.py: -------------------------------------------------------------------------------- 1 | import textattack 2 | 3 | 4 | def Attack(model): 5 | goal_function = textattack.goal_functions.UntargetedClassification(model) 6 | search_method = textattack.search_methods.GreedyWordSwapWIR() 7 | transformation = textattack.transformations.WordSwapRandomCharacterSubstitution() 8 | constraints = [] 9 | return textattack.Attack(goal_function, constraints, transformation, search_method) 10 | -------------------------------------------------------------------------------- /tests/sample_inputs/augment.csv.txt: -------------------------------------------------------------------------------- 1 | text,label,another_column 2 | "For the last 8 years of his life, Galileo was under house arrest for espousing this man's theory", 5, "some text that needs to be preserved" 3 | "Signer of the Dec. 
of Indep., framer of the Constitution of Mass., second President of the United States", -13, "the answer to this question is John Adams" 4 | "In the title of an Aesop fable, this insect shared billing with a grasshopper", 1111, "these are from a jeopardy question, and this one is the ant" 5 | -------------------------------------------------------------------------------- /tests/sample_inputs/sst_model_and_dataset.py: -------------------------------------------------------------------------------- 1 | import transformers 2 | 3 | import textattack 4 | 5 | model_path = "distilbert-base-uncased-finetuned-sst-2-english" 6 | 7 | tokenizer = transformers.AutoTokenizer.from_pretrained(model_path) 8 | model = transformers.AutoModelForSequenceClassification.from_pretrained(model_path) 9 | 10 | model = textattack.models.wrappers.HuggingFaceModelWrapper(model, tokenizer) 11 | 12 | dataset = textattack.datasets.HuggingFaceDataset( 13 | "glue", subset="sst2", split="train", shuffle=False 14 | ) 15 | -------------------------------------------------------------------------------- /tests/sample_outputs/augment_test.csv.txt: -------------------------------------------------------------------------------- 1 | text,label,another_column 2 | "For the last 8 years of his life, Galileo was under house arrest for espousing this man's theory",5,some text that needs to be preserved 3 | "For the last 8 years of his lives, Galileo was under habitation arrest for espousing this man's theory",5,some text that needs to be preserved 4 | "For the last 8 yr of his life, Galileo was under house apprehending for espousing this man's theory",5,some text that needs to be preserved 5 | "Signer of the Dec. of Indep., framer of the Constitution of Mass., second President of the United States",-13,the answer to this question is John Adams 6 | "Signer of the Dec. of Indep., framer of the Constitution of Masse., seconds President of the United States",-13,the answer to this question is John Adams 7 | "Signer of the Dec. 
of Indep., framer of the Constitutions of Mass., second Chairwoman of the United States",-13,the answer to this question is John Adams 8 | "In the title of an Aesop fable, this insect shared billing with a grasshopper",1111,"these are from a jeopardy question, and this one is the ant" 9 | "Among the title of an Aesop fable, this beetle shared billing with a grasshopper",1111,"these are from a jeopardy question, and this one is the ant" 10 | "In the title of an Aesop fable, this insect exchanging invoice with a grasshopper",1111,"these are from a jeopardy question, and this one is the ant" 11 | -------------------------------------------------------------------------------- /tests/sample_outputs/csv_attack_log.csv: -------------------------------------------------------------------------------- 1 | "ground_truth_output","num_queries","original_output","original_score","original_text","perturbed_output","perturbed_score","perturbed_text","result_type" 2 | 1.0,28.0,1.0,0.09334743022918701,"lovingly photographed in the manner of a golden book sprung to [[life]] , stuart little 2 [[manages]] [[sweetness]] largely without stickiness .",0.0,0.6904040575027466,"lovingly photographed in the manner of a golden book sprung to [[ife]] , stuart little 2 [[manager]] [[seetness]] largely without stickiness .","Successful" 3 | 1.0,16.0,1.0,0.009427368640899658,"[[consistently]] [[clever]] and [[suspenseful]] .",0.0,0.8219608664512634,"[[conisstently]] [[celver]] and [[Huspenseful]] .","Successful" 4 | -------------------------------------------------------------------------------- /tests/sample_outputs/eval_model_hub_rt.txt: -------------------------------------------------------------------------------- 1 | textattack: Loading pre-trained model from HuggingFace model repository: textattack/distilbert-base-uncased-rotten-tomatoes 2 | Using custom data configuration default 3 | textattack: Loading nlp dataset rotten_tomatoes, split train. 4 | textattack: Got 4 predictions. 5 | textattack: Successes 3/4 (75.00%) 6 | -------------------------------------------------------------------------------- /tests/sample_outputs/eval_snli.txt: -------------------------------------------------------------------------------- 1 | textattack: Loading pre-trained model from HuggingFace model repository: textattack/bert-base-uncased-snli 2 | Downloading: 0%| | 0.00/3.85k [00:00 0 (74%) 45 | 46 | All that glitters is not gold 47 | 48 | All that sparkly is not gold 49 | 50 | Enter a sentence to attack or "q" to quit: 51 | -------------------------------------------------------------------------------- /tests/sample_outputs/json_attack_summary.json: -------------------------------------------------------------------------------- 1 | { 2 | "Attack Results": { 3 | "Number of successful attacks:": 2, 4 | "Number of failed attacks:": 0, 5 | "Number of skipped attacks:": 0, 6 | "Original accuracy:": 100.0, 7 | "Accuracy under attack:": 0.0, 8 | "Attack success rate:": 100.0, 9 | "Average perturbed word %:": 45.0, 10 | "Average num. 
words per input:": 12.0, 11 | "Avg num queries:": 22.0 12 | } 13 | } -------------------------------------------------------------------------------- /tests/sample_outputs/kuleshov_cnn_sst_2.txt: -------------------------------------------------------------------------------- 1 | /.*/Attack( 2 | (search_method): GreedySearch 3 | (goal_function): UntargetedClassification 4 | (transformation): WordSwapEmbedding( 5 | (max_candidates): 15 6 | (embedding): WordEmbedding 7 | ) 8 | (constraints): 9 | (0): MaxWordsPerturbed( 10 | (max_percent): 0.5 11 | (compare_against_original): True 12 | ) 13 | (1): ThoughtVector( 14 | (word_embedding): WordEmbedding 15 | (metric): max_euclidean 16 | (threshold): -0.2 17 | (window_size): inf 18 | (skip_text_shorter_than_window): False 19 | (compare_against_original): True 20 | ) 21 | (2): GPT2( 22 | (max_log_prob_diff): 2.0 23 | (compare_against_original): True 24 | ) 25 | (3): RepeatModification 26 | (4): StopwordModification 27 | (is_black_box): True 28 | ) 29 | 30 | --------------------------------------------- Result 1 --------------------------------------------- 31 | [[Positive (100%)]] --> [[Negative (69%)]] 32 | 33 | it 's a [[charming]] and [[often]] affecting journey . 34 | 35 | it 's a [[loveable]] and [[ordinarily]] affecting journey . 36 | 37 | 38 | --------------------------------------------- Result 2 --------------------------------------------- 39 | [[Negative (83%)]] --> [[Positive (90%)]] 40 | 41 | unflinchingly bleak and [[desperate]] 42 | 43 | unflinchingly bleak and [[desperation]] 44 | 45 | 46 | 47 | +-------------------------------+--------+ 48 | | Attack Results | | 49 | +-------------------------------+--------+ 50 | | Number of successful attacks: | 2 | 51 | | Number of failed attacks: | 0 | 52 | | Number of skipped attacks: | 0 | 53 | | Original accuracy: | 100.0% | 54 | | Accuracy under attack: | 0.0% | 55 | | Attack success rate: | 100.0% | 56 | | Average perturbed word %: | 25.0% | 57 | | Average num. 
words per input: | 6.0 | 58 | | Avg num queries: | 48.5 | 59 | +-------------------------------+--------+ 60 | -------------------------------------------------------------------------------- /tests/sample_outputs/list_augmentation_recipes.txt: -------------------------------------------------------------------------------- 1 | back_trans (textattack.augmentation.BackTranslationAugmenter) 2 | charswap (textattack.augmentation.CharSwapAugmenter) 3 | checklist (textattack.augmentation.CheckListAugmenter) 4 | clare (textattack.augmentation.CLAREAugmenter) 5 | eda (textattack.augmentation.EasyDataAugmenter) 6 | embedding (textattack.augmentation.EmbeddingAugmenter) 7 | wordnet (textattack.augmentation.WordNetAugmenter) 8 | -------------------------------------------------------------------------------- /tests/sample_outputs/run_attack_gradient_greedy_word_wir.txt: -------------------------------------------------------------------------------- 1 | /.*/Attack( 2 | (search_method): GreedyWordSwapWIR( 3 | (wir_method): gradient 4 | ) 5 | (goal_function): UntargetedClassification 6 | (transformation): WordSwapEmbedding( 7 | (max_candidates): 15 8 | (embedding): WordEmbedding 9 | ) 10 | (constraints): 11 | (0): RepeatModification 12 | (1): StopwordModification 13 | (is_black_box): False 14 | ) 15 | 16 | --------------------------------------------- Result 1 --------------------------------------------- 17 | [[Positive (99%)]] --> [[Negative (98%)]] 18 | 19 | goldmember is funny enough to [[justify]] the embarrassment of bringing a barf bag to the moviehouse . 20 | 21 | goldmember is funny enough to [[reasoned]] the embarrassment of bringing a barf bag to the moviehouse . 22 | 23 | 24 | --------------------------------------------- Result 2 --------------------------------------------- 25 | [[Positive (100%)]] --> [[Negative (100%)]] 26 | 27 | . . . a fairly disposable yet still [[entertaining]] b picture . 28 | 29 | . . . a fairly disposable yet still [[distracting]] b picture . 30 | 31 | 32 | --------------------------------------------- Result 3 --------------------------------------------- 33 | [[Positive (100%)]] --> [[[FAILED]]] 34 | 35 | it may not be particularly innovative , but the film's crisp , unaffected style and air of gentle longing make it unexpectedly rewarding . 36 | 37 | 38 | 39 | +-------------------------------+--------+ 40 | | Attack Results | | 41 | +-------------------------------+--------+ 42 | | Number of successful attacks: | 2 | 43 | | Number of failed attacks: | 1 | 44 | | Number of skipped attacks: | 0 | 45 | | Original accuracy: | 100.0% | 46 | | Accuracy under attack: | 33.33% | 47 | | Attack success rate: | 66.67% | 48 | | Average perturbed word %: | 9.38% | 49 | | Average num. 
words per input: | 15.0 | 50 | | Avg num queries: | 71.0 | 51 | +-------------------------------+--------+ 52 | -------------------------------------------------------------------------------- /tests/sample_outputs/run_attack_nonoverlapping_t5ende_editdistance_bleu.txt: -------------------------------------------------------------------------------- 1 | /.*/Attack( 2 | (search_method): GreedySearch 3 | (goal_function): NonOverlappingOutput 4 | (transformation): WordSwapRandomCharacterSubstitution 5 | (constraints): 6 | (0): LevenshteinEditDistance( 7 | (max_edit_distance): 12 8 | ) 9 | (1): MaxWordsPerturbed( 10 | (max_percent): 0.75 11 | ) 12 | (2): RepeatModification 13 | (3): StopwordModification 14 | (is_black_box): True 15 | ) 16 | /.*/ 17 | --------------------------------------------- Result 1 --------------------------------------------- 18 | Eine republikanische Strategie, um der Wiederwahl Obamas entgegenzuwirken-->[FAILED] 19 | A Republican strategy to counter the re-election of Obama 20 | 21 | 22 | --------------------------------------------- Result 2 --------------------------------------------- 23 | Die republikanischen Führer rechtfertigten ihre Politik durch die Not-->Repuzlican leaders justifZed their policy by the need to coq 24 | Republican leaders justified their policy by the need to combat electoral fraud. 25 | Repuzlican leaders justifZed their policy by the need to coqbat electoral fraud. 26 | 27 | 28 | 29 | +-------------------------------+--------+ 30 | | Attack Results | | 31 | +-------------------------------+--------+ 32 | | Number of successful attacks: | 1 | 33 | | Number of failed attacks: | 1 | 34 | | Number of skipped attacks: | 0 | 35 | | Original accuracy: | 100.0% | 36 | | Accuracy under attack: | 50.0% | 37 | | Attack success rate: | 50.0% | 38 | | Average perturbed word %: | 25.0% | 39 | | Average num. words per input: | 11.0 | 40 | | Avg num queries: | 19.0 | 41 | +-------------------------------+--------+ 42 | -------------------------------------------------------------------------------- /tests/sample_outputs/txt_attack_log.txt: -------------------------------------------------------------------------------- 1 | --------------------------------------------- Result 1 --------------------------------------------- 2 | [[Positive (91%)]] --> [[Negative (69%)]] 3 | 4 | lovingly photographed in the manner of a golden book sprung to [[life]] , stuart little 2 [[manages]] [[sweetness]] largely without stickiness . 5 | 6 | lovingly photographed in the manner of a golden book sprung to [[ife]] , stuart little 2 [[manager]] [[seetness]] largely without stickiness . 7 | --------------------------------------------- Result 2 --------------------------------------------- 8 | [[Positive (99%)]] --> [[Negative (82%)]] 9 | 10 | [[consistently]] [[clever]] and [[suspenseful]] . 11 | 12 | [[conisstently]] [[celver]] and [[Huspenseful]] . 13 | Number of successful attacks: 2 14 | Number of failed attacks: 0 15 | Number of skipped attacks: 0 16 | Original accuracy: 100.0% 17 | Accuracy under attack: 0.0% 18 | Attack success rate: 100.0% 19 | Average perturbed word %: 45.0% 20 | Average num. 
words per input: 12.0 21 | Avg num queries: 22.0 22 | -------------------------------------------------------------------------------- /tests/test_command_line/helpers.py: -------------------------------------------------------------------------------- 1 | import shlex 2 | import subprocess 3 | 4 | 5 | def run_command_and_get_result(command): 6 | """Runs a command in the console and gets the result. 7 | 8 | Command can be a string (single command) or a tuple of strings 9 | (multiple commands). In the multi-command setting, commands will be 10 | joined together with a pipe, and the output of the last command will 11 | be returned. 12 | """ 13 | from subprocess import PIPE 14 | 15 | # run command 16 | if isinstance(command, tuple): 17 | # Support pipes via tuple of commands 18 | procs = [] 19 | for i in range(len(command) - 1): 20 | if i == 0: 21 | proc = subprocess.Popen(shlex.split(command[i]), stdout=PIPE) 22 | else: 23 | proc = subprocess.Popen( 24 | shlex.split(command[i]), 25 | stdout=subprocess.PIPE, 26 | stdin=procs[-1].stdout, 27 | ) 28 | procs.append(proc) 29 | # Run the last command 30 | result = subprocess.run( 31 | shlex.split(command[-1]), stdin=procs[-1].stdout, stdout=PIPE, stderr=PIPE 32 | ) 33 | # Wait for all intermediate processes in the pipe 34 | for proc in procs: 35 | proc.wait() 36 | else: 37 | result = subprocess.run(shlex.split(command), stdout=PIPE, stderr=PIPE) 38 | return result 39 | -------------------------------------------------------------------------------- /tests/test_command_line/test_augment.py: -------------------------------------------------------------------------------- 1 | from helpers import run_command_and_get_result 2 | import pytest 3 | 4 | augment_test_params = [ 5 | ( 6 | "simple_augment_test", 7 | "textattack augment --input-csv tests/sample_inputs/augment.csv.txt --input-column text --output-csv augment_test.csv --overwrite", 8 | "augment_test.csv", 9 | "tests/sample_outputs/augment_test.csv.txt", 10 | ) 11 | ] 12 | 13 | 14 | @pytest.mark.parametrize( 15 | "name, command, outfile, sample_output_file", augment_test_params 16 | ) 17 | @pytest.mark.slow 18 | def test_command_line_augmentation(name, command, outfile, sample_output_file): 19 | import os 20 | 21 | # desired_text = open(sample_output_file).read().strip() 22 | # Run command and validate outputs. 23 | result = run_command_and_get_result(command) 24 | 25 | assert result.stdout is not None 26 | stdout = result.stdout.decode().strip() 27 | print("stdout =>", stdout) 28 | assert stdout == "" 29 | 30 | assert result.stderr is not None 31 | stderr = result.stderr.decode().strip() 32 | print("stderr =>", stderr) 33 | assert "Wrote 9 augmentations to augment_test.csv" in stderr 34 | 35 | # Ensure CSV file exists, then delete it.
36 | assert os.path.exists(outfile) 37 | os.remove(outfile) 38 | 39 | assert result.returncode == 0 40 | -------------------------------------------------------------------------------- /tests/test_command_line/test_eval.py: -------------------------------------------------------------------------------- 1 | from helpers import run_command_and_get_result 2 | import pytest 3 | 4 | eval_test_params = [ 5 | ( 6 | "eval_model_hub_rt", 7 | "textattack eval --model-from-huggingface textattack/distilbert-base-uncased-rotten-tomatoes --dataset-from-huggingface rotten_tomatoes --num-examples 4", 8 | "tests/sample_outputs/eval_model_hub_rt.txt", 9 | ), 10 | ( 11 | "eval_snli", 12 | "textattack eval --model bert-base-uncased-snli --num-examples 10", 13 | "tests/sample_outputs/eval_snli.txt", 14 | ), 15 | ] 16 | 17 | 18 | @pytest.mark.parametrize("name, command, sample_output_file", eval_test_params) 19 | def test_command_line_eval(name, command, sample_output_file): 20 | """Tests the command-line function, `textattack eval`. 21 | 22 | Different from other tests, this one compares the sample output file 23 | to *stderr* output of the evaluation. 24 | """ 25 | desired_text = open(sample_output_file).read().strip() 26 | desired_text_lines = desired_text.split("\n") 27 | 28 | # Run command and validate outputs. 29 | result = run_command_and_get_result(command) 30 | 31 | assert result.stdout is not None 32 | assert result.stderr is not None 33 | 34 | stdout = result.stdout.decode().strip() 35 | print("stdout =>", stdout) 36 | stderr = result.stderr.decode().strip() 37 | print("stderr =>", stderr) 38 | 39 | print("desired_text =>", desired_text) 40 | stderr_lines = stderr.split("\n") 41 | assert desired_text_lines <= stderr_lines 42 | 43 | assert result.returncode == 0 44 | -------------------------------------------------------------------------------- /tests/test_command_line/test_list.py: -------------------------------------------------------------------------------- 1 | from helpers import run_command_and_get_result 2 | import pytest 3 | 4 | list_test_params = [ 5 | ( 6 | "list_augmentation_recipes", 7 | "textattack list augmentation-recipes", 8 | "tests/sample_outputs/list_augmentation_recipes.txt", 9 | ) 10 | ] 11 | 12 | 13 | @pytest.mark.parametrize("name, command, sample_output_file", list_test_params) 14 | def test_command_line_list(name, command, sample_output_file): 15 | desired_text = open(sample_output_file).read().strip() 16 | 17 | # Run command and validate outputs. 18 | result = run_command_and_get_result(command) 19 | 20 | assert result.stdout is not None 21 | assert result.stderr is not None 22 | 23 | stdout = result.stdout.decode().strip() 24 | print("stdout =>", stdout) 25 | stderr = result.stderr.decode().strip() 26 | print("stderr =>", stderr) 27 | 28 | assert stdout == desired_text 29 | 30 | assert result.returncode == 0 31 | -------------------------------------------------------------------------------- /tests/test_command_line/test_train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | from helpers import run_command_and_get_result 5 | 6 | 7 | def test_train_tiny(): 8 | command = "textattack train --model distilbert-base-uncased --attack textfooler --dataset rotten_tomatoes --model-max-length 64 --num-epochs 1 --num-clean-epochs 0 --num-train-adv-examples 2" 9 | 10 | # Run command and validate outputs. 
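# (The assertions below scrape stderr with regexes for the saved
# training-args path and the reported train/eval accuracies.)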
11 | result = run_command_and_get_result(command) 12 | 13 | assert result.stdout is not None 14 | assert result.stderr is not None 15 | assert result.returncode == 0 16 | 17 | stdout = result.stdout.decode().strip() 18 | print("stdout =>", stdout) 19 | stderr = result.stderr.decode().strip() 20 | print("stderr =>", stderr) 21 | 22 | train_args_json_path = re.findall( 23 | r"Wrote original training args to (\S+)\.", stderr 24 | ) 25 | assert len(train_args_json_path) and os.path.exists(train_args_json_path[0]) 26 | 27 | train_acc = re.findall(r"Train accuracy: (\S+)", stderr) 28 | assert train_acc 29 | train_acc = float(train_acc[0][:-1]) # [:-1] removes percent sign 30 | assert train_acc > 60 31 | 32 | eval_acc = re.findall(r"Eval accuracy: (\S+)", stderr) 33 | assert eval_acc 34 | eval_acc = float(eval_acc[0][:-1]) # [:-1] removes percent sign 35 | assert eval_acc > 60 36 | -------------------------------------------------------------------------------- /tests/test_command_line/update_test_outputs.py: -------------------------------------------------------------------------------- 1 | """Re-runs command-line tests and places their outputs in the sample files. 2 | 3 | This is useful for large changes, but be wary: the outputs still may 4 | need to be manually edited to account for variance between runs. 5 | """ 6 | 7 | from helpers import run_command_and_get_result 8 | from test_attack import attack_test_params 9 | from test_augment import augment_test_params 10 | from test_list import list_test_params 11 | 12 | 13 | def update_test(command, outfile, add_magic_str=False): 14 | if isinstance(command, str): 15 | print(">", command) 16 | else: 17 | print("\n".join(f"> {c}" for c in command)) 18 | result = run_command_and_get_result(command) 19 | stdout = result.stdout.decode().strip() 20 | if add_magic_str: 21 | # add magic string to beginning 22 | magic_str = "/.*/" 23 | stdout = magic_str + stdout 24 | # add magic string after attack 25 | mid_attack_str = "\n--------------------------------------------- Result 1" 26 | stdout = stdout.replace(mid_attack_str, magic_str + mid_attack_str) 27 | # write to file 28 | open(outfile, "w").write(stdout + "\n") 29 | 30 | 31 | def main(): 32 | #### `textattack attack` tests #### 33 | for _, command, outfile in attack_test_params: 34 | update_test(command, outfile, add_magic_str=True) 35 | #### `textattack augment` tests #### 36 | for _, command, outfile, __ in augment_test_params: 37 | update_test(command, outfile) 38 | #### `textattack list` tests #### 39 | for _, command, outfile in list_test_params: 40 | update_test(command, outfile) 41 | 42 | 43 | if __name__ == "__main__": 44 | main() 45 | -------------------------------------------------------------------------------- /tests/test_prompt_augmentation.py: -------------------------------------------------------------------------------- 1 | def test_prompt_augmentation_pipeline(): 2 | from transformers import AutoModelForSeq2SeqLM, AutoTokenizer 3 | 4 | from textattack.augmentation.recipes import CheckListAugmenter 5 | from textattack.constraints.pre_transformation import UnmodifiableIndices 6 | from textattack.llms import HuggingFaceLLMWrapper 7 | from textattack.prompt_augmentation import PromptAugmentationPipeline 8 | 9 | model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small") 10 | tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small") 11 | model_wrapper = HuggingFaceLLMWrapper(model, tokenizer) 12 | 13 | augmenter = CheckListAugmenter() 14 | 15 | pipeline =
PromptAugmentationPipeline(augmenter, model_wrapper) 16 | 17 | prompt = "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'. Please classify: Poor Ben Bratt couldn't find stardom if MapQuest emailed him point-to-point driving directions." 18 | prompt_constraints = [UnmodifiableIndices([2, 3, 10, 12, 14])] 19 | 20 | output = pipeline(prompt, prompt_constraints) 21 | 22 | assert len(output) == 1 23 | assert len(output[0]) == 2 24 | assert "could not" in output[0][0] 25 | assert "negative" in output[0][1] 26 | -------------------------------------------------------------------------------- /tests/test_word_embedding.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | from textattack.shared import GensimWordEmbedding, WordEmbedding 7 | 8 | 9 | def test_embedding_paragramcf(): 10 | word_embedding = WordEmbedding.counterfitted_GLOVE_embedding() 11 | assert pytest.approx(word_embedding[0][0]) == -0.022007 12 | assert pytest.approx(word_embedding["fawn"][0]) == -0.022007 13 | assert word_embedding[10**9] is None 14 | 15 | 16 | def test_embedding_gensim(): 17 | # write a tiny word2vec-format embedding file to load through gensim 18 | from textattack.shared.utils import LazyLoader 19 | from textattack.shared.utils.install import TEXTATTACK_CACHE_DIR 20 | 21 | path = os.path.join(TEXTATTACK_CACHE_DIR, "test_gensim_embedding.txt") 22 | f = open(path, "w") 23 | f.write( 24 | """4 2 25 | hi 1 0 26 | hello 1 1 27 | bye -1 0 28 | bye-bye -1 1 29 | """ 30 | ) 31 | f.close() 32 | 33 | gensim = LazyLoader("gensim", globals(), "gensim") 34 | keyed_vectors = ( 35 | gensim.models.keyedvectors.Word2VecKeyedVectors.load_word2vec_format(path) 36 | ) 37 | word_embedding = GensimWordEmbedding(keyed_vectors) 38 | assert pytest.approx(word_embedding[0][0]) == 1 39 | assert pytest.approx(word_embedding["bye-bye"][0]) == -1 / np.sqrt(2) 40 | assert word_embedding[10**9] is None 41 | 42 | # test query functionality 43 | assert pytest.approx(word_embedding.get_cos_sim(1, 3)) == 0 44 | # mse dist 45 | assert pytest.approx(word_embedding.get_mse_dist(0, 2)) == 4 46 | # nearest neighbour of hi is hello 47 | assert word_embedding.nearest_neighbours(0, 1)[0] == 1 48 | assert word_embedding.word2index("bye") == 2 49 | assert word_embedding.index2word(3) == "bye-bye" 50 | # remove test file 51 | os.remove(path) 52 | -------------------------------------------------------------------------------- /textattack/__init__.py: -------------------------------------------------------------------------------- 1 | """Welcome to the API references for TextAttack! 2 | 3 | What is TextAttack? 4 | 5 | `TextAttack <https://github.com/QData/TextAttack>`__ 6 | is a Python framework for adversarial attacks, adversarial training, and data augmentation in NLP. 7 | 8 | TextAttack makes experimenting with the robustness of NLP models seamless, fast, and easy. It's also useful for NLP model training, adversarial training, and data augmentation. 9 | 10 | TextAttack provides components for common NLP tasks like sentence encoding, grammar-checking, and word replacement that can be used on their own.
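A minimal end-to-end sketch of typical usage (the model and dataset names are
illustrative, not the only supported choices)::

    import transformers

    import textattack

    model = transformers.AutoModelForSequenceClassification.from_pretrained(
        "textattack/bert-base-uncased-imdb"
    )
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        "textattack/bert-base-uncased-imdb"
    )
    model_wrapper = textattack.models.wrappers.HuggingFaceModelWrapper(model, tokenizer)

    dataset = textattack.datasets.HuggingFaceDataset("imdb", split="test")
    attack = textattack.attack_recipes.TextFoolerJin2019.build(model_wrapper)
    attack_args = textattack.AttackArgs(num_examples=10)
    textattack.Attacker(attack, dataset, attack_args).attack_dataset()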
11 | """ 12 | 13 | from .attack_args import AttackArgs, CommandLineAttackArgs 14 | from .augment_args import AugmenterArgs 15 | from .dataset_args import DatasetArgs 16 | from .model_args import ModelArgs 17 | from .training_args import TrainingArgs, CommandLineTrainingArgs 18 | from .attack import Attack 19 | from .attacker import Attacker 20 | from .trainer import Trainer 21 | from .metrics import Metric 22 | 23 | from . import ( 24 | attack_recipes, 25 | attack_results, 26 | augmentation, 27 | commands, 28 | constraints, 29 | datasets, 30 | goal_function_results, 31 | goal_functions, 32 | loggers, 33 | metrics, 34 | models, 35 | search_methods, 36 | shared, 37 | transformations, 38 | ) 39 | 40 | 41 | name = "textattack" 42 | -------------------------------------------------------------------------------- /textattack/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | if __name__ == "__main__": 4 | import textattack 5 | 6 | textattack.commands.textattack_cli.main() 7 | -------------------------------------------------------------------------------- /textattack/attack_recipes/attack_recipe.py: -------------------------------------------------------------------------------- 1 | """ 2 | Attack Recipe Class 3 | ======================== 4 | 5 | """ 6 | 7 | from abc import ABC, abstractmethod 8 | 9 | from textattack import Attack 10 | 11 | 12 | class AttackRecipe(Attack, ABC): 13 | """A recipe for building an NLP adversarial attack from the literature.""" 14 | 15 | @staticmethod 16 | @abstractmethod 17 | def build(model_wrapper, **kwargs): 18 | """Creates a pre-built :class:`~textattack.Attack` that corresponds to 19 | an attack from the literature. 20 | 21 | Args: 22 | model_wrapper (:class:`~textattack.models.wrappers.ModelWrapper`): 23 | :class:`~textattack.models.wrappers.ModelWrapper` that contains the victim model and tokenizer. 24 | This is passed to :class:`~textattack.goal_functions.GoalFunction` when constructing the attack. 25 | kwargs: 26 | Additional keyword arguments. 27 | Returns: 28 | :class:`~textattack.Attack` 29 | """ 30 | raise NotImplementedError() 31 | -------------------------------------------------------------------------------- /textattack/attack_recipes/checklist_ribeiro_2020.py: -------------------------------------------------------------------------------- 1 | """ 2 | CheckList: 3 | ========================= 4 | 5 | (Beyond Accuracy: Behavioral Testing of NLP models with CheckList) 6 | 7 | """ 8 | 9 | from textattack import Attack 10 | from textattack.constraints.pre_transformation import RepeatModification 11 | from textattack.goal_functions import UntargetedClassification 12 | from textattack.search_methods import GreedySearch 13 | from textattack.transformations import ( 14 | CompositeTransformation, 15 | WordSwapChangeLocation, 16 | WordSwapChangeName, 17 | WordSwapChangeNumber, 18 | WordSwapContract, 19 | WordSwapExtend, 20 | ) 21 | 22 | from .attack_recipe import AttackRecipe 23 | 24 | 25 | class CheckList2020(AttackRecipe): 26 | """An implementation of the attack used in "Beyond Accuracy: Behavioral 27 | Testing of NLP models with CheckList", Ribeiro et al., 2020.
28 | 29 | This recipe combines several perturbation types used in CheckList's 30 | invariance tests: contraction, extension, and changing names, numbers, and locations. 31 | 32 | https://arxiv.org/abs/2005.04118 33 | """ 34 | 35 | @staticmethod 36 | def build(model_wrapper): 37 | transformation = CompositeTransformation( 38 | [ 39 | WordSwapExtend(), 40 | WordSwapContract(), 41 | WordSwapChangeName(), 42 | WordSwapChangeNumber(), 43 | WordSwapChangeLocation(), 44 | ] 45 | ) 46 | 47 | # Need this constraint to prevent extend and contract from modifying each other's changes and forming an infinite loop 48 | constraints = [RepeatModification()] 49 | 50 | # Untargeted attack & GreedySearch 51 | goal_function = UntargetedClassification(model_wrapper) 52 | search_method = GreedySearch() 53 | 54 | return Attack(goal_function, constraints, transformation, search_method) 55 | -------------------------------------------------------------------------------- /textattack/attack_recipes/french_recipe.py: -------------------------------------------------------------------------------- 1 | """ 2 | Attack French Recipe 3 | ==================== 4 | 5 | (Contextualized Perturbation for French NLP Adversarial Attack) 6 | 7 | """ 8 | 9 | from textattack import Attack 10 | from textattack.constraints.pre_transformation import ( 11 | RepeatModification, 12 | StopwordModification, 13 | ) 14 | from textattack.goal_functions import UntargetedClassification 15 | from textattack.search_methods import GreedyWordSwapWIR 16 | from textattack.transformations import ( 17 | CompositeTransformation, 18 | WordSwapChangeLocation, 19 | WordSwapChangeName, 20 | WordSwapWordNet, 21 | ) 22 | 23 | from .attack_recipe import AttackRecipe 24 | 25 | 26 | class FrenchRecipe(AttackRecipe): 27 | @staticmethod 28 | def build(model_wrapper): 29 | transformation = CompositeTransformation( 30 | [ 31 | WordSwapWordNet(language="fra"), 32 | WordSwapChangeLocation(language="fra"), 33 | WordSwapChangeName(language="fra"), 34 | ] 35 | ) 36 | constraints = [RepeatModification(), StopwordModification("french")] 37 | goal_function = UntargetedClassification(model_wrapper) 38 | search_method = GreedyWordSwapWIR() 39 | return Attack(goal_function, constraints, transformation, search_method) 40 | -------------------------------------------------------------------------------- /textattack/attack_recipes/input_reduction_feng_2018.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Input Reduction 4 | ==================== 5 | (Pathologies of Neural Models Make Interpretations Difficult) 6 | 7 | """ 8 | 9 | from textattack import Attack 10 | from textattack.constraints.pre_transformation import ( 11 | RepeatModification, 12 | StopwordModification, 13 | ) 14 | from textattack.goal_functions import InputReduction 15 | from textattack.search_methods import GreedyWordSwapWIR 16 | from textattack.transformations import WordDeletion 17 | 18 | from .attack_recipe import AttackRecipe 19 | 20 | 21 | class InputReductionFeng2018(AttackRecipe): 22 | """Feng, Wallace, Grissom, Iyyer, Rodriguez, Boyd-Graber. (2018). 23 | 24 | Pathologies of Neural Models Make Interpretations Difficult. 25 | 26 | https://arxiv.org/abs/1804.07781 27 | """ 28 | 29 | @staticmethod 30 | def build(model_wrapper): 31 | # At each step, we remove the word with the lowest importance value until 32 | # the model changes its prediction.
33 | transformation = WordDeletion() 34 | 35 | constraints = [RepeatModification(), StopwordModification()] 36 | # 37 | # Goal is untargeted classification 38 | # 39 | goal_function = InputReduction(model_wrapper, maximizable=True) 40 | # 41 | # "For each word in an input sentence, we measure its importance by the 42 | # change in the confidence of the original prediction when we remove 43 | # that word from the sentence." 44 | # 45 | # "Instead of looking at the words with high importance values—what 46 | # interpretation methods commonly do—we take a complementary approach 47 | # and study how the model behaves when the supposedly unimportant words are 48 | # removed." 49 | # 50 | search_method = GreedyWordSwapWIR(wir_method="delete") 51 | 52 | return Attack(goal_function, constraints, transformation, search_method) 53 | -------------------------------------------------------------------------------- /textattack/attack_recipes/morpheus_tan_2020.py: -------------------------------------------------------------------------------- 1 | """ 2 | MORPHEUS2020 3 | =============== 4 | (It’s Morphin’ Time! Combating Linguistic Discrimination with Inflectional Perturbations) 5 | 6 | 7 | """ 8 | 9 | from textattack import Attack 10 | from textattack.constraints.pre_transformation import ( 11 | RepeatModification, 12 | StopwordModification, 13 | ) 14 | from textattack.goal_functions import MinimizeBleu 15 | from textattack.search_methods import GreedySearch 16 | from textattack.transformations import WordSwapInflections 17 | 18 | from .attack_recipe import AttackRecipe 19 | 20 | 21 | class MorpheusTan2020(AttackRecipe): 22 | """Samson Tan, Shafiq Joty, Min-Yen Kan, Richard Socher. 23 | 24 | It’s Morphin’ Time! Combating Linguistic Discrimination with 25 | Inflectional Perturbations 26 | 27 | https://www.aclweb.org/anthology/2020.acl-main.263/ 28 | """ 29 | 30 | @staticmethod 31 | def build(model_wrapper): 32 | # 33 | # Goal is to minimize BLEU score between the model output given for the 34 | # perturbed input sequence and the reference translation 35 | # 36 | goal_function = MinimizeBleu(model_wrapper) 37 | 38 | # Swap words with their inflections 39 | transformation = WordSwapInflections() 40 | 41 | # 42 | # Don't modify the same word twice or stopwords 43 | # 44 | constraints = [RepeatModification(), StopwordModification()] 45 | 46 | # 47 | # Greedily swap words (see pseudocode, Algorithm 1 of the paper). 
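# A sketch of the loop this sets up (a paraphrase, not the paper's text):
# GreedySearch proposes every inflection swap the constraints allow, queries
# MinimizeBleu on each candidate, keeps the candidate whose model output
# scores the lowest BLEU against the reference translation, and stops once
# the goal function reports success.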
48 | # 49 | search_method = GreedySearch() 50 | 51 | return Attack(goal_function, constraints, transformation, search_method) 52 | -------------------------------------------------------------------------------- /textattack/attack_recipes/pwws_ren_2019.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | PWWS 4 | ======= 5 | 6 | (Generating Natural Language Adversarial Examples through Probability Weighted Word Saliency) 7 | 8 | """ 9 | 10 | from textattack import Attack 11 | from textattack.constraints.pre_transformation import ( 12 | RepeatModification, 13 | StopwordModification, 14 | ) 15 | from textattack.goal_functions import UntargetedClassification 16 | from textattack.search_methods import GreedyWordSwapWIR 17 | from textattack.transformations import WordSwapWordNet 18 | 19 | from .attack_recipe import AttackRecipe 20 | 21 | 22 | class PWWSRen2019(AttackRecipe): 23 | """An implementation of Probability Weighted Word Saliency from "Generating 24 | Natural Language Adversarial Examples through Probability Weighted Word 25 | Saliency", Ren et al., 2019. 26 | 27 | Words are prioritized for a synonym-swap transformation based on a 28 | combination of their saliency score and maximum word-swap 29 | effectiveness. Note that this implementation does not include the 30 | Named Entity adversarial swap from the original paper, because it 31 | requires access to the full dataset and ground truth labels in 32 | advance. 33 | 34 | https://www.aclweb.org/anthology/P19-1103/ 35 | """ 36 | 37 | @staticmethod 38 | def build(model_wrapper): 39 | transformation = WordSwapWordNet() 40 | constraints = [RepeatModification(), StopwordModification()] 41 | goal_function = UntargetedClassification(model_wrapper) 42 | # search over words based on a combination of their saliency score and how effective the word swap is 43 | search_method = GreedyWordSwapWIR("weighted-saliency") 44 | return Attack(goal_function, constraints, transformation, search_method) 45 | -------------------------------------------------------------------------------- /textattack/attack_recipes/seq2sick_cheng_2018_blackbox.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Seq2Sick 4 | ================================================ 5 | (Seq2Sick: Evaluating the Robustness of Sequence-to-Sequence Models with Adversarial Examples) 6 | """ 7 | 8 | from textattack import Attack 9 | from textattack.constraints.overlap import LevenshteinEditDistance 10 | from textattack.constraints.pre_transformation import ( 11 | RepeatModification, 12 | StopwordModification, 13 | ) 14 | from textattack.goal_functions import NonOverlappingOutput 15 | from textattack.search_methods import GreedyWordSwapWIR 16 | from textattack.transformations import WordSwapEmbedding 17 | 18 | from .attack_recipe import AttackRecipe 19 | 20 | 21 | class Seq2SickCheng2018BlackBox(AttackRecipe): 22 | """Cheng, Minhao, et al. 23 | 24 | Seq2Sick: Evaluating the Robustness of Sequence-to-Sequence Models with 25 | Adversarial Examples 26 | 27 | https://arxiv.org/abs/1803.01128 28 | 29 | This is a greedy re-implementation of the seq2sick attack method. It does 30 | not use gradient descent. 31 | """ 32 | 33 | @staticmethod 34 | def build(model_wrapper, goal_function="non_overlapping"): 35 | # 36 | # Goal is non-overlapping output.
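# (A sketch of the goal's semantics as suggested by the class name:
# NonOverlappingOutput counts the attack as successful once the model's
# output for the perturbed sequence shares no words with its output for
# the original sequence.)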
37 | # 38 | goal_function = NonOverlappingOutput(model_wrapper) 39 | transformation = WordSwapEmbedding(max_candidates=50) 40 | # 41 | # Don't modify the same word twice or stopwords 42 | # 43 | constraints = [RepeatModification(), StopwordModification()] 44 | # 45 | # In these experiments, we hold the maximum difference 46 | # on edit distance (ϵ) to a constant 30 for each sample. 47 | # 48 | constraints.append(LevenshteinEditDistance(30)) 49 | # 50 | # Greedily swap words with "Word Importance Ranking". 51 | # 52 | search_method = GreedyWordSwapWIR(wir_method="unk") 53 | 54 | return Attack(goal_function, constraints, transformation, search_method) 55 | -------------------------------------------------------------------------------- /textattack/attack_recipes/spanish_recipe.py: -------------------------------------------------------------------------------- 1 | """ 2 | Attack Spanish Recipe 3 | ===================== 4 | 5 | (Contextualized Perturbation for Spanish NLP Adversarial Attack) 6 | 7 | """ 8 | 9 | from textattack import Attack 10 | from textattack.constraints.pre_transformation import ( 11 | RepeatModification, 12 | StopwordModification, 13 | ) 14 | from textattack.goal_functions import UntargetedClassification 15 | from textattack.search_methods import GreedyWordSwapWIR 16 | from textattack.transformations import ( 17 | CompositeTransformation, 18 | WordSwapChangeLocation, 19 | WordSwapChangeName, 20 | WordSwapWordNet, 21 | ) 22 | 23 | from .attack_recipe import AttackRecipe 24 | 25 | 26 | class SpanishRecipe(AttackRecipe): 27 | @staticmethod 28 | def build(model_wrapper): 29 | transformation = CompositeTransformation( 30 | [ 31 | WordSwapWordNet(language="esp"), 32 | WordSwapChangeLocation(language="esp"), 33 | WordSwapChangeName(language="esp"), 34 | ] 35 | ) 36 | constraints = [RepeatModification(), StopwordModification("spanish")] 37 | goal_function = UntargetedClassification(model_wrapper) 38 | search_method = GreedyWordSwapWIR() 39 | return Attack(goal_function, constraints, transformation, search_method) 40 | -------------------------------------------------------------------------------- /textattack/attack_results/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | TextAttack attack_results Package 4 | ================================== 5 | 6 | """ 7 | 8 | from .attack_result import AttackResult 9 | from .maximized_attack_result import MaximizedAttackResult 10 | from .failed_attack_result import FailedAttackResult 11 | from .skipped_attack_result import SkippedAttackResult 12 | from .successful_attack_result import SuccessfulAttackResult 13 | -------------------------------------------------------------------------------- /textattack/attack_results/failed_attack_result.py: -------------------------------------------------------------------------------- 1 | """ 2 | FailedAttackResult Class 3 | =========================== 4 | 5 | """ 6 | 7 | from textattack.shared import utils 8 | 9 | from .attack_result import AttackResult 10 | 11 | 12 | class FailedAttackResult(AttackResult): 13 | """The result of a failed attack.""" 14 | 15 | def __init__(self, original_result, perturbed_result=None): 16 | perturbed_result = perturbed_result or original_result 17 | super().__init__(original_result, perturbed_result) 18 | 19 | def str_lines(self, color_method=None): 20 | lines = ( 21 | self.goal_function_result_str(color_method), 22 | self.original_text(color_method), 23 | ) 24 | return tuple(map(str, lines)) 25 | 26 | def 
goal_function_result_str(self, color_method=None): 27 | failed_str = utils.color_text("[FAILED]", "red", color_method) 28 | return ( 29 | self.original_result.get_colored_output(color_method) + " --> " + failed_str 30 | ) 31 | -------------------------------------------------------------------------------- /textattack/attack_results/maximized_attack_result.py: -------------------------------------------------------------------------------- 1 | """ 2 | MaximizedAttackResult Class 3 | ============================ 4 | 5 | """ 6 | 7 | from .attack_result import AttackResult 8 | 9 | 10 | class MaximizedAttackResult(AttackResult): 11 | """The result of an attack run with a maximizable goal function.""" 12 | -------------------------------------------------------------------------------- /textattack/attack_results/skipped_attack_result.py: -------------------------------------------------------------------------------- 1 | """ 2 | SkippedAttackResult Class 3 | ============================ 4 | 5 | """ 6 | 7 | from textattack.shared import utils 8 | 9 | from .attack_result import AttackResult 10 | 11 | 12 | class SkippedAttackResult(AttackResult): 13 | """The result of a skipped attack.""" 14 | 15 | def __init__(self, original_result): 16 | super().__init__(original_result, original_result) 17 | 18 | def str_lines(self, color_method=None): 19 | lines = ( 20 | self.goal_function_result_str(color_method), 21 | self.original_text(color_method), 22 | ) 23 | return tuple(map(str, lines)) 24 | 25 | def goal_function_result_str(self, color_method=None): 26 | skipped_str = utils.color_text("[SKIPPED]", "gray", color_method) 27 | return ( 28 | self.original_result.get_colored_output(color_method) 29 | + " --> " 30 | + skipped_str 31 | ) 32 | -------------------------------------------------------------------------------- /textattack/attack_results/successful_attack_result.py: -------------------------------------------------------------------------------- 1 | """ 2 | SuccessfulAttackResult Class 3 | ============================== 4 | 5 | """ 6 | 7 | from .attack_result import AttackResult 8 | 9 | 10 | class SuccessfulAttackResult(AttackResult): 11 | """The result of a successful attack.""" 12 | -------------------------------------------------------------------------------- /textattack/augmentation/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _augmentation: 2 | 3 | TextAttack augmentation package: 4 | ================================= 5 | 6 | Transformations and constraints can be used outside of an attack for simple NLP data augmentation with the ``Augmenter`` class that returns all possible transformations for a given string. 7 | """ 8 | 9 | from .augmenter import Augmenter 10 | from .recipes import ( 11 | WordNetAugmenter, 12 | EmbeddingAugmenter, 13 | CharSwapAugmenter, 14 | EasyDataAugmenter, 15 | CheckListAugmenter, 16 | DeletionAugmenter, 17 | CLAREAugmenter, 18 | BackTranslationAugmenter, 19 | ) 20 | -------------------------------------------------------------------------------- /textattack/commands/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | TextAttack commands Package 4 | =========================== 5 | 6 | """ 7 | 8 | from abc import ABC, abstractmethod 9 | from .textattack_command import TextAttackCommand 10 | from .
import textattack_cli 11 | -------------------------------------------------------------------------------- /textattack/commands/attack_command.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | AttackCommand class 4 | =========================== 5 | 6 | """ 7 | 8 | from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser 9 | 10 | from textattack import Attacker, CommandLineAttackArgs, DatasetArgs, ModelArgs 11 | from textattack.commands import TextAttackCommand 12 | 13 | 14 | class AttackCommand(TextAttackCommand): 15 | """The TextAttack attack module: 16 | 17 | A command line parser to run an attack from user specifications. 18 | """ 19 | 20 | def run(self, args): 21 | attack_args = CommandLineAttackArgs(**vars(args)) 22 | dataset = DatasetArgs._create_dataset_from_args(attack_args) 23 | 24 | if attack_args.interactive: 25 | model_wrapper = ModelArgs._create_model_from_args(attack_args) 26 | attack = CommandLineAttackArgs._create_attack_from_args( 27 | attack_args, model_wrapper 28 | ) 29 | Attacker.attack_interactive(attack) 30 | else: 31 | model_wrapper = ModelArgs._create_model_from_args(attack_args) 32 | attack = CommandLineAttackArgs._create_attack_from_args( 33 | attack_args, model_wrapper 34 | ) 35 | attacker = Attacker(attack, dataset, attack_args) 36 | attacker.attack_dataset() 37 | 38 | @staticmethod 39 | def register_subcommand(main_parser: ArgumentParser): 40 | parser = main_parser.add_parser( 41 | "attack", 42 | help="run an attack on an NLP model", 43 | formatter_class=ArgumentDefaultsHelpFormatter, 44 | ) 45 | parser = CommandLineAttackArgs._add_parser_args(parser) 46 | parser.set_defaults(func=AttackCommand()) 47 | -------------------------------------------------------------------------------- /textattack/commands/benchmark_recipe_command.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | BenchmarkRecipeCommand class 4 | ============================== 5 | 6 | """ 7 | 8 | from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser 9 | 10 | from textattack.commands import TextAttackCommand 11 | 12 | 13 | class BenchmarkRecipeCommand(TextAttackCommand): 14 | """The TextAttack benchmark recipe module: 15 | 16 | A command line parser to benchmark a recipe from user 17 | specifications. 
18 | """ 19 | 20 | def run(self, args): 21 | raise NotImplementedError("Cannot benchmark recipes yet - stay tuned!!") 22 | 23 | @staticmethod 24 | def register_subcommand(main_parser: ArgumentParser): 25 | parser = main_parser.add_parser( 26 | "benchmark-recipe", 27 | help="benchmark a recipe", 28 | formatter_class=ArgumentDefaultsHelpFormatter, 29 | ) 30 | parser.set_defaults(func=BenchmarkRecipeCommand()) 31 | -------------------------------------------------------------------------------- /textattack/commands/textattack_cli.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | TextAttack CLI main class 4 | ============================== 5 | 6 | """ 7 | 8 | # !/usr/bin/env python 9 | import argparse 10 | 11 | from textattack.commands.attack_command import AttackCommand 12 | from textattack.commands.attack_resume_command import AttackResumeCommand 13 | from textattack.commands.augment_command import AugmentCommand 14 | from textattack.commands.benchmark_recipe_command import BenchmarkRecipeCommand 15 | from textattack.commands.eval_model_command import EvalModelCommand 16 | from textattack.commands.list_things_command import ListThingsCommand 17 | from textattack.commands.peek_dataset_command import PeekDatasetCommand 18 | from textattack.commands.train_model_command import TrainModelCommand 19 | 20 | 21 | def main(): 22 | parser = argparse.ArgumentParser( 23 | "TextAttack CLI", 24 | usage="[python -m] textattack <command> [<args>]", 25 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 26 | ) 27 | subparsers = parser.add_subparsers(help="textattack command helpers") 28 | 29 | # Register commands 30 | AttackCommand.register_subcommand(subparsers) 31 | AttackResumeCommand.register_subcommand(subparsers) 32 | AugmentCommand.register_subcommand(subparsers) 33 | BenchmarkRecipeCommand.register_subcommand(subparsers) 34 | EvalModelCommand.register_subcommand(subparsers) 35 | ListThingsCommand.register_subcommand(subparsers) 36 | TrainModelCommand.register_subcommand(subparsers) 37 | PeekDatasetCommand.register_subcommand(subparsers) 38 | 39 | # Let's go 40 | args = parser.parse_args() 41 | 42 | if not hasattr(args, "func"): 43 | parser.print_help() 44 | exit(1) 45 | 46 | # Run 47 | func = args.func 48 | del args.func 49 | func.run(args) 50 | 51 | 52 | if __name__ == "__main__": 53 | main() 54 | -------------------------------------------------------------------------------- /textattack/commands/textattack_command.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | 4 | class TextAttackCommand(ABC): 5 | @staticmethod 6 | @abstractmethod 7 | def register_subcommand(parser): 8 | raise NotImplementedError() 9 | 10 | @abstractmethod 11 | def run(self): 12 | raise NotImplementedError() 13 | -------------------------------------------------------------------------------- /textattack/commands/train_model_command.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | TrainModelCommand class 4 | ============================== 5 | 6 | """ 7 | 8 | from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser 9 | 10 | from textattack import CommandLineTrainingArgs, Trainer 11 | from textattack.commands import TextAttackCommand 12 | 13 | 14 | class TrainModelCommand(TextAttackCommand): 15 | """The TextAttack train module: 16 | 17 | A command line parser to train a model from user specifications.
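Example invocation (the exact flags exercised by
tests/test_command_line/test_train.py)::

    textattack train --model distilbert-base-uncased --attack textfooler \
        --dataset rotten_tomatoes --model-max-length 64 --num-epochs 1 \
        --num-clean-epochs 0 --num-train-adv-examples 2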
18 | """ 19 | 20 | def run(self, args): 21 | training_args = CommandLineTrainingArgs(**vars(args)) 22 | model_wrapper = CommandLineTrainingArgs._create_model_from_args(training_args) 23 | train_dataset, eval_dataset = CommandLineTrainingArgs._create_dataset_from_args( 24 | training_args 25 | ) 26 | attack = CommandLineTrainingArgs._create_attack_from_args( 27 | training_args, model_wrapper 28 | ) 29 | trainer = Trainer( 30 | model_wrapper, 31 | training_args.task_type, 32 | attack, 33 | train_dataset, 34 | eval_dataset, 35 | training_args, 36 | ) 37 | trainer.train() 38 | 39 | @staticmethod 40 | def register_subcommand(main_parser: ArgumentParser): 41 | parser = main_parser.add_parser( 42 | "train", 43 | help="train a model for sequence classification", 44 | formatter_class=ArgumentDefaultsHelpFormatter, 45 | ) 46 | parser = CommandLineTrainingArgs._add_parser_args(parser) 47 | parser.set_defaults(func=TrainModelCommand()) 48 | -------------------------------------------------------------------------------- /textattack/constraints/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _constraint: 2 | 3 | Constraints 4 | =================== 5 | 6 | Constraints determine whether a given transformation is valid. Since transformations do not perfectly preserve semantics or grammaticality, constraints can increase the likelihood that the resulting transformation preserves these qualities. All constraints are subclasses of the ``Constraint`` abstract class, and must implement at least one of ``__call__`` or ``call_many``. 7 | 8 | We split constraints into three main categories. 9 | 10 | :ref:`Semantics <semantics>`: Based on the meaning of the input and perturbation. 11 | 12 | :ref:`Grammaticality <grammaticality>`: Based on syntactic properties like part-of-speech and grammar. 13 | 14 | :ref:`Overlap <overlap>`: Based on character-based properties, like edit distance. 15 | 16 | A fourth type of constraint restricts the search method from exploring certain parts of the search space: 17 | 18 | :ref:`pre_transformation <pre_transformation>`: Based on the input and index of word replacement. 19 | """ 20 | 21 | from .pre_transformation_constraint import PreTransformationConstraint 22 | from .constraint import Constraint 23 | 24 | from . import grammaticality 25 | from . import semantics 26 | from . import overlap 27 | from . import pre_transformation 28 | -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _grammaticality: 2 | 3 | Grammaticality: 4 | -------------------------- 5 | 6 | Grammaticality constraints determine if a transformation is valid based on 7 | syntactic properties of the perturbation. 8 | """ 9 | 10 | from .
import language_models 11 | 12 | from .language_tool import LanguageTool 13 | from .part_of_speech import PartOfSpeech 14 | from .cola import COLA 15 | -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Language Models (non-pre-transformation): 3 | ----------------------------- 4 | 5 | """ 6 | 7 | from .language_model_constraint import LanguageModelConstraint 8 | 9 | from .google_language_model import Google1BillionWordsLanguageModel 10 | from .gpt2 import GPT2 11 | from .learning_to_write import LearningToWriteLanguageModel 12 | -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/google_language_model/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Google Language Models: 3 | ----------------------------- 4 | 5 | """ 6 | 7 | from .google_language_model import ( 8 | GoogleLanguageModel as Google1BillionWordsLanguageModel, 9 | ) 10 | -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_models/learning_to_write/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | "Learning To Write" 3 | -------------------------- 4 | 5 | """ 6 | 7 | from .learning_to_write import LearningToWriteLanguageModel 8 | -------------------------------------------------------------------------------- /textattack/constraints/grammaticality/language_tool.py: -------------------------------------------------------------------------------- 1 | """ 2 | LanguageTool Grammar Checker 3 | ------------------------------ 4 | """ 5 | 6 | import language_tool_python 7 | 8 | from textattack.constraints import Constraint 9 | 10 | 11 | class LanguageTool(Constraint): 12 | """Uses languagetool to check that a transformation does not introduce more 13 | than `grammar_error_threshold` additional grammatical errors. (https://languagetool.org/) 14 | 15 | Args: 16 | grammar_error_threshold (int): the number of additional errors permitted in `x_adv` 17 | relative to `x` 18 | compare_against_original (bool): If `True`, compare against the original text. 19 | Otherwise, compare against the most recent text.
20 | language: language to use for languagetool (available choices: https://dev.languagetool.org/languages) 21 | """ 22 | 23 | def __init__( 24 | self, grammar_error_threshold=0, compare_against_original=True, language="en-US" 25 | ): 26 | super().__init__(compare_against_original) 27 | self.lang_tool = language_tool_python.LanguageTool(language) 28 | self.grammar_error_threshold = grammar_error_threshold 29 | self.grammar_error_cache = {} 30 | 31 | def get_errors(self, attacked_text, use_cache=False): 32 | text = attacked_text.text 33 | if use_cache: 34 | if text not in self.grammar_error_cache: 35 | self.grammar_error_cache[text] = len(self.lang_tool.check(text)) 36 | return self.grammar_error_cache[text] 37 | else: 38 | return len(self.lang_tool.check(text)) 39 | 40 | def _check_constraint(self, transformed_text, reference_text): 41 | original_num_errors = self.get_errors(reference_text, use_cache=True) 42 | errors_added = self.get_errors(transformed_text) - original_num_errors 43 | return errors_added <= self.grammar_error_threshold 44 | 45 | def extra_repr_keys(self): 46 | return ["grammar_error_threshold"] + super().extra_repr_keys() 47 | -------------------------------------------------------------------------------- /textattack/constraints/overlap/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _overlap: 2 | 3 | Overlap Constraints 4 | -------------------------- 5 | 6 | Overlap constraints determine if a transformation is valid based on character-level analysis. 7 | """ 8 | 9 | from .bleu_score import BLEU 10 | from .chrf_score import chrF 11 | from .levenshtein_edit_distance import LevenshteinEditDistance 12 | from .meteor_score import METEOR 13 | from .max_words_perturbed import MaxWordsPerturbed 14 | -------------------------------------------------------------------------------- /textattack/constraints/overlap/bleu_score.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | BLEU Constraints 4 | -------------------------- 5 | 6 | 7 | """ 8 | 9 | import nltk 10 | 11 | from textattack.constraints import Constraint 12 | 13 | 14 | class BLEU(Constraint): 15 | """A constraint on BLEU score difference. 16 | 17 | Args: 18 | max_bleu_score (int): Maximum BLEU score allowed. 19 | compare_against_original (bool): If `True`, compare new `x_adv` against the original `x`. 20 | Otherwise, compare it against the previous `x_adv`. 
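Example (a sketch; the zero threshold is hypothetical)::

    # admit only transformations whose BLEU score against the reference is 0
    constraint = BLEU(max_bleu_score=0)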
21 | """ 22 | 23 | def __init__(self, max_bleu_score, compare_against_original=True): 24 | super().__init__(compare_against_original) 25 | if not isinstance(max_bleu_score, int): 26 | raise TypeError("max_bleu_score must be an int") 27 | self.max_bleu_score = max_bleu_score 28 | 29 | def _check_constraint(self, transformed_text, reference_text): 30 | ref = reference_text.words 31 | hyp = transformed_text.words 32 | bleu_score = nltk.translate.bleu_score.sentence_bleu([ref], hyp) 33 | return bleu_score <= self.max_bleu_score 34 | 35 | def extra_repr_keys(self): 36 | return ["max_bleu_score"] + super().extra_repr_keys() 37 | -------------------------------------------------------------------------------- /textattack/constraints/overlap/chrf_score.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | chrF Constraints 4 | -------------------------- 5 | 6 | 7 | """ 8 | 9 | import nltk.translate.chrf_score 10 | 11 | from textattack.constraints import Constraint 12 | 13 | 14 | class chrF(Constraint): 15 | """A constraint on chrF (n-gram F-score) difference. 16 | 17 | Args: 18 | max_chrf (int): Max n-gram F-score allowed. 19 | compare_against_original (bool): If `True`, compare new `x_adv` against the original `x`. 20 | Otherwise, compare it against the previous `x_adv`. 21 | """ 22 | 23 | def __init__(self, max_chrf, compare_against_original=True): 24 | super().__init__(compare_against_original) 25 | if not isinstance(max_chrf, int): 26 | raise TypeError("max_chrf must be an int") 27 | self.max_chrf = max_chrf 28 | 29 | def _check_constraint(self, transformed_text, reference_text): 30 | ref = reference_text.words 31 | hyp = transformed_text.words 32 | chrf = nltk.translate.chrf_score.sentence_chrf(ref, hyp) 33 | return chrf <= self.max_chrf 34 | 35 | def extra_repr_keys(self): 36 | return ["max_chrf"] + super().extra_repr_keys() 37 | -------------------------------------------------------------------------------- /textattack/constraints/overlap/levenshtein_edit_distance.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Edit Distance Constraints 4 | -------------------------- 5 | 6 | 7 | """ 8 | 9 | import editdistance 10 | 11 | from textattack.constraints import Constraint 12 | 13 | 14 | class LevenshteinEditDistance(Constraint): 15 | """A constraint on edit distance (Levenshtein Distance). 16 | 17 | Args: 18 | max_edit_distance (int): Maximum edit distance allowed. 19 | compare_against_original (bool): If `True`, compare new `x_adv` against the original `x`. 20 | Otherwise, compare it against the previous `x_adv`. 
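Example (the same setting the Seq2Sick recipe in this repository uses)::

    constraint = LevenshteinEditDistance(30)  # permit at most 30 character edits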
21 | """ 22 | 23 | def __init__(self, max_edit_distance, compare_against_original=True): 24 | super().__init__(compare_against_original) 25 | if not isinstance(max_edit_distance, int): 26 | raise TypeError("max_edit_distance must be an int") 27 | self.max_edit_distance = max_edit_distance 28 | 29 | def _check_constraint(self, transformed_text, reference_text): 30 | edit_distance = editdistance.eval(reference_text.text, transformed_text.text) 31 | return edit_distance <= self.max_edit_distance 32 | 33 | def extra_repr_keys(self): 34 | return ["max_edit_distance"] + super().extra_repr_keys() 35 | -------------------------------------------------------------------------------- /textattack/constraints/overlap/meteor_score.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | METEOR Constraints 4 | -------------------------- 5 | 6 | 7 | """ 8 | 9 | import nltk 10 | 11 | from textattack.constraints import Constraint 12 | 13 | 14 | class METEOR(Constraint): 15 | """A constraint on METEOR score difference. 16 | 17 | Args: 18 | max_meteor (int): Max METEOR score allowed. 19 | compare_against_original (bool): If `True`, compare new `x_adv` against the original `x`. 20 | Otherwise, compare it against the previous `x_adv`. 21 | """ 22 | 23 | def __init__(self, max_meteor, compare_against_original=True): 24 | super().__init__(compare_against_original) 25 | if not isinstance(max_meteor, int): 26 | raise TypeError("max_meteor must be an int") 27 | self.max_meteor = max_meteor 28 | 29 | def _check_constraint(self, transformed_text, reference_text): 30 | # nltk's meteor expects pre-tokenized inputs, so pass word lists rather than AttackedText objects 31 | meteor = nltk.translate.meteor([reference_text.words], transformed_text.words) 32 | return meteor <= self.max_meteor 33 | 34 | def extra_repr_keys(self): 35 | return ["max_meteor"] + super().extra_repr_keys() 36 | -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _pre_transformation: 2 | 3 | Pre-Transformation: 4 | --------------------- 5 | 6 | Pre-transformation constraints determine if a transformation is valid based on only the original input and the position of the replacement. These constraints are applied before the transformation is even called. For example, these constraints can prevent search methods from swapping words at the same index twice, or from replacing stopwords.
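A typical attack stacks several of these, as the recipes in this repository do::

    constraints = [RepeatModification(), StopwordModification()]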
7 | """ 8 | 9 | from .stopword_modification import StopwordModification 10 | from .repeat_modification import RepeatModification 11 | from .input_column_modification import InputColumnModification 12 | from .max_word_index_modification import MaxWordIndexModification 13 | from .max_num_words_modified import MaxNumWordsModified 14 | from .min_word_length import MinWordLength 15 | from .max_modification_rate import MaxModificationRate 16 | from .unmodifiable_indices import UnmodifiableIndices 17 | from .unmodifiable_phrases import UnmodifablePhrases 18 | -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/input_column_modification.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Input Column Modification 4 | -------------------------- 5 | 6 | """ 7 | 8 | from textattack.constraints import PreTransformationConstraint 9 | 10 | 11 | class InputColumnModification(PreTransformationConstraint): 12 | """A constraint disallowing the modification of words within a specific 13 | input column. 14 | 15 | For example, can prevent modification of 'premise' during 16 | entailment. 17 | """ 18 | 19 | def __init__(self, matching_column_labels, columns_to_ignore): 20 | self.matching_column_labels = matching_column_labels 21 | self.columns_to_ignore = columns_to_ignore 22 | 23 | def _get_modifiable_indices(self, current_text): 24 | """Returns the word indices in current_text which are able to be 25 | modified. 26 | 27 | If ``current_text.column_labels`` doesn't match 28 | ``self.matching_column_labels``, do nothing, and allow all words 29 | to be modified. 30 | 31 | If it does match, only allow words to be modified if they are not 32 | in columns from ``columns_to_ignore``.
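For example (mirroring the entailment recipes; column names depend on the dataset)::

    constraint = InputColumnModification(["premise", "hypothesis"], {"premise"})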
33 | """ 34 | if current_text.column_labels != self.matching_column_labels: 35 | return set(range(len(current_text.words))) 36 | 37 | idx = 0 38 | indices_to_modify = set() 39 | for column, words in zip( 40 | current_text.column_labels, current_text.words_per_input 41 | ): 42 | num_words = len(words) 43 | if column not in self.columns_to_ignore: 44 | indices_to_modify |= set(range(idx, idx + num_words)) 45 | idx += num_words 46 | return indices_to_modify 47 | 48 | def extra_repr_keys(self): 49 | return ["matching_column_labels", "columns_to_ignore"] 50 | -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/max_num_words_modified.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Max Num Words Modified 4 | ----------------------------- 5 | 6 | """ 7 | 8 | from textattack.constraints import PreTransformationConstraint 9 | 10 | 11 | class MaxNumWordsModified(PreTransformationConstraint): 12 | def __init__(self, max_num_words: int): 13 | self.max_num_words = max_num_words 14 | 15 | def _get_modifiable_indices(self, current_text): 16 | """Returns the word indices in current_text which are able to be 17 | modified.""" 18 | 19 | if len(current_text.attack_attrs["modified_indices"]) >= self.max_num_words: 20 | return set() 21 | else: 22 | return set(range(len(current_text.words))) 23 | 24 | def extra_repr_keys(self): 25 | return ["max_num_words"] 26 | -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/max_word_index_modification.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Max Word Index Modification 4 | ----------------------------- 5 | 6 | """ 7 | 8 | from textattack.constraints import PreTransformationConstraint 9 | 10 | 11 | class MaxWordIndexModification(PreTransformationConstraint): 12 | """A constraint disallowing the modification of words which are past some 13 | maximum sentence word-length limit.""" 14 | 15 | def __init__(self, max_length): 16 | self.max_length = max_length 17 | 18 | def _get_modifiable_indices(self, current_text): 19 | """Returns the word indices in current_text which are able to be 20 | modified.""" 21 | return set(range(min(self.max_length, len(current_text.words)))) 22 | 23 | def extra_repr_keys(self): 24 | return ["max_length"] 25 | -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/min_word_length.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Min Word Length 4 | -------------------------- 5 | 6 | """ 7 | 8 | from textattack.constraints import PreTransformationConstraint 9 | 10 | 11 | class MinWordLength(PreTransformationConstraint): 12 | """A constraint that prevents modifications to words less than a certain 13 | word character-length. 14 | 15 | :param min_length: Minimum word character-length needed for changes 16 | to be made to a word.
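Example (a hypothetical threshold)::

    constraint = MinWordLength(4)  # words of fewer than 4 characters stay fixed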
17 | """ 18 | 19 | def __init__(self, min_length): 20 | self.min_length = min_length 21 | 22 | def _get_modifiable_indices(self, current_text): 23 | idxs = [] 24 | for i, word in enumerate(current_text.words): 25 | if len(word) >= self.min_length: 26 | idxs.append(i) 27 | return set(idxs) 28 | -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/repeat_modification.py: -------------------------------------------------------------------------------- 1 | """ 2 | Repeat Modification 3 | -------------------------- 4 | 5 | """ 6 | 7 | from textattack.constraints import PreTransformationConstraint 8 | 9 | 10 | class RepeatModification(PreTransformationConstraint): 11 | """A constraint disallowing the modification of words which have already 12 | been modified.""" 13 | 14 | def _get_modifiable_indices(self, current_text): 15 | """Returns the word indices in current_text which are able to be 16 | modified.""" 17 | try: 18 | return ( 19 | set(range(len(current_text.words))) 20 | - current_text.attack_attrs["modified_indices"] 21 | ) 22 | except KeyError: 23 | raise KeyError( 24 | "`modified_indices` in attack_attrs required for RepeatModification constraint." 25 | ) 26 | -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/stopword_modification.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Stopword Modification 4 | -------------------------- 5 | 6 | """ 7 | 8 | import nltk 9 | 10 | from textattack.constraints import PreTransformationConstraint 11 | from textattack.shared.validators import transformation_consists_of_word_swaps 12 | 13 | 14 | class StopwordModification(PreTransformationConstraint): 15 | """A constraint disallowing the modification of stopwords.""" 16 | 17 | def __init__(self, stopwords=None, language="english"): 18 | if stopwords is not None: 19 | self.stopwords = set(stopwords) 20 | else: 21 | self.stopwords = set(nltk.corpus.stopwords.words(language)) 22 | 23 | def _get_modifiable_indices(self, current_text): 24 | """Returns the word indices in ``current_text`` which are able to be 25 | modified.""" 26 | non_stopword_indices = set() 27 | for i, word in enumerate(current_text.words): 28 | if word not in self.stopwords: 29 | non_stopword_indices.add(i) 30 | return non_stopword_indices 31 | 32 | def check_compatibility(self, transformation): 33 | """The stopword constraint is only concerned with word swaps, since 34 | paraphrasing phrases containing stopwords is OK. 35 | 36 | Args: 37 | transformation: The ``Transformation`` to check compatibility with. 38 | """ 39 | return transformation_consists_of_word_swaps(transformation) 40 | -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/unmodifiable_indices.py: -------------------------------------------------------------------------------- 1 | from textattack.constraints import PreTransformationConstraint 2 | 3 | 4 | class UnmodifiableIndices(PreTransformationConstraint): 5 | """A constraint that prevents the modification of certain words at specific 6 | indices.
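For example (the exact indices exercised by tests/test_prompt_augmentation.py)::

    constraint = UnmodifiableIndices([2, 3, 10, 12, 14])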
7 | 8 | Args: 9 | indices (list(int)): A list of indices which are unmodifiable 10 | """ 11 | 12 | def __init__(self, indices): 13 | self.unmodifiable_indices = indices 14 | 15 | def _get_modifiable_indices(self, current_text): 16 | unmodifiable_set = current_text.convert_from_original_idxs( 17 | self.unmodifiable_indices 18 | ) 19 | return set( 20 | i for i in range(0, len(current_text.words)) if i not in unmodifiable_set 21 | ) 22 | 23 | def extra_repr_keys(self): 24 | return ["unmodifiable_indices"] 25 | -------------------------------------------------------------------------------- /textattack/constraints/pre_transformation/unmodifiable_phrases.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | from textattack.constraints import PreTransformationConstraint 4 | 5 | 6 | class UnmodifablePhrases(PreTransformationConstraint): 7 | """A constraint that prevents the modification of specified phrases or 8 | words. 9 | 10 | Args: 11 | phrases (list(str)): A list of strings that cannot be modified 12 | """ 13 | 14 | def __init__(self, phrases): 15 | self.length_to_phrases = defaultdict(set) 16 | for phrase in phrases: 17 | self.length_to_phrases[len(phrase.split())].add(phrase.lower()) 18 | 19 | def _get_modifiable_indices(self, current_text): 20 | phrase_indices = set() 21 | 22 | for phrase_length in self.length_to_phrases.keys(): 23 | for i in range(len(current_text.words) - phrase_length + 1): 24 | if ( 25 | " ".join(current_text.words[i : i + phrase_length]) 26 | in self.length_to_phrases[phrase_length] 27 | ): 28 | phrase_indices |= set(range(i, i + phrase_length)) 29 | 30 | return set(i for i in range(len(current_text.words)) if i not in phrase_indices) 31 | 32 | def extra_repr_keys(self): 33 | return ["length_to_phrases"] 34 | -------------------------------------------------------------------------------- /textattack/constraints/semantics/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _semantics: 2 | 3 | Semantic Constraints 4 | --------------------- 5 | Semantic constraints determine if a transformation is valid based on similarity of the semantics of the original input and the transformed input. 6 | """ 7 | 8 | from .
import sentence_encoders 9 | 10 | from .word_embedding_distance import WordEmbeddingDistance 11 | from .bert_score import BERTScore 12 | -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Sentence Encoder Constraint 3 | ---------------------------- 4 | """ 5 | 6 | from .sentence_encoder import SentenceEncoder 7 | 8 | from .sentence_bert import SBERT 9 | from .infer_sent import InferSent 10 | from .thought_vector import ThoughtVector 11 | from .universal_sentence_encoder import ( 12 | UniversalSentenceEncoder, 13 | MultilingualUniversalSentenceEncoder, 14 | ) 15 | -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/infer_sent/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | infer sent 3 | ^^^^^^^^^^^^ 4 | """ 5 | 6 | from .infer_sent import InferSent 7 | -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/sentence_bert/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | sBERT 3 | ^^^^^^^ 4 | """ 5 | 6 | from .sbert import SBERT 7 | -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/sentence_bert/sbert.py: -------------------------------------------------------------------------------- 1 | """ 2 | sBERT for Sentence Similarity 3 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | """ 5 | 6 | from textattack.constraints.semantics.sentence_encoders import SentenceEncoder 7 | from textattack.shared import utils 8 | 9 | sentence_transformers = utils.LazyLoader( 10 | "sentence_transformers", globals(), "sentence_transformers" 11 | ) 12 | 13 | 14 | class SBERT(SentenceEncoder): 15 | """Constraint using similarity between sentence encodings of x and x_adv 16 | where the text embeddings are created using BERT, trained on NLI data, and 17 | fine-tuned on the STS benchmark dataset. 18 | 19 | Available models can be found here: https://huggingface.co/sentence-transformers 20 | """ 21 | 22 | def __init__( 23 | self, 24 | threshold=0.7, 25 | metric="cosine", 26 | model_name="bert-base-nli-stsb-mean-tokens", 27 | **kwargs 28 | ): 29 | super().__init__(threshold=threshold, metric=metric, **kwargs) 30 | self.model = sentence_transformers.SentenceTransformer(model_name) 31 | self.model.to(utils.device) 32 | 33 | def encode(self, sentences): 34 | return self.model.encode(sentences) 35 | -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/thought_vector.py: -------------------------------------------------------------------------------- 1 | """ 2 | Thought Vector Class 3 | --------------------- 4 | """ 5 | 6 | import functools 7 | 8 | import torch 9 | 10 | from textattack.shared import AbstractWordEmbedding, WordEmbedding, utils 11 | 12 | from .sentence_encoder import SentenceEncoder 13 | 14 | 15 | class ThoughtVector(SentenceEncoder): 16 | """A constraint on the distance between two sentences' thought vectors.
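Here a "thought vector" is simply the mean of the sentence's word embeddings,
``v(s) = mean(e(w) for w in s)``, with out-of-vocabulary words skipped (see
``_get_thought_vector`` below).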
17 | 18 | Args: 19 | embedding (textattack.shared.AbstractWordEmbedding): The word embedding to use 20 | """ 21 | 22 | def __init__(self, embedding=None, **kwargs): 23 | if embedding is None: 24 | embedding = WordEmbedding.counterfitted_GLOVE_embedding() 25 | if not isinstance(embedding, AbstractWordEmbedding): 26 | raise ValueError( 27 | "`embedding` object must be of type `textattack.shared.AbstractWordEmbedding`." 28 | ) 29 | self.word_embedding = embedding 30 | super().__init__(**kwargs) 31 | 32 | def clear_cache(self): 33 | self._get_thought_vector.cache_clear() 34 | 35 | @functools.lru_cache(maxsize=2**10) 36 | def _get_thought_vector(self, text): 37 | """Sums the embeddings of all the words in ``text`` into a "thought 38 | vector".""" 39 | embeddings = [] 40 | for word in utils.words_from_text(text): 41 | embedding = self.word_embedding[word] 42 | if embedding is not None: # out-of-vocab words do not have embeddings 43 | embeddings.append(embedding) 44 | embeddings = torch.tensor(embeddings) 45 | return torch.mean(embeddings, dim=0) 46 | 47 | def encode(self, raw_text_list): 48 | return torch.stack([self._get_thought_vector(text) for text in raw_text_list]) 49 | 50 | def extra_repr_keys(self): 51 | """Set the extra representation of the constraint using these keys.""" 52 | return ["word_embedding"] + super().extra_repr_keys() 53 | -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Universal sentence encoder 3 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | """ 5 | 6 | from .universal_sentence_encoder import UniversalSentenceEncoder 7 | from .multilingual_universal_sentence_encoder import ( 8 | MultilingualUniversalSentenceEncoder, 9 | ) 10 | -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py: -------------------------------------------------------------------------------- 1 | """ 2 | multilingual universal sentence encoder 3 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | """ 5 | 6 | from textattack.constraints.semantics.sentence_encoders import SentenceEncoder 7 | from textattack.shared.utils import LazyLoader 8 | 9 | hub = LazyLoader("tensorflow_hub", globals(), "tensorflow_hub") 10 | tensorflow_text = LazyLoader("tensorflow_text", globals(), "tensorflow_text") 11 | 12 | 13 | class MultilingualUniversalSentenceEncoder(SentenceEncoder): 14 | """Constraint using similarity between sentence encodings of x and x_adv 15 | where the text embeddings are created using the Multilingual Universal 16 | Sentence Encoder.""" 17 | 18 | def __init__(self, threshold=0.8, large=False, metric="angular", **kwargs): 19 | super().__init__(threshold=threshold, metric=metric, **kwargs) 20 | tensorflow_text._load() 21 | if large: 22 | tfhub_url = "https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/3" 23 | else: 24 | tfhub_url = ( 25 | "https://tfhub.dev/google/universal-sentence-encoder-multilingual/3" 26 | ) 27 | 28 | # TODO add QA SET.
Details at: https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3 29 | self._tfhub_url = tfhub_url 30 | self.model = hub.load(tfhub_url) 31 | 32 | def encode(self, sentences): 33 | return self.model(sentences).numpy() 34 | 35 | def __getstate__(self): 36 | state = self.__dict__.copy() 37 | state["model"] = None 38 | return state 39 | 40 | def __setstate__(self, state): 41 | self.__dict__ = state 42 | self.model = hub.load(self._tfhub_url) 43 | -------------------------------------------------------------------------------- /textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py: -------------------------------------------------------------------------------- 1 | """ 2 | universal sentence encoder class 3 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | """ 5 | 6 | from textattack.constraints.semantics.sentence_encoders import SentenceEncoder 7 | from textattack.shared.utils import LazyLoader 8 | 9 | hub = LazyLoader("tensorflow_hub", globals(), "tensorflow_hub") 10 | 11 | 12 | class UniversalSentenceEncoder(SentenceEncoder): 13 | """Constraint using similarity between sentence encodings of x and x_adv 14 | where the text embeddings are created using the Universal Sentence 15 | Encoder.""" 16 | 17 | def __init__(self, threshold=0.8, large=False, metric="angular", **kwargs): 18 | super().__init__(threshold=threshold, metric=metric, **kwargs) 19 | if large: 20 | tfhub_url = "https://tfhub.dev/google/universal-sentence-encoder-large/5" 21 | else: 22 | tfhub_url = "https://tfhub.dev/google/universal-sentence-encoder/3" 23 | 24 | self._tfhub_url = tfhub_url 25 | # Lazily load the model 26 | self.model = None 27 | 28 | def encode(self, sentences): 29 | if not self.model: 30 | self.model = hub.load(self._tfhub_url) 31 | encoding = self.model(sentences) 32 | 33 | if isinstance(encoding, dict): 34 | encoding = encoding["outputs"] 35 | 36 | return encoding.numpy() 37 | 38 | def __getstate__(self): 39 | state = self.__dict__.copy() 40 | state["model"] = None 41 | return state 42 | 43 | def __setstate__(self, state): 44 | self.__dict__ = state 45 | self.model = None 46 | -------------------------------------------------------------------------------- /textattack/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | datasets package: 4 | ====================== 5 | 6 | TextAttack allows users to provide their own dataset or load from HuggingFace. 7 | 8 | 9 | """ 10 | 11 | from .dataset import Dataset 12 | from .huggingface_dataset import HuggingFaceDataset 13 | 14 | from . import helpers 15 | -------------------------------------------------------------------------------- /textattack/datasets/helpers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Dataset Helpers 4 | --------------------------------------------------------------------- 5 | """ 6 | 7 | from .ted_multi import TedMultiTranslationDataset 8 | -------------------------------------------------------------------------------- /textattack/goal_function_results/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Goal Function Result package: 3 | ============================= 4 | 5 | Goal function results report the result of a goal function evaluation, indicating whether an attack succeeded for a given example. 
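A result's ``goal_status`` attribute holds a ``GoalFunctionResultStatus`` value; a small illustrative sketch, assuming ``result`` is a ``GoalFunctionResult`` returned by an attack: >>> from textattack.goal_function_results import GoalFunctionResultStatus >>> result.goal_status == GoalFunctionResultStatus.SUCCEEDED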
6 | 7 | """ 8 | 9 | from .goal_function_result import GoalFunctionResult, GoalFunctionResultStatus 10 | 11 | from .classification_goal_function_result import ClassificationGoalFunctionResult 12 | from .text_to_text_goal_function_result import TextToTextGoalFunctionResult 13 | -------------------------------------------------------------------------------- /textattack/goal_function_results/text_to_text_goal_function_result.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | TextToTextGoalFunctionResult Class 4 | ==================================== 5 | 6 | text2text goal function Result 7 | 8 | """ 9 | 10 | from .goal_function_result import GoalFunctionResult 11 | 12 | 13 | class TextToTextGoalFunctionResult(GoalFunctionResult): 14 | """Represents the result of a text-to-text goal function.""" 15 | 16 | def __init__( 17 | self, 18 | attacked_text, 19 | raw_output, 20 | output, 21 | goal_status, 22 | score, 23 | num_queries, 24 | ground_truth_output, 25 | ): 26 | super().__init__( 27 | attacked_text, 28 | raw_output, 29 | output, 30 | goal_status, 31 | score, 32 | num_queries, 33 | ground_truth_output, 34 | goal_function_result_type="Text to Text", 35 | ) 36 | 37 | def get_text_color_input(self): 38 | """A string representing the color this result's changed portion should 39 | be if it represents the original input.""" 40 | return "red" 41 | 42 | def get_text_color_perturbed(self): 43 | """A string representing the color this result's changed portion should 44 | be if it represents the perturbed input.""" 45 | return "blue" 46 | 47 | def get_colored_output(self, color_method=None): 48 | """Returns a string representation of this result's output, colored 49 | according to `color_method`.""" 50 | return str(self.output) 51 | -------------------------------------------------------------------------------- /textattack/goal_functions/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _goal_functions: 2 | 3 | Goal Functions 4 | ================================================================== 5 | 6 | Goal Functions determine if an attack has been successful. 7 | """ 8 | 9 | from .goal_function import GoalFunction 10 | 11 | from .classification import * 12 | from .text import * 13 | -------------------------------------------------------------------------------- /textattack/goal_functions/classification/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Goal fucntion for Classification 4 | --------------------------------------------------------------------- 5 | 6 | """ 7 | 8 | from .input_reduction import InputReduction 9 | from .classification_goal_function import ClassificationGoalFunction 10 | from .untargeted_classification import UntargetedClassification 11 | from .targeted_classification import TargetedClassification 12 | -------------------------------------------------------------------------------- /textattack/goal_functions/classification/hardlabel_classification.py: -------------------------------------------------------------------------------- 1 | """ 2 | Determine if an attack has been successful in Hard Label Classficiation. 
3 | ------------------------------------------------------------------------ 4 | """ 5 | 6 | from .classification_goal_function import ClassificationGoalFunction 7 | 8 | 9 | class HardLabelClassification(ClassificationGoalFunction): 10 | """A hard label attack on classification models which attempts to perturb 11 | the input until the prediction falls outside of the original label's 12 | decision boundary. 13 | 14 | Args: 15 | target_max_score (float): If set, goal is to reduce model output to 16 | below this score. Otherwise, goal is to change the overall predicted 17 | class. 18 | """ 19 | 20 | def __init__(self, *args, target_max_score=None, **kwargs): 21 | self.target_max_score = target_max_score 22 | super().__init__(*args, **kwargs) 23 | 24 | def _is_goal_complete(self, model_output, _): 25 | if self.target_max_score: 26 | return model_output[self.ground_truth_output] < self.target_max_score 27 | elif (model_output.numel() == 1) and isinstance( 28 | self.ground_truth_output, float 29 | ): 30 | return abs(self.ground_truth_output - model_output.item()) >= 0.5 31 | else: 32 | return model_output.argmax() != self.ground_truth_output 33 | 34 | def _get_score(self, model_output, _): 35 | # If the model outputs a single number and the ground truth output is 36 | # a float, we assume that this is a regression task. 37 | if (model_output.numel() == 1) and isinstance(self.ground_truth_output, float): 38 | return max(model_output.item(), self.ground_truth_output) 39 | else: 40 | return 1 - model_output[self.ground_truth_output] 41 | -------------------------------------------------------------------------------- /textattack/goal_functions/classification/targeted_classification.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Determine if an attack has been successful in targeted Classification 4 | ----------------------------------------------------------------------- 5 | """ 6 | 7 | from .classification_goal_function import ClassificationGoalFunction 8 | 9 | 10 | class TargetedClassification(ClassificationGoalFunction): 11 | """A targeted attack on classification models which attempts to maximize 12 | the score of the target label. 13 | 14 | Complete when the target label is the predicted label. 15 | """ 16 | 17 | def __init__(self, *args, target_class=0, **kwargs): 18 | super().__init__(*args, **kwargs) 19 | self.target_class = target_class 20 | 21 | def _is_goal_complete(self, model_output, _): 22 | return ( 23 | self.target_class == model_output.argmax() 24 | ) or self.ground_truth_output == self.target_class 25 | 26 | def _get_score(self, model_output, _): 27 | if self.target_class < 0 or self.target_class >= len(model_output): 28 | raise ValueError( 29 | f"target class set to {self.target_class} with {len(model_output)} classes."
30 | ) 31 | else: 32 | return model_output[self.target_class] 33 | 34 | def extra_repr_keys(self): 35 | if self.maximizable: 36 | return ["maximizable", "target_class"] 37 | else: 38 | return ["target_class"] 39 | -------------------------------------------------------------------------------- /textattack/goal_functions/classification/untargeted_classification.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Determine if an attack has been successful in untargeted Classification 4 | -------------------------------------------------------------------------- 5 | """ 6 | 7 | from .classification_goal_function import ClassificationGoalFunction 8 | 9 | 10 | class UntargetedClassification(ClassificationGoalFunction): 11 | """An untargeted attack on classification models which attempts to minimize 12 | the score of the correct label until it is no longer the predicted label. 13 | 14 | Args: 15 | target_max_score (float): If set, goal is to reduce model output to 16 | below this score. Otherwise, goal is to change the overall predicted 17 | class. 18 | """ 19 | 20 | def __init__(self, *args, target_max_score=None, **kwargs): 21 | self.target_max_score = target_max_score 22 | super().__init__(*args, **kwargs) 23 | 24 | def _is_goal_complete(self, model_output, _): 25 | if self.target_max_score: 26 | return model_output[self.ground_truth_output] < self.target_max_score 27 | elif (model_output.numel() == 1) and isinstance( 28 | self.ground_truth_output, float 29 | ): 30 | return abs(self.ground_truth_output - model_output.item()) >= 0.5 31 | else: 32 | return model_output.argmax() != self.ground_truth_output 33 | 34 | def _get_score(self, model_output, _): 35 | # If the model outputs a single number and the ground truth output is 36 | # a float, we assume that this is a regression task. 37 | if (model_output.numel() == 1) and isinstance(self.ground_truth_output, float): 38 | return abs(model_output.item() - self.ground_truth_output) 39 | else: 40 | return 1 - model_output[self.ground_truth_output] 41 | -------------------------------------------------------------------------------- /textattack/goal_functions/text/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Goal Function for Text to Text case 4 | --------------------------------------------------------------------- 5 | 6 | """ 7 | 8 | from .minimize_bleu import MinimizeBleu 9 | from .non_overlapping_output import NonOverlappingOutput 10 | from .text_to_text_goal_function import TextToTextGoalFunction 11 | -------------------------------------------------------------------------------- /textattack/goal_functions/text/non_overlapping_output.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Goal Function for seq2sick 4 | ------------------------------------------------------- 5 | """ 6 | 7 | import functools 8 | 9 | import numpy as np 10 | 11 | from textattack.shared.utils import words_from_text 12 | 13 | from .text_to_text_goal_function import TextToTextGoalFunction 14 | 15 | 16 | class NonOverlappingOutput(TextToTextGoalFunction): 17 | """Ensures that the words at each position of the output and the ground 18 | truth output are not equal. 19 | 20 | Defined in seq2sick (https://arxiv.org/pdf/1803.01128.pdf), equation 21 | (3).
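Concretely, the score below is the fraction of positions, up to the length of the shorter output, at which the model output and the ground truth output disagree (with the ground truth length as the denominator); the goal is complete when the score reaches 1.0. For example, comparing "the cat sat" against the ground truth "the dog ran" differs at two of three positions, giving a score of 2/3.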
21 | """ 22 | 23 | def clear_cache(self): 24 | if self.use_cache: 25 | self._call_model_cache.clear() 26 | get_words_cached.cache_clear() 27 | word_difference_score.cache_clear() 28 | 29 | def _is_goal_complete(self, model_output, _): 30 | return self._get_score(model_output, self.ground_truth_output) == 1.0 31 | 32 | def _get_score(self, model_output, _): 33 | num_words_diff = word_difference_score(model_output, self.ground_truth_output) 34 | if num_words_diff == 0: 35 | return 0.0 36 | else: 37 | return num_words_diff / len(get_words_cached(self.ground_truth_output)) 38 | 39 | 40 | @functools.lru_cache(maxsize=2**12) 41 | def get_words_cached(s): 42 | return np.array(words_from_text(s)) 43 | 44 | 45 | @functools.lru_cache(maxsize=2**12) 46 | def word_difference_score(s1, s2): 47 | """Returns the number of words that are non-overlapping between s1 and 48 | s2.""" 49 | s1_words = get_words_cached(s1) 50 | s2_words = get_words_cached(s2) 51 | min_length = min(len(s1_words), len(s2_words)) 52 | if min_length == 0: 53 | return 0 54 | s1_words = s1_words[:min_length] 55 | s2_words = s2_words[:min_length] 56 | return (s1_words != s2_words).sum() 57 | -------------------------------------------------------------------------------- /textattack/goal_functions/text/text_to_text_goal_function.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Goal Function for TextToText 4 | ------------------------------------------------------- 5 | """ 6 | 7 | import numpy as np 8 | 9 | from textattack.goal_function_results import TextToTextGoalFunctionResult 10 | from textattack.goal_functions import GoalFunction 11 | 12 | 13 | class TextToTextGoalFunction(GoalFunction): 14 | """A goal function defined on a model that outputs text. 15 | 16 | model: The PyTorch or TensorFlow model used for evaluation. 17 | original_output: the original output of the model 18 | """ 19 | 20 | def _goal_function_result_type(self): 21 | """Returns the class of this goal function's results.""" 22 | return TextToTextGoalFunctionResult 23 | 24 | def _process_model_outputs(self, _, outputs): 25 | """Processes and validates a list of model outputs.""" 26 | if isinstance(outputs, np.ndarray): 27 | return outputs.flatten() 28 | else: 29 | return outputs 30 | 31 | def _get_displayed_output(self, raw_output): 32 | return raw_output 33 | -------------------------------------------------------------------------------- /textattack/llms/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Large Language Models 3 | ====================== 4 | 5 | TextAttack can generate responses to prompts using LLMs, which take in a list of strings and outputs a list of responses. 6 | 7 | We've provided an implementation around two common LLM patterns: 8 | 9 | 1. `HuggingFaceLLMWrapper` for LLMs in HuggingFace 10 | 2. `ChatGptWrapper` for OpenAI's ChatGPT model 11 | 12 | 13 | """ 14 | 15 | from .chat_gpt_wrapper import ChatGptWrapper 16 | from .huggingface_llm_wrapper import HuggingFaceLLMWrapper 17 | -------------------------------------------------------------------------------- /textattack/llms/chat_gpt_wrapper.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from textattack.models.wrappers import ModelWrapper 4 | 5 | 6 | class ChatGptWrapper(ModelWrapper): 7 | """A wrapper around OpenAI's ChatGPT model. Note that you must provide your 8 | own API key to use this wrapper. 
9 | 10 | Args: 11 | model_name (:obj:`str`): The name of the GPT model to use. See the OpenAI documentation 12 | for a list of the latest model names 13 | key_environment_variable (:obj:`str`, `optional`, defaults to :obj:`OPENAI_API_KEY`): 14 | The environment variable that the API key is set to 15 | """ 16 | 17 | def __init__( 18 | self, model_name="gpt-3.5-turbo", key_environment_variable="OPENAI_API_KEY" 19 | ): 20 | from openai import OpenAI 21 | 22 | self.model_name = model_name 23 | self.client = OpenAI(api_key=os.getenv(key_environment_variable)) 24 | 25 | def __call__(self, text_input_list): 26 | """Returns a list of responses to the given input list.""" 27 | if isinstance(text_input_list, str): 28 | text_input_list = [text_input_list] 29 | 30 | outputs = [] 31 | for text in text_input_list: 32 | completion = self.client.chat.completions.create( 33 | model=self.model_name, messages=[{"role": "user", "content": text}] 34 | ) 35 | # Keep just the text of the response, not the whole message object. 36 | outputs.append(completion.choices[0].message.content) 37 | 38 | return outputs 39 | -------------------------------------------------------------------------------- /textattack/llms/huggingface_llm_wrapper.py: -------------------------------------------------------------------------------- 1 | from textattack.models.wrappers import ModelWrapper 2 | 3 | 4 | class HuggingFaceLLMWrapper(ModelWrapper): 5 | """A wrapper around HuggingFace for LLMs. 6 | 7 | Args: 8 | model: A HuggingFace pretrained LLM 9 | tokenizer: A HuggingFace pretrained tokenizer 10 | """ 11 | 12 | def __init__(self, model, tokenizer): 13 | self.model = model 14 | self.tokenizer = tokenizer 15 | 16 | def __call__(self, text_input_list): 17 | """Returns a list of responses to the given input list.""" 18 | model_device = next(self.model.parameters()).device 19 | input_ids = self.tokenizer(text_input_list, return_tensors="pt").input_ids 20 | # `.to()` is not in-place for tensors, so reassign the result. 21 | input_ids = input_ids.to(model_device) 22 | 23 | outputs = self.model.generate( 24 | input_ids, max_new_tokens=512, pad_token_id=self.tokenizer.eos_token_id 25 | ) 26 | 27 | responses = self.tokenizer.batch_decode(outputs, skip_special_tokens=True) 28 | if len(text_input_list) == 1: 29 | return responses[0] 30 | return responses 31 | -------------------------------------------------------------------------------- /textattack/loggers/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _loggers: 2 | 3 | Misc Loggers: Loggers track, visualize, and export attack results. 4 | =================================================================== 5 | """ 6 | 7 | from .csv_logger import CSVLogger 8 | from .file_logger import FileLogger 9 | from .logger import Logger 10 | from .visdom_logger import VisdomLogger 11 | from .weights_and_biases_logger import WeightsAndBiasesLogger 12 | from .json_summary_logger import JsonSummaryLogger 13 | 14 | # AttackLogManager must be imported last, since it imports the other loggers.
15 | from .attack_log_manager import AttackLogManager 16 | -------------------------------------------------------------------------------- /textattack/loggers/json_summary_logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | Attack Summary Results Logged to JSON 3 | ===================================== 4 | """ 5 | 6 | import json 7 | 8 | from textattack.shared import logger 9 | 10 | from .logger import Logger 11 | 12 | 13 | class JsonSummaryLogger(Logger): 14 | def __init__(self, filename="results_summary.json"): 15 | logger.info(f"Logging Summary to JSON at path {filename}") 16 | self.filename = filename 17 | self.json_dictionary = {} 18 | self._flushed = True 19 | 20 | def log_summary_rows(self, rows, title, window_id): 21 | self.json_dictionary[title] = {} 22 | for i in range(len(rows)): 23 | row = rows[i] 24 | if isinstance(row[1], str): 25 | try: 26 | row[1] = row[1].replace("%", "") 27 | row[1] = float(row[1]) 28 | except ValueError: 29 | raise ValueError( 30 | f'Unable to convert row value "{row[1]}" for Attack Result "{row[0]}" into float' 31 | ) 32 | 33 | for metric, summary in rows: 34 | self.json_dictionary[title][metric] = summary 35 | 36 | self._flushed = False 37 | 38 | def flush(self): 39 | with open(self.filename, "w") as f: 40 | json.dump(self.json_dictionary, f, indent=4) 41 | 42 | self._flushed = True 43 | 44 | def close(self): 45 | super().close() 46 | -------------------------------------------------------------------------------- /textattack/loggers/logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | Attack Logger Wrapper 3 | ======================== 4 | """ 5 | 6 | from abc import ABC 7 | 8 | 9 | class Logger(ABC): 10 | """An abstract class for different methods of logging attack results.""" 11 | 12 | def __init__(self): 13 | pass 14 | 15 | def log_attack_result(self, result, examples_completed=None): 16 | pass 17 | 18 | def log_summary_rows(self, rows, title, window_id): 19 | pass 20 | 21 | def log_hist(self, arr, numbins, title, window_id): 22 | pass 23 | 24 | def log_sep(self): 25 | pass 26 | 27 | def flush(self): 28 | pass 29 | 30 | def close(self): 31 | pass 32 | -------------------------------------------------------------------------------- /textattack/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _metrics: 2 | 3 | metrics package: to calculate advanced metrics for evaluating attacks and augmented text 4 | ======================================================================================== 5 | """ 6 | 7 | from .metric import Metric 8 | 9 | from .attack_metrics import AttackSuccessRate 10 | from .attack_metrics import WordsPerturbed 11 | from .attack_metrics import AttackQueries 12 | 13 | from .quality_metrics import Perplexity 14 | from .quality_metrics import USEMetric 15 | from .quality_metrics import SBERTMetric 16 | from .quality_metrics import BERTScoreMetric 17 | from .quality_metrics import MeteorMetric 18 | -------------------------------------------------------------------------------- /textattack/metrics/attack_metrics/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | attack_metrics package: 4 | --------------------------------------------------------------------- 5 | 6 | TextAttack provides users with common metrics on attack quality.
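A sketch of computing one of these metrics, assuming ``results`` is a list of ``AttackResult`` objects produced by an attack run: >>> from textattack.metrics.attack_metrics import AttackSuccessRate >>> AttackSuccessRate().calculate(results)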
7 | 8 | """ 9 | 10 | from .attack_queries import AttackQueries 11 | from .attack_success_rate import AttackSuccessRate 12 | from .words_perturbed import WordsPerturbed 13 | -------------------------------------------------------------------------------- /textattack/metrics/attack_metrics/attack_queries.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Metrics on AttackQueries 4 | --------------------------------------------------------------------- 5 | 6 | """ 7 | 8 | import numpy as np 9 | 10 | from textattack.attack_results import SkippedAttackResult 11 | from textattack.metrics import Metric 12 | 13 | 14 | class AttackQueries(Metric): 15 | def __init__(self): 16 | self.all_metrics = {} 17 | 18 | def calculate(self, results): 19 | """Calculates all metrics related to number of queries in an attack. 20 | 21 | Args: 22 | results (``AttackResult`` objects): 23 | Attack results for each instance in dataset 24 | """ 25 | 26 | self.results = results 27 | self.num_queries = np.array( 28 | [ 29 | r.num_queries 30 | for r in self.results 31 | if not isinstance(r, SkippedAttackResult) 32 | ] 33 | ) 34 | self.all_metrics["avg_num_queries"] = self.avg_num_queries() 35 | 36 | return self.all_metrics 37 | 38 | def avg_num_queries(self): 39 | avg_num_queries = self.num_queries.mean() 40 | avg_num_queries = round(avg_num_queries, 2) 41 | return avg_num_queries 42 | -------------------------------------------------------------------------------- /textattack/metrics/metric.py: -------------------------------------------------------------------------------- 1 | """ 2 | Metric Class 3 | ======================== 4 | 5 | """ 6 | 7 | from abc import ABC, abstractmethod 8 | 9 | 10 | class Metric(ABC): 11 | """A metric for evaluating results and data quality.""" 12 | 13 | @abstractmethod 14 | def __init__(self, **kwargs): 15 | """Creates pre-built :class:`~textattack.Metric` that correspond to 16 | evaluation metrics for adversarial examples.""" 17 | raise NotImplementedError() 18 | 19 | @abstractmethod 20 | def calculate(self, results): 21 | """Abstract function for computing any values which are to be calculated as a whole during initialization 22 | Args: 23 | results (``AttackResult`` objects): 24 | Attack results for each instance in dataset 25 | """ 26 | 27 | raise NotImplementedError 28 | -------------------------------------------------------------------------------- /textattack/metrics/quality_metrics/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Metrics on Quality package 4 | --------------------------------------------------------------------- 5 | 6 | TextAttack provide users common metrics on text examples' quality. 
7 | 8 | 9 | """ 10 | 11 | from .perplexity import Perplexity 12 | from .use import USEMetric 13 | from .sentence_bert import SBERTMetric 14 | from .bert_score import BERTScoreMetric 15 | from .meteor_score import MeteorMetric 16 | -------------------------------------------------------------------------------- /textattack/metrics/recipe.py: -------------------------------------------------------------------------------- 1 | """ 2 | Attack Metric Quality Recipes: 3 | ============================== 4 | 5 | """ 6 | 7 | from textattack.metrics.quality_metrics.bert_score import BERTScoreMetric 8 | from textattack.metrics.quality_metrics.meteor_score import MeteorMetric 9 | from textattack.metrics.quality_metrics.perplexity import Perplexity 10 | from textattack.metrics.quality_metrics.sentence_bert import SBERTMetric 11 | from textattack.metrics.quality_metrics.use import USEMetric 12 | 13 | from .metric import Metric 14 | 15 | 16 | class AdvancedAttackMetric(Metric): 17 | """Calculate a suite of advanced metrics to evaluate attackResults' 18 | quality.""" 19 | 20 | def __init__(self, choices=["use"]): 21 | self.achoices = choices 22 | 23 | def calculate(self, results): 24 | advanced_metrics = {} 25 | if "use" in self.achoices: 26 | advanced_metrics.update(USEMetric().calculate(results)) 27 | if "perplexity" in self.achoices: 28 | advanced_metrics.update(Perplexity().calculate(results)) 29 | if "bert_score" in self.achoices: 30 | advanced_metrics.update(BERTScoreMetric().calculate(results)) 31 | if "meteor_score" in self.achoices: 32 | advanced_metrics.update(MeteorMetric().calculate(results)) 33 | if "sbert_score" in self.achoices: 34 | advanced_metrics.update(SBERTMetric().calculate(results)) 35 | return advanced_metrics 36 | -------------------------------------------------------------------------------- /textattack/models/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _models: 2 | 3 | Models 4 | ========= 5 | 6 | TextAttack can attack any model that takes a list of strings as input and outputs a list of predictions. This is the idea behind *model wrappers*: to help your model conform to this API, we've provided the ``textattack.models.wrappers.ModelWrapper`` abstract class. 7 | 8 | We've also provided implementations of model wrappers for common patterns in some popular machine learning frameworks: 9 | 10 | 11 | Models User-specified 12 | -------------------------- 13 | 14 | TextAttack allows users to provide their own models for testing. Models can be loaded in three ways: 15 | 16 | 1. ``--model`` for pre-trained models and models trained with TextAttack 17 | 2. ``--model-from-huggingface`` which will attempt to load any model from the ``HuggingFace model hub `` 18 | 3. ``--model-from-file`` which will dynamically load a Python file and look for the ``model`` variable 19 | 20 | 21 | 22 | Models Pre-trained 23 | -------------------------- 24 | 25 | TextAttack also provides lots of pre-trained models for common tasks. Testing different attacks on the same model ensures attack comparisons are fair. 26 | 27 | Any of these models can be provided to ``textattack attack`` via ``--model``, for example, ``--model bert-base-uncased-mr``. For a full list of pre-trained models, see the `pre-trained models README `_. 28 | 29 | 30 | Model Wrappers 31 | -------------------------- 32 | TextAttack can attack any model that takes a list of strings as input and outputs a list of predictions. 
This is the idea behind *model wrappers*: to help your model conform to this API, we've provided the ``textattack.models.wrappers.ModelWrapper`` abstract class. 33 | 34 | 35 | We've also provided implementations of model wrappers for common patterns in some popular machine learning frameworks: including pytorch / sklearn / tensorflow. 36 | """ 37 | 38 | from . import helpers 39 | from . import tokenizers 40 | from . import wrappers 41 | -------------------------------------------------------------------------------- /textattack/models/helpers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Model Helpers 3 | ------------------ 4 | """ 5 | 6 | # Helper stuff, like embeddings. 7 | from . import utils 8 | from .glove_embedding_layer import GloveEmbeddingLayer 9 | 10 | # Helper modules. 11 | from .lstm_for_classification import LSTMForClassification 12 | from .t5_for_text_to_text import T5ForTextToText 13 | from .word_cnn_for_classification import WordCNNForClassification 14 | -------------------------------------------------------------------------------- /textattack/models/helpers/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Util function for Model Wrapper 3 | --------------------------------------------------------------------- 4 | 5 | """ 6 | 7 | import glob 8 | import os 9 | 10 | import torch 11 | 12 | 13 | def load_cached_state_dict(model_folder_path): 14 | # Take the first model matching the pattern *model.bin. 15 | model_path_list = glob.glob(os.path.join(model_folder_path, "*model.bin")) 16 | if not model_path_list: 17 | raise FileNotFoundError( 18 | f"model.bin not found in model folder {model_folder_path}." 19 | ) 20 | model_path = model_path_list[0] 21 | state_dict = torch.load(model_path, map_location=torch.device("cpu")) 22 | return state_dict 23 | -------------------------------------------------------------------------------- /textattack/models/tokenizers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tokenizers for Model Wrapper 3 | ------------------------------- 4 | """ 5 | 6 | from .glove_tokenizer import GloveTokenizer 7 | from .t5_tokenizer import T5Tokenizer 8 | -------------------------------------------------------------------------------- /textattack/models/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Model Wrappers Package 3 | -------------------------- 4 | TextAttack can attack any model that takes a list of strings as input and outputs a list of predictions. This is the idea behind *model wrappers*: to help your model conform to this API, we've provided the ``textattack.models.wrappers.ModelWrapper`` abstract class.
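A minimal sketch of a custom wrapper (``my_model`` here is a hypothetical callable that maps a list of strings to a list of score lists): >>> from textattack.models.wrappers import ModelWrapper >>> class MyModelWrapper(ModelWrapper): ...     def __init__(self, model): ...         self.model = model ...     def __call__(self, text_input_list): ...         return self.model(text_input_list) >>> model_wrapper = MyModelWrapper(my_model)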
5 | 6 | 7 | We've also provided implementations of model wrappers for common patterns in some popular machine learning frameworks: 8 | 9 | """ 10 | 11 | from .model_wrapper import ModelWrapper 12 | 13 | from .huggingface_model_wrapper import HuggingFaceModelWrapper 14 | from .pytorch_model_wrapper import PyTorchModelWrapper 15 | from .sklearn_model_wrapper import SklearnModelWrapper 16 | from .tensorflow_model_wrapper import TensorFlowModelWrapper 17 | -------------------------------------------------------------------------------- /textattack/models/wrappers/model_wrapper.py: -------------------------------------------------------------------------------- 1 | """ 2 | ModelWrapper class 3 | -------------------------- 4 | 5 | """ 6 | 7 | from abc import ABC, abstractmethod 8 | 9 | 10 | class ModelWrapper(ABC): 11 | """A model wrapper queries a model with a list of text inputs. 12 | 13 | Classification-based models return a list of lists, where each 14 | sublist represents the model's scores for a given input. 15 | 16 | Text-to-text models return a list of strings, where each string is 17 | the output – like a translation or summarization – for a given 18 | input. 19 | """ 20 | 21 | @abstractmethod 22 | def __call__(self, text_input_list, **kwargs): 23 | raise NotImplementedError() 24 | 25 | def get_grad(self, text_input): 26 | """Get gradient of loss with respect to input tokens.""" 27 | raise NotImplementedError() 28 | 29 | def _tokenize(self, inputs): 30 | """Helper method for `tokenize`""" 31 | raise NotImplementedError() 32 | 33 | def tokenize(self, inputs, strip_prefix=False): 34 | """Helper method that tokenizes input strings 35 | Args: 36 | inputs (list[str]): list of input strings 37 | strip_prefix (bool): If `True`, we strip auxiliary characters added to tokens as prefixes (e.g. "##" for BERT, "Ġ" for RoBERTa) 38 | Returns: 39 | tokens (list[list[str]]): List of list of tokens as strings 40 | """ 41 | tokens = self._tokenize(inputs) 42 | if strip_prefix: 43 | # `aux_chars` are known auxiliary characters that are added to tokens 44 | strip_chars = ["##", "Ġ", "__"] 45 | # TODO: Find a better way to identify prefixes. These depend on the model, so cannot be resolved in ModelWrapper. 46 | 47 | def strip(s, chars): 48 | for c in chars: 49 | s = s.replace(c, "") 50 | return s 51 | 52 | tokens = [[strip(t, strip_chars) for t in x] for x in tokens] 53 | 54 | return tokens 55 | -------------------------------------------------------------------------------- /textattack/models/wrappers/sklearn_model_wrapper.py: -------------------------------------------------------------------------------- 1 | """ 2 | scikit-learn Model Wrapper 3 | -------------------------- 4 | """ 5 | 6 | import pandas as pd 7 | 8 | from .model_wrapper import ModelWrapper 9 | 10 | 11 | class SklearnModelWrapper(ModelWrapper): 12 | """Loads a scikit-learn model and tokenizer (tokenizer implements 13 | `transform` and model implements `predict_proba`). 14 | 15 | May need to be extended and modified for different types of 16 | tokenizers. 
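A usage sketch, assuming ``vectorizer`` is a fitted scikit-learn vectorizer exposing ``transform``/``get_feature_names`` and ``clf`` is a fitted classifier exposing ``predict_proba``: >>> from textattack.models.wrappers import SklearnModelWrapper >>> model_wrapper = SklearnModelWrapper(clf, vectorizer)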
17 | """ 18 | 19 | def __init__(self, model, tokenizer): 20 | self.model = model 21 | self.tokenizer = tokenizer 22 | 23 | def __call__(self, text_input_list, batch_size=None): 24 | encoded_text_matrix = self.tokenizer.transform(text_input_list).toarray() 25 | tokenized_text_df = pd.DataFrame( 26 | encoded_text_matrix, columns=self.tokenizer.get_feature_names() 27 | ) 28 | return self.model.predict_proba(tokenized_text_df) 29 | 30 | def get_grad(self, text_input): 31 | raise NotImplementedError() 32 | -------------------------------------------------------------------------------- /textattack/models/wrappers/tensorflow_model_wrapper.py: -------------------------------------------------------------------------------- 1 | """ 2 | TensorFlow Model Wrapper 3 | -------------------------- 4 | """ 5 | 6 | import numpy as np 7 | 8 | from .model_wrapper import ModelWrapper 9 | 10 | 11 | class TensorFlowModelWrapper(ModelWrapper): 12 | """Loads a TensorFlow model and tokenizer. 13 | 14 | TensorFlow models can use many different architectures and 15 | tokenization strategies. This assumes that the model takes an 16 | np.array of strings as input and returns a tf.Tensor of outputs, as 17 | is typical with Keras modules. You may need to subclass this for 18 | models that have dedicated tokenizers or otherwise take input 19 | differently. 20 | """ 21 | 22 | def __init__(self, model): 23 | self.model = model 24 | 25 | def __call__(self, text_input_list, **kwargs): 26 | text_array = np.array(text_input_list) 27 | preds = self.model(text_array) 28 | return preds.numpy() 29 | 30 | def get_grad(self, text_input): 31 | raise NotImplementedError() 32 | -------------------------------------------------------------------------------- /textattack/prompt_augmentation/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Prompt Augmentation 3 | ===================== 4 | 5 | This package includes functions used to augment a prompt for a LLM 6 | 7 | """ 8 | 9 | from .prompt_augmentation_pipeline import PromptAugmentationPipeline 10 | -------------------------------------------------------------------------------- /textattack/prompt_augmentation/prompt_augmentation_pipeline.py: -------------------------------------------------------------------------------- 1 | from textattack.constraints import PreTransformationConstraint 2 | 3 | 4 | class PromptAugmentationPipeline: 5 | """A prompt augmentation pipeline to augment a prompt and obtain the 6 | responses from a LLM on the augmented prompts. 7 | 8 | Args: 9 | augmenter (textattack.Augmenter): the augmenter to use to 10 | augment the prompt 11 | llm (textattack.ModelWrapper): the LLM to generate responses 12 | to the augmented data 13 | """ 14 | 15 | def __init__(self, augmenter, llm): 16 | self.augmenter = augmenter 17 | self.llm = llm 18 | 19 | def __call__(self, prompt, prompt_constraints=[]): 20 | """Augments the given prompt using the augmenter and generates 21 | responses using the LLM. 
22 | 23 | Args: 24 | prompt (:obj:`str`): the prompt to augment and generate responses for 25 | prompt_constraints (List(textattack.constraints.PreTransformationConstraint)): a list of pretransformation 26 | constraints to apply to the given prompt 27 | 28 | Returns a list of tuples of strings, where the first string in the pair is the augmented prompt and the second 29 | is the response to the augmented prompt from the LLM. 30 | """ 31 | for constraint in prompt_constraints: 32 | if isinstance(constraint, PreTransformationConstraint): 33 | self.augmenter.pre_transformation_constraints.append(constraint) 34 | else: 35 | raise ValueError( 36 | "Prompt constraints must be of type PreTransformationConstraint" 37 | ) 38 | 39 | augmented_prompts = self.augmenter.augment(prompt) 40 | for _ in range(len(prompt_constraints)): 41 | self.augmenter.pre_transformation_constraints.pop() 42 | 43 | outputs = [] 44 | for augmented_prompt in augmented_prompts: 45 | outputs.append((augmented_prompt, self.llm(augmented_prompt))) 46 | return outputs 47 | -------------------------------------------------------------------------------- /textattack/search_methods/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _search_methods: 2 | 3 | Search Methods 4 | ======================== 5 | 6 | Search methods explore the transformation space in an attempt to find a successful attack as determined by a :ref:`goal function <goal_functions>` and a list of :ref:`constraints <constraints>` 7 | """ 8 | 9 | from .search_method import SearchMethod 10 | from .beam_search import BeamSearch 11 | from .greedy_search import GreedySearch 12 | from .greedy_word_swap_wir import GreedyWordSwapWIR 13 | from .population_based_search import PopulationBasedSearch, PopulationMember 14 | from .genetic_algorithm import GeneticAlgorithm 15 | from .alzantot_genetic_algorithm import AlzantotGeneticAlgorithm 16 | from .improved_genetic_algorithm import ImprovedGeneticAlgorithm 17 | from .particle_swarm_optimization import ParticleSwarmOptimization 18 | -------------------------------------------------------------------------------- /textattack/search_methods/greedy_search.py: -------------------------------------------------------------------------------- 1 | """ 2 | Greedy Search 3 | ================= 4 | """ 5 | 6 | from .beam_search import BeamSearch 7 | 8 | 9 | class GreedySearch(BeamSearch): 10 | """A search method that greedily chooses from a list of possible 11 | perturbations. 12 | 13 | Implemented by calling ``BeamSearch`` with beam_width set to 1. 14 | """ 15 | 16 | def __init__(self): 17 | super().__init__(beam_width=1) 18 | 19 | def extra_repr_keys(self): 20 | return [] 21 | -------------------------------------------------------------------------------- /textattack/shared/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Shared TextAttack Functions 3 | ============================= 4 | 5 | This package includes functions shared across packages. 6 | 7 | """ 8 | 9 | from . import data 10 | from . import utils 11 | from .utils import logger 12 | from .
import validators 13 | 14 | from .attacked_text import AttackedText 15 | from .word_embeddings import AbstractWordEmbedding, WordEmbedding, GensimWordEmbedding 16 | from .checkpoint import AttackCheckpoint 17 | -------------------------------------------------------------------------------- /textattack/shared/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .install import * 2 | from .misc import * 3 | from .strings import * 4 | from .tensor import * 5 | from .importing import * 6 | -------------------------------------------------------------------------------- /textattack/shared/utils/tensor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def batch_model_predict(model_predict, inputs, batch_size=32): 6 | """Runs prediction on iterable ``inputs`` using batch size ``batch_size``. 7 | 8 | Aggregates all predictions into an ``np.ndarray``. 9 | """ 10 | outputs = [] 11 | i = 0 12 | while i < len(inputs): 13 | batch = inputs[i : i + batch_size] 14 | batch_preds = model_predict(batch) 15 | 16 | # Some seq-to-seq models will return a single string as a prediction 17 | # for a single-string list. Wrap these in a list. 18 | if isinstance(batch_preds, str): 19 | batch_preds = [batch_preds] 20 | 21 | # Get PyTorch tensors off of other devices. 22 | if isinstance(batch_preds, torch.Tensor): 23 | batch_preds = batch_preds.cpu() 24 | 25 | # Cast all predictions iterables to ``np.ndarray`` types. 26 | if not isinstance(batch_preds, np.ndarray): 27 | batch_preds = np.array(batch_preds) 28 | outputs.append(batch_preds) 29 | i += batch_size 30 | 31 | return np.concatenate(outputs, axis=0) 32 | -------------------------------------------------------------------------------- /textattack/transformations/__init__.py: -------------------------------------------------------------------------------- 1 | """.. _transformations: 2 | 3 | Transformations 4 | ========================== 5 | 6 | A transformation is a method which perturbs a text input through the insertion, deletion, and substitution of words, characters, and phrases. All transformations take an ``AttackedText`` as input and return a list of ``AttackedText`` that contains possible transformations. Every transformation is a subclass of the abstract ``Transformation`` class.
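Transformations can also be composed; a small sketch using transformations exported by this package: >>> from textattack.transformations import CompositeTransformation, WordDeletion, WordInnerSwapRandom >>> transformation = CompositeTransformation([WordDeletion(), WordInnerSwapRandom()])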
7 | """ 8 | 9 | from .transformation import Transformation 10 | 11 | from .sentence_transformations import * 12 | from .word_swaps import * 13 | from .word_insertions import * 14 | from .word_merges import * 15 | 16 | from .composite_transformation import CompositeTransformation 17 | from .word_deletion import WordDeletion 18 | from .word_innerswap_random import WordInnerSwapRandom 19 | -------------------------------------------------------------------------------- /textattack/transformations/sentence_transformations/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | sentence_transformations package 3 | ----------------------------------- 4 | 5 | """ 6 | 7 | from .sentence_transformation import SentenceTransformation 8 | from .back_translation import BackTranslation 9 | from .back_transcription import BackTranscription 10 | -------------------------------------------------------------------------------- /textattack/transformations/sentence_transformations/sentence_transformation.py: -------------------------------------------------------------------------------- 1 | """ 2 | SentenceTransformation class 3 | ----------------------------------- 4 | 5 | https://github.com/makcedward/nlpaug 6 | 7 | """ 8 | 9 | from textattack.transformations import Transformation 10 | 11 | 12 | class SentenceTransformation(Transformation): 13 | def _get_transformations(self, current_text, indices_to_modify): 14 | raise NotImplementedError() 15 | -------------------------------------------------------------------------------- /textattack/transformations/word_deletion.py: -------------------------------------------------------------------------------- 1 | """ 2 | word deletion Transformation 3 | ============================================ 4 | 5 | """ 6 | 7 | from .transformation import Transformation 8 | 9 | 10 | class WordDeletion(Transformation): 11 | """An abstract class that takes a sentence and transforms it by deleting a 12 | single word. 
13 | 14 | 15 | """ 16 | 17 | def _get_transformations(self, current_text, indices_to_modify): 18 | transformed_texts = [] 19 | if len(current_text.words) > 1: 20 | for i in indices_to_modify: 21 | transformed_texts.append(current_text.delete_word_at_index(i)) 22 | return transformed_texts 23 | -------------------------------------------------------------------------------- /textattack/transformations/word_innerswap_random.py: -------------------------------------------------------------------------------- 1 | """ 2 | Word Swap Transformation by swapping the order of words 3 | ========================================================== 4 | """ 5 | 6 | import random 7 | 8 | from textattack.transformations import Transformation 9 | 10 | 11 | class WordInnerSwapRandom(Transformation): 12 | """Transformation that randomly swaps the order of words in a sequence.""" 13 | 14 | def _get_transformations(self, current_text, indices_to_modify): 15 | transformed_texts = [] 16 | words = current_text.words 17 | for idx in indices_to_modify: 18 | word = words[idx] 19 | swap_idxs = list(set(range(len(words))) - {idx}) 20 | if swap_idxs: 21 | swap_idx = random.choice(swap_idxs) 22 | swapped_text = current_text.replace_word_at_index( 23 | idx, words[swap_idx] 24 | ).replace_word_at_index(swap_idx, word) 25 | transformed_texts.append(swapped_text) 26 | return transformed_texts 27 | 28 | @property 29 | def deterministic(self): 30 | return False 31 | -------------------------------------------------------------------------------- /textattack/transformations/word_insertions/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | word_insertions package 3 | ------------------------------- 4 | 5 | """ 6 | 7 | from .word_insertion import WordInsertion 8 | from .word_insertion_random_synonym import WordInsertionRandomSynonym 9 | from .word_insertion_masked_lm import WordInsertionMaskedLM 10 | -------------------------------------------------------------------------------- /textattack/transformations/word_insertions/word_insertion.py: -------------------------------------------------------------------------------- 1 | """ 2 | WordInsertion Class 3 | ------------------------------- 4 | Word Insertion transformations act by inserting a new word at a specific word index. 5 | For example, if we insert "new" in position 3 in the text "I like the movie", we get "I like the new movie". 6 | Subclasses can implement the abstract ``WordInsertion`` class by overriding ``self._get_new_words``. 7 | """ 8 | 9 | from textattack.transformations import Transformation 10 | 11 | 12 | class WordInsertion(Transformation): 13 | """A base class for word insertions.""" 14 | 15 | def _get_new_words(self, current_text, index): 16 | """Returns a set of new words we can insert at position `index` of `current_text` 17 | Args: 18 | current_text (AttackedText): Current text to modify. 19 | index (int): Position in which to insert a new word 20 | Returns: 21 | list[str]: List of new words to insert. 22 | """ 23 | raise NotImplementedError() 24 | 25 | def _get_transformations(self, current_text, indices_to_modify): 26 | """ 27 | Return a set of transformed texts obtained by inserting a new word at each position in `indices_to_modify` 28 | Args: 29 | current_text (AttackedText): Current text to modify. 30 | indices_to_modify (list[int]): List of positions in which to insert a new word.
31 | 32 | Returns: 33 | list[AttackedText]: List of transformed texts 34 | """ 35 | transformed_texts = [] 36 | 37 | for i in indices_to_modify: 38 | new_words = self._get_new_words(current_text, i) 39 | 40 | new_transformed_texts = [] 41 | for w in new_words: 42 | new_transformed_texts.append( 43 | current_text.insert_text_before_word_index(i, w) 44 | ) 45 | transformed_texts.extend(new_transformed_texts) 46 | 47 | return transformed_texts 48 | -------------------------------------------------------------------------------- /textattack/transformations/word_insertions/word_insertion_random_synonym.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | WordInsertionRandomSynonym Class 4 | ------------------------------------ 5 | random synonym insertion Transformation 6 | """ 7 | 8 | import random 9 | 10 | from nltk.corpus import wordnet 11 | 12 | from .word_insertion import WordInsertion 13 | 14 | 15 | class WordInsertionRandomSynonym(WordInsertion): 16 | """Transformation that inserts synonyms of words that are already in the 17 | sequence.""" 18 | 19 | def _get_synonyms(self, word): 20 | synonyms = set() 21 | for syn in wordnet.synsets(word): 22 | for lemma in syn.lemmas(): 23 | if lemma.name() != word and check_if_one_word(lemma.name()): 24 | synonyms.add(lemma.name()) 25 | return list(synonyms) 26 | 27 | def _get_transformations(self, current_text, indices_to_modify): 28 | transformed_texts = [] 29 | for idx in indices_to_modify: 30 | synonyms = [] 31 | # try to find a word with synonyms, and deal with edge case where there aren't any 32 | for attempt in range(7): 33 | synonyms = self._get_synonyms(random.choice(current_text.words)) 34 | if synonyms: 35 | break 36 | elif attempt == 6: 37 | return [current_text] 38 | random_synonym = random.choice(synonyms) 39 | transformed_texts.append( 40 | current_text.insert_text_before_word_index(idx, random_synonym) 41 | ) 42 | return transformed_texts 43 | 44 | @property 45 | def deterministic(self): 46 | return False 47 | 48 | 49 | def check_if_one_word(word): 50 | for c in word: 51 | if not c.isalpha(): 52 | return False 53 | return True 54 | -------------------------------------------------------------------------------- /textattack/transformations/word_merges/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | word_merges package 3 | ------------------------------------------------ 4 | 5 | """ 6 | 7 | from .word_merge import WordMerge 8 | from .word_merge_masked_lm import WordMergeMaskedLM 9 | -------------------------------------------------------------------------------- /textattack/transformations/word_swaps/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | word_swaps package 3 | ------------------------------- 4 | 5 | """ 6 | 7 | from .word_swap import WordSwap 8 | 9 | # Black box transformations 10 | from .chn_transformations import * 11 | from .word_swap_embedding import WordSwapEmbedding 12 | from .word_swap_hownet import WordSwapHowNet 13 | from .word_swap_homoglyph_swap import WordSwapHomoglyphSwap 14 | from .word_swap_inflections import WordSwapInflections 15 | from .word_swap_neighboring_character_swap import WordSwapNeighboringCharacterSwap 16 | from .word_swap_random_character_deletion import WordSwapRandomCharacterDeletion 17 | from .word_swap_random_character_insertion import WordSwapRandomCharacterInsertion 18 | from .word_swap_random_character_substitution import
WordSwapRandomCharacterSubstitution 19 | from .word_swap_wordnet import WordSwapWordNet 20 | from .word_swap_masked_lm import WordSwapMaskedLM 21 | from .word_swap_qwerty import WordSwapQWERTY 22 | from .word_swap_contract import WordSwapContract 23 | from .word_swap_extend import WordSwapExtend 24 | from .word_swap_change_number import WordSwapChangeNumber 25 | from .word_swap_change_location import WordSwapChangeLocation 26 | from .word_swap_change_name import WordSwapChangeName 27 | 28 | # White box transformation 29 | from .word_swap_gradient_based import WordSwapGradientBased 30 | -------------------------------------------------------------------------------- /textattack/transformations/word_swaps/chn_transformations/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | chinese_transformations package 3 | ----------------------------------- 4 | 5 | """ 6 | 7 | from textattack.transformations.word_swaps.word_swap import WordSwap 8 | from .chinese_homophone_character_swap import ChineseHomophoneCharacterSwap 9 | from .chinese_morphonym_character_swap import ChineseMorphonymCharacterSwap 10 | from .chinese_word_swap_masked import ChineseWordSwapMaskedLM 11 | from .chinese_word_swap_hownet import ChineseWordSwapHowNet 12 | -------------------------------------------------------------------------------- /textattack/transformations/word_swaps/chn_transformations/chinese_morphonym_character_swap.py: -------------------------------------------------------------------------------- 1 | """ 2 | Word Swap by chinese morphonym 3 | ------------------------------------- 4 | """ 5 | 6 | from textattack.shared.data import MORPHONYM_LS 7 | 8 | from . import WordSwap 9 | 10 | 11 | class ChineseMorphonymCharacterSwap(WordSwap): 12 | """Transforms an input by replacing its words with synonyms provided by a 13 | morphonym dictionary.""" 14 | 15 | def __init__(self, **kwargs): 16 | super().__init__(**kwargs) 17 | 18 | def _get_replacement_words(self, word): 19 | """Returns a list containing all possible words with 1 character 20 | replaced by a morphonym.""" 21 | word = list(word) 22 | candidate_words = set() 23 | for i in range(len(word)): 24 | character = word[i] 25 | for char_morpho_ls in MORPHONYM_LS: 26 | if character in char_morpho_ls: 27 | for new_char in char_morpho_ls: 28 | # Copy the list so the original characters are not 29 | # mutated across candidates. 30 | temp_word = word.copy() 31 | temp_word[i] = new_char 32 | candidate_words.add("".join(temp_word)) 33 | return list(candidate_words) 34 | -------------------------------------------------------------------------------- /textattack/transformations/word_swaps/chn_transformations/chinese_word_swap_hownet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Word Swap by chinese hownet 3 | ------------------------------------- 4 | """ 5 | 6 | import OpenHowNet 7 | 8 | from .
--------------------------------------------------------------------------------
/textattack/transformations/word_swaps/chn_transformations/chinese_word_swap_hownet.py:
--------------------------------------------------------------------------------
"""
Word Swap by Chinese HowNet
-------------------------------------
"""

import OpenHowNet

from . import WordSwap


class ChineseWordSwapHowNet(WordSwap):
    """Transforms an input by replacing its words with synonyms provided by
    OpenHowNet http://nlp.csai.tsinghua.edu.cn/."""

    def __init__(self, topk=5):
        super().__init__()
        self.hownet_dict = OpenHowNet.HowNetDict(init_sim=True)
        self.topk = topk
        self.wordCache = {}

    def _get_replacement_words(self, word):
        """Returns the top-k nearest words suggested by OpenHowNet as synonym
        candidates, caching the result for each queried word."""
        if word in self.wordCache:
            return self.wordCache[word]
        results = self.hownet_dict.get_nearest_words(word, language="zh", K=self.topk)
        if not results:
            return []
        # Use only the first sense's neighbor list; its first entry is the
        # query word itself, so drop it.
        synonyms = list(next(iter(results.values())))[1:]
        self.wordCache[word] = synonyms.copy()
        return synonyms

--------------------------------------------------------------------------------
/textattack/transformations/word_swaps/word_swap_contract.py:
--------------------------------------------------------------------------------
"""
Word Swap by Contraction
-------------------------------
"""

from textattack.shared.data import EXTENSION_MAP

from .word_swap import WordSwap


class WordSwapContract(WordSwap):
    """Transforms an input by performing contraction on recognized
    combinations."""

    reverse_contraction_map = {v: k for k, v in EXTENSION_MAP.items()}

    def _get_transformations(self, current_text, indices_to_modify):
        """Return all possible transformed sentences, each with one
        contraction.

        >>> from textattack.transformations import WordSwapContract
        >>> from textattack.augmentation import Augmenter

        >>> transformation = WordSwapContract()
        >>> augmenter = Augmenter(transformation=transformation)
        >>> s = 'I am 12 years old.'
        >>> augmenter.augment(s)
        """
        transformed_texts = []

        words = current_text.words
        indices_to_modify = sorted(indices_to_modify)

        # Search every pair of adjacent modifiable words for an entry in
        # reverse_contraction_map.
        for idx, word_idx in enumerate(indices_to_modify[:-1]):
            next_idx = indices_to_modify[idx + 1]
            # The two words must be adjacent in the text itself, not merely
            # adjacent within indices_to_modify.
            if word_idx + 1 != next_idx:
                continue
            word = words[word_idx]
            next_word = words[next_idx]

            # The candidate key to look up in the contraction map.
            key = " ".join([word, next_word])

            # When a contraction is found, replace the first word with it and
            # delete the second.
            if key in self.reverse_contraction_map:
                transformed_text = current_text.replace_word_at_index(
                    word_idx, self.reverse_contraction_map[key]
                )
                transformed_text = transformed_text.delete_word_at_index(next_idx)
                transformed_texts.append(transformed_text)

        return transformed_texts

--------------------------------------------------------------------------------
/textattack/transformations/word_swaps/word_swap_extend.py:
--------------------------------------------------------------------------------
"""
Word Swap by Extension
-------------------------------
"""

from textattack.shared.data import EXTENSION_MAP

from .word_swap import WordSwap


class WordSwapExtend(WordSwap):
    """Transforms an input by performing extension on recognized
    combinations."""

    def _get_transformations(self, current_text, indices_to_modify):
        """Return all possible transformed sentences, each with one extension.

        >>> from textattack.transformations import WordSwapExtend
        >>> from textattack.augmentation import Augmenter

        >>> transformation = WordSwapExtend()
        >>> augmenter = Augmenter(transformation=transformation)
        >>> s = '''I'm fabulous'''
        >>> augmenter.augment(s)
        """
        transformed_texts = []
        words = current_text.words
        for idx in indices_to_modify:
            word = words[idx]
            # Expand the word when it appears in the extension map.
            if word in EXTENSION_MAP:
                expanded = EXTENSION_MAP[word]
                transformed_text = current_text.replace_word_at_index(idx, expanded)
                transformed_texts.append(transformed_text)

        return transformed_texts
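WordSwapContract and WordSwapExtend are near-inverses over EXTENSION_MAP. A minimal sketch through the Augmenter API (the outputs in the comments are illustrative, not guaranteed verbatim):

    from textattack.augmentation import Augmenter
    from textattack.transformations import WordSwapContract, WordSwapExtend

    contracter = Augmenter(transformation=WordSwapContract())
    extender = Augmenter(transformation=WordSwapExtend())

    print(contracter.augment("I am 12 years old."))  # e.g. ["I'm 12 years old."]
    print(extender.augment("I'm fabulous."))         # e.g. ['I am fabulous.']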
--------------------------------------------------------------------------------
/textattack/transformations/word_swaps/word_swap_wordnet.py:
--------------------------------------------------------------------------------
"""
Word Swap by swapping synonyms in WordNet
------------------------------------------------
"""

import nltk
from nltk.corpus import wordnet

import textattack

from .word_swap import WordSwap


class WordSwapWordNet(WordSwap):
    """Transforms an input by replacing its words with synonyms provided by
    WordNet.

    >>> from textattack.transformations import WordSwapWordNet
    >>> from textattack.augmentation import Augmenter

    >>> transformation = WordSwapWordNet()
    >>> augmenter = Augmenter(transformation=transformation)
    >>> s = 'I am fabulous.'
    >>> augmenter.augment(s)
    """

    def __init__(self, language="eng"):
        super().__init__()
        nltk.download("omw-1.4")
        if language not in wordnet.langs():
            raise ValueError(f"Language {language} not one of {wordnet.langs()}")
        self.language = language

    def _get_replacement_words(self, word):
        """Returns a list of single-word synonyms of `word` drawn from the
        WordNet of the configured language."""
        synonyms = set()
        for syn in wordnet.synsets(word, lang=self.language):
            for syn_word in syn.lemma_names(lang=self.language):
                if (
                    (syn_word != word)
                    and ("_" not in syn_word)
                    and (textattack.shared.utils.is_one_word(syn_word))
                ):
                    # WordNet can suggest phrases joined by '_', but we ignore phrases.
                    synonyms.add(syn_word)
        return list(synonyms)

--------------------------------------------------------------------------------
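A usage sketch for the multilingual path, assuming the Open Multilingual WordNet code 'fra' (French) is available once the omw-1.4 corpus has been downloaded:

    from textattack.augmentation import Augmenter
    from textattack.transformations import WordSwapWordNet

    # English is the default; any language code in wordnet.langs() is accepted.
    en_augmenter = Augmenter(transformation=WordSwapWordNet())
    fr_augmenter = Augmenter(transformation=WordSwapWordNet(language="fra"))

    print(en_augmenter.augment("I am fabulous."))
    print(fr_augmenter.augment("Je suis fabuleux."))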