├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.yaml │ ├── config.yaml │ ├── feature-enhancement-request.yaml │ └── question.yaml ├── stale.yml └── workflows │ ├── ci.yml │ ├── issues.yml │ ├── publish-docs.yml │ └── test-docs.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── SECURITY.md ├── assets ├── README.md └── redirect.html ├── collect_env.py ├── docs ├── _static │ ├── api.svg │ ├── contributing.svg │ ├── css │ │ ├── api.css │ │ ├── footer.css │ │ ├── header.css │ │ ├── legal-notice.css │ │ ├── main.css │ │ ├── search.css │ │ ├── sidebar.css │ │ ├── tutorial.css │ │ └── version-switcher.css │ ├── favicon.ico │ ├── flair_logo_orange.svg │ ├── flair_logo_white.svg │ ├── glossary.svg │ ├── magnifying_glass.svg │ ├── magnifying_glass_dark.svg │ ├── octocat.svg │ └── tutorial.svg ├── _templates │ ├── darkmode-toggle.html │ ├── footer-links │ │ ├── legal-notice.html │ │ ├── linkedin.html │ │ └── x.html │ ├── landing-page-banner.html │ ├── landing-page-illustrations.html │ ├── landing_page_styles.html │ ├── legal-notice-content.html │ ├── page.html │ ├── version-switcher.html │ └── versioning.html ├── api │ ├── datasets │ │ ├── base.rst │ │ ├── biomedical.rst │ │ ├── document_classification.rst │ │ ├── entity_linking.rst │ │ ├── ocr.rst │ │ ├── relation_extraction.rst │ │ ├── sequence_labeling.rst │ │ ├── text_image.rst │ │ ├── text_text.rst │ │ └── treebanks.rst │ ├── embeddings │ │ ├── base.rst │ │ ├── document.rst │ │ ├── image.rst │ │ ├── legacy.rst │ │ ├── token.rst │ │ └── transformer.rst │ ├── flair.data.rst │ ├── flair.datasets.rst │ ├── flair.embeddings.rst │ ├── flair.models.rst │ ├── flair.nn.rst │ ├── flair.rst │ ├── flair.splitter.rst │ ├── flair.tokenization.rst │ ├── flair.trainers.plugins.rst │ ├── flair.trainers.rst │ └── index.rst ├── conf.py ├── contributing │ ├── index.rst │ ├── local_development.md │ ├── making_a_pull_request.md │ ├── updating_documentation.md │ └── writing_a_good_issue.md ├── index.rst ├── legal-notice │ └── index.rst ├── requirements.txt └── tutorial │ ├── index.rst │ ├── intro.md │ ├── tutorial-basics │ ├── basic-types.md │ ├── entity-linking.md │ ├── entity-mention-linking.md │ ├── how-predictions-work.md │ ├── how-to-tag-corpus.md │ ├── index.rst │ ├── other-models.md │ ├── part-of-speech-tagging.md │ ├── tagging-entities.md │ └── tagging-sentiment.md │ ├── tutorial-embeddings │ ├── classic-word-embeddings.md │ ├── embeddings.md │ ├── flair-embeddings.md │ ├── index.rst │ ├── other-embeddings.md │ └── transformer-embeddings.md │ ├── tutorial-hunflair2 │ ├── customize-linking.md │ ├── index.rst │ ├── linking.md │ ├── overview.md │ ├── tagging.md │ └── training-ner-models.md │ └── tutorial-training │ ├── how-model-training-works.md │ ├── how-to-load-custom-dataset.md │ ├── how-to-load-prepared-dataset.md │ ├── how-to-train-multitask-model.md │ ├── how-to-train-sequence-tagger.md │ ├── how-to-train-span-classifier.md │ ├── how-to-train-text-classifier.md │ ├── index.rst │ └── train-vs-fine-tune.md ├── examples ├── README.md ├── multi_gpu │ ├── README.md │ ├── __init__.py │ └── run_multi_gpu.py └── ner │ ├── README.md │ ├── __init__.py │ └── run_ner.py ├── flair ├── __init__.py ├── class_utils.py ├── data.py ├── datasets │ ├── __init__.py │ ├── base.py │ ├── biomedical.py │ ├── document_classification.py │ ├── entity_linking.py │ ├── ocr.py │ ├── relation_extraction.py │ ├── sequence_labeling.py │ ├── text_image.py │ ├── text_text.py │ └── treebanks.py ├── distributed_utils.py 
├── embeddings │ ├── __init__.py │ ├── base.py │ ├── document.py │ ├── image.py │ ├── legacy.py │ ├── token.py │ └── transformer.py ├── file_utils.py ├── inference_utils.py ├── models │ ├── __init__.py │ ├── entity_linker_model.py │ ├── entity_mention_linking.py │ ├── language_model.py │ ├── lemmatizer_model.py │ ├── multitask_model.py │ ├── pairwise_classification_model.py │ ├── pairwise_regression_model.py │ ├── prefixed_tagger.py │ ├── regexp_tagger.py │ ├── relation_classifier_model.py │ ├── relation_extractor_model.py │ ├── sequence_tagger_model.py │ ├── sequence_tagger_utils │ │ ├── __init__.py │ │ ├── crf.py │ │ └── viterbi.py │ ├── tars_model.py │ ├── text_classification_model.py │ ├── text_regression_model.py │ ├── triple_classification_model.py │ └── word_tagger_model.py ├── nn │ ├── __init__.py │ ├── decoder.py │ ├── distance │ │ ├── __init__.py │ │ ├── cosine.py │ │ ├── euclidean.py │ │ └── hyperbolic.py │ ├── dropout.py │ ├── model.py │ ├── multitask.py │ └── recurrent.py ├── optim.py ├── py.typed ├── samplers.py ├── splitter.py ├── tokenization.py ├── trainers │ ├── __init__.py │ ├── language_model_trainer.py │ ├── plugins │ │ ├── __init__.py │ │ ├── base.py │ │ ├── functional │ │ │ ├── __init__.py │ │ │ ├── anneal_on_plateau.py │ │ │ ├── checkpoints.py │ │ │ ├── deepncm_trainer_plugin.py │ │ │ ├── linear_scheduler.py │ │ │ ├── reduce_transformer_vocab.py │ │ │ └── weight_extractor.py │ │ ├── loggers │ │ │ ├── __init__.py │ │ │ ├── clearml_logger.py │ │ │ ├── log_file.py │ │ │ ├── loss_file.py │ │ │ ├── metric_history.py │ │ │ ├── tensorboard.py │ │ │ └── wandb.py │ │ └── metric_records.py │ └── trainer.py ├── training_utils.py └── visual │ ├── __init__.py │ ├── activations.py │ ├── manifold.py │ ├── ner_html.py │ ├── training_curves.py │ └── tree_printer.py ├── pyproject.toml ├── requirements-dev.txt ├── requirements.txt ├── resources └── docs │ ├── EXPERIMENTS.md │ ├── HUNFLAIR.md │ ├── HUNFLAIR2.md │ ├── HUNFLAIR2_TUTORIAL_1_TAGGING.md │ ├── HUNFLAIR2_TUTORIAL_2_LINKING.md │ ├── HUNFLAIR2_TUTORIAL_3_TRAINING_NER.md │ ├── HUNFLAIR2_TUTORIAL_4_CUSTOMIZE_LINKING.md │ ├── HUNFLAIR_CORPORA.md │ ├── HUNFLAIR_EXPERIMENTS.md │ ├── HUNFLAIR_TUTORIAL_1_TAGGING.md │ ├── HUNFLAIR_TUTORIAL_2_TRAINING.md │ ├── HUNFLAIR_TUTORIAL_3_ENTITY_LINKING.md │ ├── KOR_docs │ ├── README.md │ ├── TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL.md │ ├── TUTORIAL_1_BASICS.md │ ├── TUTORIAL_2_TAGGING.md │ ├── TUTORIAL_3_WORD_EMBEDDING.md │ ├── TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md │ ├── TUTORIAL_5_DOCUMENT_EMBEDDINGS.md │ ├── TUTORIAL_6_CORPUS.md │ ├── TUTORIAL_7_TRAINING_A_MODEL.md │ ├── TUTORIAL_8_MODEL_OPTIMIZATION.md │ └── TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md │ ├── TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL.md │ ├── TUTORIAL_8_MODEL_OPTIMIZATION.md │ ├── TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md │ ├── TUTORIAL_BASICS_TOKENIZATION.md │ ├── TUTORIAL_CORPUS_CUSTOM.md │ ├── TUTORIAL_CORPUS_PREPARED.md │ ├── TUTORIAL_EMBEDDINGS_OVERVIEW.md │ ├── TUTORIAL_FLAIR_BASICS.md │ ├── TUTORIAL_PRODUCTION_FASTER_TRANSFORMERS.md │ ├── TUTORIAL_TAGGING_CIRCUS.md │ ├── TUTORIAL_TAGGING_LINKING.md │ ├── TUTORIAL_TAGGING_NER.md │ ├── TUTORIAL_TAGGING_OVERVIEW.md │ ├── TUTORIAL_TAGGING_POS.md │ ├── TUTORIAL_TAGGING_RELATIONS.md │ ├── TUTORIAL_TAGGING_SENTIMENT.md │ ├── TUTORIAL_TRAINING_MODELS.md │ ├── TUTORIAL_TRAINING_MORE.md │ ├── TUTORIAL_TRAINING_OVERVIEW.md │ ├── TUTORIAL_TRAINING_SEQUENCE_LABELER.md │ ├── TUTORIAL_TRAINING_TEXT_CLASSIFIER.md │ ├── embeddings │ ├── BYTE_PAIR_EMBEDDINGS.md │ ├── CHARACTER_EMBEDDINGS.md │ ├── 
CLASSIC_WORD_EMBEDDINGS.md │ ├── DOCUMENT_POOL_EMBEDDINGS.md │ ├── DOCUMENT_RNN_EMBEDDINGS.md │ ├── ELMO_EMBEDDINGS.md │ ├── FASTTEXT_EMBEDDINGS.md │ ├── FLAIR_EMBEDDINGS.md │ ├── ONE_HOT_EMBEDDINGS.md │ └── TRANSFORMER_EMBEDDINGS.md │ ├── flair_logo.svg │ ├── flair_logo_2020.png │ ├── flair_logo_2020.svg │ ├── flair_logo_2020_FINAL_day_dpi72.png │ ├── flair_logo_2020_FINAL_night_dpi72.png │ └── flair_logo_2020_FINAL_night_light_dpi72.png ├── setup.py └── tests ├── __init__.py ├── conftest.py ├── embedding_test_utils.py ├── embeddings ├── __init__.py ├── test_byte_pair_embeddings.py ├── test_document_transform_word_embeddings.py ├── test_flair_embeddings.py ├── test_simple_token_embeddings.py ├── test_stacked_embeddings.py ├── test_tfidf_embeddings.py ├── test_transformer_document_embeddings.py ├── test_transformer_word_embeddings.py └── test_word_embeddings.py ├── model_test_utils.py ├── models ├── __init__.py ├── test_deepncm_classifier.py ├── test_entity_linker.py ├── test_model_license.py ├── test_regexp_tagger.py ├── test_relation_classifier.py ├── test_relation_extractor.py ├── test_sequence_tagger.py ├── test_tars_classifier.py ├── test_tars_ner.py ├── test_text_classifier.py ├── test_text_regressor.py └── test_word_tagger.py ├── resources ├── corpora │ └── lorem_ipsum │ │ ├── test.txt │ │ ├── train │ │ ├── split_1.txt │ │ └── split_2.txt │ │ └── valid.txt ├── tasks │ ├── ag_news │ │ ├── README.md │ │ ├── dev.txt │ │ ├── test.txt │ │ └── train.txt │ ├── column_corpus_options │ │ ├── eng.testa │ │ ├── eng.testb │ │ └── eng.train │ ├── column_with_whitespaces │ │ ├── eng.testa │ │ ├── eng.testb │ │ └── eng.train │ ├── conllu │ │ ├── train.conllu │ │ ├── train.conllup │ │ └── universal_dependencies.conllu │ ├── example_images │ │ └── i_love_berlin.png │ ├── fashion │ │ ├── eng.testa │ │ ├── eng.testb │ │ └── eng.train │ ├── fashion_disjunct │ │ ├── eng.testa │ │ ├── eng.testb │ │ └── eng.train │ ├── fashion_nodev │ │ ├── test.tsv │ │ └── train.tsv │ ├── fewshot_conll │ │ └── 1shot.txt │ ├── imdb │ │ ├── README.md │ │ ├── dev.txt │ │ ├── test.txt │ │ └── train.txt │ ├── imdb_underscore │ │ ├── README.md │ │ ├── dev.txt │ │ ├── test.txt │ │ └── train.txt │ ├── jsonl │ │ ├── testa.jsonl │ │ ├── testb.jsonl │ │ └── train.jsonl │ ├── multi_class │ │ ├── dev.txt │ │ ├── test.txt │ │ └── train.txt │ ├── multi_class_negative_examples │ │ ├── dev.txt │ │ ├── test.txt │ │ └── train.txt │ ├── ner_german_germeval │ │ ├── NER-de-dev.tsv │ │ ├── NER-de-test.tsv │ │ └── NER-de-train.tsv │ ├── ontonotes │ │ └── tiny-conll-2012.zip │ ├── regression │ │ ├── README.md │ │ ├── dev.txt │ │ ├── test.txt │ │ └── train.txt │ ├── span_labels │ │ ├── span_first.txt │ │ ├── span_second.txt │ │ └── span_third.txt │ ├── trivial │ │ ├── trivial_bioes │ │ │ ├── dev.txt │ │ │ ├── test.txt │ │ │ └── train.txt │ │ ├── trivial_bioes_with_boundaries │ │ │ ├── dev.txt │ │ │ ├── test.txt │ │ │ └── train.txt │ │ ├── trivial_text_classification_multi │ │ │ ├── dev.txt │ │ │ ├── test.txt │ │ │ └── train.txt │ │ └── trivial_text_classification_single │ │ │ ├── dev.txt │ │ │ ├── test.txt │ │ │ └── train.txt │ ├── ud_english │ │ ├── en_ewt-ud-dev.conllu │ │ ├── en_ewt-ud-test.conllu │ │ └── en_ewt-ud-train.conllu │ └── up_english │ │ ├── en_ewt-up-dev.conllu │ │ ├── en_ewt-up-test.conllu │ │ └── en_ewt-up-train.conllu ├── text_sequences │ └── resume1.txt └── visual │ ├── loss.tsv │ ├── snippet.txt │ └── weights.txt ├── test_augmentation.py ├── test_biomedical_entity_linking.py ├── test_corpus_dictionary.py ├── test_datasets.py 
├── test_datasets_biomedical.py ├── test_labels.py ├── test_language_model.py ├── test_lemmatizer.py ├── test_multitask.py ├── test_sentence.py ├── test_sentence_labeling.py ├── test_tars.py ├── test_tokenize_sentence.py ├── test_trainer.py ├── test_utils.py └── test_visual.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [alanakbik] 4 | # TODO: Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yaml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: Create a report to help us improve 3 | title: "[Bug]: " 4 | labels: ["bug"] 5 | assignees: [] 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | Thanks for taking the time to fill out this bug report! 11 | - type: textarea 12 | id: description 13 | attributes: 14 | label: Describe the bug 15 | description: Please describe the bug as concretely as possible 16 | placeholder: 17 | validations: 18 | required: true 19 | - type: textarea 20 | id: reproduction 21 | attributes: 22 | label: To Reproduce 23 | description: Steps to reproduce the behavior (e.g. which model did you train? what parameters did you use? etc.). 24 | placeholder: | 25 | import flair 26 | from flair.datasets import WNUT_17 27 | corpus = WNUT_17() 28 | .... 29 | render: python 30 | validations: 31 | required: true 32 | - type: textarea 33 | id: expectation 34 | attributes: 35 | label: Expected behavior 36 | description: A clear and concise description of what you expected to happen. 37 | validations: 38 | required: true 39 | - type: textarea 40 | id: error 41 | attributes: 42 | label: Logs and Stack traces 43 | description: If applicable, please paste any relevant logs or error messages; please don't cut off the stack trace. 44 | placeholder: | 45 | Traceback (most recent call last): 46 | File "flair\flair\embeddings\token.py", line 84, in __init__ 47 | self.__embedding_type: str = embeddings[0].embedding_type 48 | IndexError: list index out of range 49 | render: stacktrace 50 | validations: 51 | required: false 52 | - type: textarea 53 | id: screenshots 54 | attributes: 55 | label: Screenshots 56 | description: If applicable, add screenshots to help explain your problem. 57 | placeholder: if applicable, copy-paste an image here 58 | validations: 59 | required: false 60 | - type: textarea 61 | id: context 62 | attributes: 63 | label: Additional Context 64 | description: Add anything worth mentioning that wasn't covered yet. 
65 | validations: 66 | required: false 67 | - type: textarea 68 | id: environment 69 | attributes: 70 | label: Environment 71 | description: Please run the [collect_env.py](../blob/master/collect_env.py) script and paste its output here 72 | placeholder: python collect_env.py 73 | validations: 74 | required: true -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yaml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-enhancement-request.yaml: -------------------------------------------------------------------------------- 1 | name: Feature/Enhancement request 2 | description: Suggest an idea for this project 3 | title: "[Feature]: " 4 | labels: ["feature"] 5 | assignees: [] 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | Thanks for taking the time to provide an idea to improve flair! 11 | - type: textarea 12 | id: problem 13 | attributes: 14 | label: Problem statement 15 | description: Is your feature/enhancement request related to a problem? Please describe. 16 | placeholder: A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 17 | validations: 18 | required: false 19 | - type: textarea 20 | id: solution 21 | attributes: 22 | label: Solution 23 | description: Describe the solution you'd like 24 | placeholder: How should your problem ideally be solved? 25 | validations: 26 | required: true 27 | - type: textarea 28 | id: context 29 | attributes: 30 | label: Additional Context 31 | description: Add anything worth mentioning that wasn't covered yet. 32 | validations: 33 | required: false -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.yaml: -------------------------------------------------------------------------------- 1 | name: Question 2 | description: Any question related to the usage of flair 3 | title: "[Question]: " 4 | labels: ["question"] 5 | assignees: [] 6 | body: 7 | - type: textarea 8 | id: question 9 | attributes: 10 | label: Question 11 | description: Ask your question here 12 | placeholder: A clear and concise description of what you want to know. 13 | validations: 14 | required: true -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 120 3 | # Number of days of inactivity before a stale issue is closed 4 | daysUntilClose: 7 5 | # Issues with these labels will never be considered stale 6 | exemptLabels: 7 | - pinned 8 | - security 9 | # Label to use when marking an issue as stale 10 | staleLabel: wontfix 11 | # Comment to post when marking an issue as stale. Set to `false` to disable 12 | markComment: > 13 | This issue has been automatically marked as stale because it has not had 14 | recent activity. It will be closed if no further activity occurs. Thank you 15 | for your contributions. 16 | # Comment to post when closing a stale issue. 
Set to `false` to disable 17 | closeComment: false 18 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: [master] 4 | pull_request: 5 | branches: [master] 6 | 7 | jobs: 8 | test: 9 | runs-on: ubuntu-latest 10 | env: 11 | HF_HOME: ./cache/transformers 12 | FLAIR_CACHE_ROOT: ./cache/flair 13 | steps: 14 | - uses: actions/checkout@v3 15 | - name: Set up Python 3.9 16 | id: setup-python 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version: 3.9 20 | - name: Install Torch cpu 21 | run: pip install torch --index-url https://download.pytorch.org/whl/cpu 22 | - name: Install Flair dependencies 23 | run: pip install -e .[word-embeddings] 24 | - name: Install unittest dependencies 25 | run: pip install -r requirements-dev.txt 26 | - name: Show installed dependencies 27 | run: pip freeze 28 | - name: Cache downloaded models/datasets 29 | uses: actions/cache@v3 30 | with: 31 | path: ./cache 32 | key: cache-v1.2 33 | - name: Run tests 34 | run: | 35 | python -c 'import flair' 36 | pytest --runintegration -vv 37 | -------------------------------------------------------------------------------- /.github/workflows/issues.yml: -------------------------------------------------------------------------------- 1 | on: issue_comment 2 | 3 | jobs: 4 | issue_commented: 5 | name: Issue comment 6 | if: ${{ github.event.issue.author == github.event.issue_comment.author }} 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions-ecosystem/action-remove-labels@v1 10 | with: 11 | labels: "Awaiting Response" -------------------------------------------------------------------------------- /.github/workflows/publish-docs.yml: -------------------------------------------------------------------------------- 1 | name: 'Build doc page' 2 | on: 3 | push: 4 | branches: [ master ] 5 | tags: 6 | - "*" 7 | 8 | jobs: 9 | publish_docs: 10 | name: Build the docs using Sphinx and push to gh-pages 11 | runs-on: ubuntu-latest 12 | env: 13 | python-version: 3.9 14 | steps: 15 | - name: Checkout code 16 | uses: actions/checkout@v3 17 | - name: setup python ${{ env.python-version }} 18 | uses: actions/setup-python@v4 19 | with: 20 | python-version: ${{ env.python-version }} 21 | - name: Install Torch cpu 22 | run: pip install torch --index-url https://download.pytorch.org/whl/cpu 23 | - name: Install Flair dependencies 24 | run: pip install -e . 
25 | - name: Install unittest dependencies 26 | run: pip install -r requirements-dev.txt 27 | - name: Install doc dependencies 28 | run: pip install -r docs/requirements.txt 29 | - name: Fetch git tags 30 | run: git fetch --tags origin 31 | - name: Build docs 32 | run: | 33 | sphinx-multiversion docs doc_build/ 34 | - name: Add redirect to stable doc 35 | run: | 36 | cp assets/redirect.html doc_build/index.html 37 | cp assets/redirect.html doc_build/404.html 38 | cp assets/README.md doc_build/README.md 39 | sed -i "s/\[VERSION\]/$(python -c 'import flair;print(flair.__version__)')/g" doc_build/index.html 40 | sed -i "s/\[VERSION\]/$(python -c 'import flair;print(flair.__version__)')/g" doc_build/404.html 41 | - name: Deploy 42 | uses: peaceiris/actions-gh-pages@v3 43 | with: 44 | github_token: ${{ secrets.GITHUB_TOKEN }} 45 | publish_dir: ./doc_build -------------------------------------------------------------------------------- /.github/workflows/test-docs.yml: -------------------------------------------------------------------------------- 1 | name: 'Build doc page for artifact' 2 | on: 3 | push: 4 | branches: "*" 5 | 6 | jobs: 7 | build_docs: 8 | name: Build the docs using Sphinx and publish as artifact 9 | runs-on: ubuntu-latest 10 | env: 11 | python-version: 3.9 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v3 15 | - name: setup python ${{ env.python-version }} 16 | uses: actions/setup-python@v4 17 | with: 18 | python-version: ${{ env.python-version }} 19 | - name: Install Torch cpu 20 | run: pip install torch --index-url https://download.pytorch.org/whl/cpu 21 | - name: Install Flair dependencies 22 | run: pip install -e . 23 | - name: Install unittest dependencies 24 | run: pip install -r requirements-dev.txt 25 | - name: Install doc dependencies 26 | run: pip install -r docs/requirements.txt 27 | - name: Fetch git tags 28 | run: git fetch --tags origin 29 | - name: Change config to current branch 30 | env: 31 | BRANCH_NAME: ${{ github.ref_name }} 32 | run: | 33 | sed -i 's/^smv_branch_whitelist = r"^master$"/smv_branch_whitelist = r"^${{ env.BRANCH_NAME }}$"/' docs/conf.py 34 | sed -i 's/^smv_tag_whitelist = r"^v\d+\.\d+\.\d+$"/smv_tag_whitelist = "^$"/' docs/conf.py 35 | cat docs/conf.py 36 | - name: Build docs 37 | run: | 38 | sphinx-multiversion docs doc_build/ 39 | - name: Add redirect to stable doc 40 | env: 41 | BRANCH_NAME: ${{ github.ref_name }} 42 | run: | 43 | cp assets/redirect.html doc_build/index.html 44 | cp assets/redirect.html doc_build/404.html 45 | cp assets/README.md doc_build/README.md 46 | sed -i "s/\[VERSION\]/${{ env.BRANCH_NAME }}/g" doc_build/index.html 47 | sed -i "s/\[VERSION\]/${{ env.BRANCH_NAME }}/g" doc_build/404.html 48 | - name: Upload built docs as artifact 49 | uses: actions/upload-artifact@v4 50 | with: 51 | name: website 52 | path: doc_build -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | .idea/ 29 | .vscode/ 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # 
before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # Environments 88 | .env 89 | .venv 90 | env/ 91 | venv/ 92 | ENV/ 93 | env.bak/ 94 | venv.bak/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | 109 | resources/taggers/ 110 | regression_train/ 111 | /doc_build/ 112 | 113 | scripts/ 114 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Flair 2 | 3 | We are happy to accept your contributions to make `flair` better and more awesome! To avoid unnecessary work on either 4 | side, please stick to the following process: 5 | 6 | 1. Check if there is already [an issue](https://github.com/flairNLP/flair/issues) for your concern. 7 | 2. If there is not, open a new one to start a discussion. We hate to close finished PRs! 8 | 3. If we decide your concern needs code changes, we would be happy to accept a pull request. Please consider the 9 | commit guidelines below. 10 | 11 | 12 | ## Git Commit Guidelines 13 | 14 | If there is already a ticket, use this number at the start of your commit message. 15 | Use meaningful commit messages that describe what you did. 16 | 17 | **Example:** `GH-42: Added new type of embeddings: DocumentEmbedding.` 18 | 19 | ## Developing locally 20 | 21 | For contributors looking to get deeper into the API, we suggest cloning the repository and checking out the unit 22 | tests for examples of how to call methods. Nearly all classes and methods are documented, so finding your way around 23 | the code should hopefully be easy. 24 | 25 | ### Setup 26 | 27 | Flair requires python-3.9 or higher. To make sure your code also runs on the oldest supported 28 | python version, it is recommended to use python-3.9.x for flair development. 29 | 30 | Create a python environment of your preference and run: 31 | ```bash 32 | pip install -r requirements-dev.txt 33 | pip install -e . 34 | ``` 35 | 36 | ### Tests 37 | 38 | To run only typechecks and code formatting checks, execute: 39 | 40 | ```bash 41 | pytest flair 42 | ``` 43 | 44 | To run all basic tests, execute: 45 | 46 | ```bash 47 | pytest 48 | ``` 49 | 50 | To run integration tests, execute: 51 | 52 | ```bash 53 | pytest --runintegration 54 | ``` 55 | 56 | The integration tests will train small models and therefore take more time. 
57 | In general, it is recommended to ensure all basic tests pass before running the integration tests. 58 | 59 | ### Code Formatting 60 | 61 | To ensure a standardized code style, we use the formatter [black](https://github.com/ambv/black), and for standardizing imports we use [ruff](https://github.com/charliermarsh/ruff). 62 | If your code is not formatted properly, the tests will fail. 63 | 64 | We recommend configuring your IDE to run these formatters for you, but you can also always run them manually via 65 | `black . && ruff --fix .` in the flair root folder. 66 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Flair is licensed under the following MIT License (MIT) Copyright © 2018 Zalando SE, https://tech.zalando.com 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include flair/py.typed 2 | include requirements.txt 3 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | We acknowledge that every line of code that we write may potentially contain security issues. 2 | We are trying to deal with it responsibly and provide patches as quickly as possible. 3 | 4 | Please report any issues to [Alan Akbik](http://alanakbik.github.io/). 5 | -------------------------------------------------------------------------------- /assets/README.md: -------------------------------------------------------------------------------- 1 | # Docs For Flair NLP 2 | 3 | This branch is currently under construction. 4 | 5 | It will contain the docs for Flair NLP. 6 | Don't change files here, as this branch is autogenerated using GitHub Actions. 
-------------------------------------------------------------------------------- /assets/redirect.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Redirecting to https://flairnlp.github.io/flair/v[VERSION]/ 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /collect_env.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import transformers 3 | 4 | import flair 5 | 6 | 7 | def main(): 8 | print("#### Versions:") 9 | print(f"##### Flair\n{flair.__version__}") 10 | print(f"##### Pytorch\n{torch.__version__}") 11 | print(f"##### Transformers\n{transformers.__version__}") 12 | print(f"#### GPU\n{torch.cuda.is_available()}") 13 | 14 | 15 | if __name__ == "__main__": 16 | main() 17 | -------------------------------------------------------------------------------- /docs/_static/api.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs/_static/css/footer.css: -------------------------------------------------------------------------------- 1 | .bd-footer { 2 | border: none; 3 | background: var(--blue-orange); 4 | height: var(--footer-height); 5 | display: flex; 6 | align-items: center; 7 | } 8 | 9 | .bd-footer *::selection { 10 | background: var(--white-blue); 11 | color: var(--flair-orange); 12 | } 13 | 14 | .bd-footer .bd-footer__inner { 15 | padding: 0 10%; 16 | width: 100%; 17 | max-width: initial; 18 | margin: 0; 19 | box-sizing: border-box; 20 | } 21 | 22 | .bd-footer .bd-footer__inner .footer-item * { 23 | color: white; 24 | } 25 | 26 | .bd-footer .bd-footer__inner a:hover { 27 | color: var(--orange-blue); 28 | } 29 | 30 | .bd-footer .bd-footer__inner .footer-items__end { 31 | display: flex; 32 | flex-direction: row; 33 | } 34 | 35 | .bd-footer .bd-footer__inner .footer-items__end .footer-item:not(:last-of-type) { 36 | margin-right: 3rem; 37 | } 38 | 39 | @media screen and (max-width: 959px) { 40 | .bd-footer { 41 | font-size: 0.8rem; 42 | } 43 | 44 | .bd-footer .bd-footer__inner .footer-items__end .footer-item:not(:last-of-type) { 45 | margin-right: 2rem; 46 | } 47 | } 48 | 49 | @media screen and (max-width: 700px) { 50 | .bd-footer { 51 | font-size: 1rem; 52 | } 53 | 54 | .bd-footer .bd-footer__inner .footer-items__start { 55 | display: none; 56 | } 57 | 58 | .bd-footer .bd-footer__inner .footer-items__end { 59 | width: 100%; 60 | justify-content: space-between; 61 | } 62 | 63 | .bd-footer .bd-footer__inner .footer-items__end .footer-item:not(:last-of-type) { 64 | margin-right: 1rem; 65 | } 66 | } 67 | 68 | @media screen and (max-width: 435px) { 69 | .bd-footer { 70 | font-size: 0.8rem; 71 | } 72 | } -------------------------------------------------------------------------------- /docs/_static/css/legal-notice.css: -------------------------------------------------------------------------------- 1 | .legal-notice strong { 2 | font-size: 1.75rem; 3 | font-weight: 600; 4 | } -------------------------------------------------------------------------------- /docs/_static/css/search.css: -------------------------------------------------------------------------------- 1 | .bd-search-container h1 { 2 | color: var(--blue-white); 3 | font-size: 3rem; 4 | font-weight: 600; 5 | } 6 | 7 | .bd-search-container h2 { 8 | color: var(--blue-white); 9 | font-weight: 600; 10 | font-size: 1.5rem; 11 | } 12 | 13 | 
.bd-search-container form.bd-search i { 14 | display: none; 15 | } 16 | 17 | .bd-search-container form.bd-search input { 18 | border: 2px var(--flair-orange) solid; 19 | color: var(--flair-blue); 20 | box-shadow: none; 21 | padding: .25em 5.5em .25em .75em; 22 | border-radius: 1rem; 23 | font-size: 1.2rem; 24 | } 25 | 26 | .bd-search-container form.bd-search .search-button__kbd-shortcut { 27 | align-items: center; 28 | } 29 | 30 | .bd-search-container p.search-summary { 31 | color: var(--blue-white) !important; 32 | } 33 | 34 | .bd-search-container ul.search li { 35 | border-top: 2px solid var(--blue-white); 36 | margin: 0 0 .05em 0; 37 | } 38 | 39 | .bd-search-container ul.search li:first-of-type { 40 | margin-top: 1rem; 41 | border: none; 42 | } 43 | 44 | .bd-search-container ul.search li a { 45 | font-weight: 400; 46 | } 47 | 48 | .bd-search-container ul.search li span { 49 | color: var(--blue-white); 50 | } 51 | 52 | .highlighted { 53 | background: var(--flair-orange) !important; 54 | color: white !important; 55 | border-radius: 5px !important; 56 | } 57 | 58 | div#searchbox p.highlight-link { 59 | box-shadow: none !important; 60 | margin: 1rem 0; 61 | } 62 | 63 | div#searchbox p.highlight-link a { 64 | background: var(--white-blue) !important; 65 | border: 2px var(--flair-orange) solid; 66 | color: var(--flair-orange); 67 | transition: all 200ms ease-in-out; 68 | } 69 | 70 | div#searchbox p.highlight-link a:hover { 71 | background: var(--flair-orange) !important; 72 | color: white !important; 73 | box-shadow: none !important; 74 | } 75 | 76 | div#searchbox p.highlight-link a::before { 77 | content: none; 78 | } -------------------------------------------------------------------------------- /docs/_static/css/sidebar.css: -------------------------------------------------------------------------------- 1 | .bd-sidebar-primary.bd-sidebar { 2 | top: 90px; 3 | width: 350px; 4 | padding: 0; 5 | border: none; 6 | overflow: initial; 7 | background: var(--white-blue); 8 | } 9 | 10 | .bd-sidebar-primary.bd-sidebar #rtd-footer-container, .bd-sidebar-primary.bd-sidebar .sidebar-primary-items__end { 11 | display: none; 12 | } 13 | 14 | .bd-sidebar-primary.bd-sidebar::after { 15 | content: ""; 16 | background: linear-gradient(90deg, rgba(0, 0, 0, 0.2) 0%, transparent 100%); 17 | height: 100%; 18 | width: 7px; 19 | position: absolute; 20 | right: 0; 21 | transform: translateX(100%); 22 | top: 0; 23 | } 24 | 25 | .bd-sidebar-primary.bd-sidebar h3 { 26 | display: none; 27 | } 28 | 29 | .bd-sidebar-primary.bd-sidebar * { 30 | color: var(--blue-white); 31 | } 32 | 33 | .bd-sidebar-primary.bd-sidebar a:hover { 34 | color: var(--flair-orange); 35 | } 36 | 37 | .bd-sidebar-primary.bd-sidebar a:hover * { 38 | color: var(--flair-orange); 39 | } 40 | 41 | .bd-sidebar-primary.bd-sidebar a * { 42 | transition: color 200ms cubic-bezier(0, 0.35, 0.08, 0.89); 43 | } 44 | 45 | .bd-sidebar-primary.bd-sidebar .sidebar-primary-items__start { 46 | overflow-y: auto; 47 | padding: 2rem; 48 | } 49 | 50 | .bd-sidebar-primary.bd-sidebar .sidebar-primary-item { 51 | padding: 0; 52 | } 53 | 54 | .bd-sidebar-primary.bd-sidebar .toctree-l1 { 55 | font-size: 1.5rem; 56 | font-weight: 600; 57 | margin-top: .8rem; 58 | } 59 | 60 | .bd-sidebar-primary.bd-sidebar .toctree-l2 { 61 | font-size: 1rem; 62 | font-weight: 400; 63 | margin-top: .8rem; 64 | } 65 | 66 | .bd-sidebar-primary.bd-sidebar .toctree-l2 ul { 67 | margin-left: 1rem; 68 | } 69 | 70 | .bd-sidebar-primary.bd-sidebar .toctree-l3 { 71 | margin-top: .8rem; 72 | } 73 | 74 | 
.bd-sidebar-primary.bd-sidebar .toctree-l4 { 75 | margin-top: .8rem; 76 | } 77 | 78 | .bd-sidebar-primary.bd-sidebar .current, .bd-sidebar-primary.bd-sidebar .current * { 79 | color: var(--flair-orange); 80 | } 81 | 82 | .bd-sidebar-primary.bd-sidebar code { 83 | padding: 0; 84 | background: transparent; 85 | border: none; 86 | font-weight: 400; 87 | } -------------------------------------------------------------------------------- /docs/_static/css/version-switcher.css: -------------------------------------------------------------------------------- 1 | .version-switcher__container { 2 | position: relative; 3 | z-index: 10; 4 | } 5 | 6 | .version-switcher__container #versionswitcherbutton { 7 | background: var(--flair-orange); 8 | border: 2px white solid; 9 | color: white; 10 | font-weight: 500; 11 | cursor: pointer; 12 | font-size: 1rem; 13 | padding: 0.2em 1em; 14 | border-radius: 0.66em; 15 | font-family: inherit; 16 | width: 7em; 17 | } 18 | 19 | .version-switcher__container #versionswitcherbutton::after { 20 | content: none; 21 | } 22 | 23 | .version-switcher__container #versionswitcherlist { 24 | background: var(--flair-orange); 25 | position: absolute; 26 | top: calc(100% + 0.4rem); 27 | left: 0; 28 | display: none; 29 | flex-direction: column; 30 | border: 2px white solid; 31 | border-radius: 0.66em; 32 | font-size: 1rem; 33 | min-width: 100%; 34 | box-sizing: border-box; 35 | overflow: hidden; 36 | } 37 | 38 | .version-switcher__container.open #versionswitcherlist { 39 | display: flex; 40 | } 41 | 42 | .version-switcher__container .version-switcher__menu a.list-group-item:not(:last-child) { 43 | border-bottom: none; 44 | } 45 | 46 | .version-switcher__container a.list-group-item { 47 | background: var(--flair-orange); 48 | font-size: 1rem; 49 | padding: 0.2em 1em 0.2em 1em; 50 | white-space: nowrap; 51 | overflow: hidden; 52 | text-overflow: ellipsis; 53 | position: relative; 54 | transition: all 200ms cubic-bezier(0, 0.35, 0.08, 0.89); 55 | } 56 | 57 | .version-switcher__container a.list-group-item:hover { 58 | background: var(--white-transparent); 59 | color: var(--white-blue); 60 | } 61 | 62 | .version-switcher__container a.list-group-item span { 63 | color: white; 64 | } 65 | 66 | .version-switcher__container a.list-group-item:not(:last-of-type) { 67 | padding-bottom: calc(0.2em + 2px); 68 | } 69 | 70 | .version-switcher__container a.list-group-item:not(:last-of-type)::after { 71 | content: ""; 72 | height: 2px; 73 | width: 100%; 74 | background: white; 75 | position: absolute; 76 | bottom: 0; 77 | left: 0; 78 | } -------------------------------------------------------------------------------- /docs/_static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/docs/_static/favicon.ico -------------------------------------------------------------------------------- /docs/_static/glossary.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /docs/_static/magnifying_glass.svg: -------------------------------------------------------------------------------- 1 | 2 | 33 | 37 | 43 | 49 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /docs/_static/magnifying_glass_dark.svg: -------------------------------------------------------------------------------- 1 | 2 
| 33 | 37 | 43 | 49 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /docs/_static/octocat.svg: -------------------------------------------------------------------------------- 1 | 2 | 17 | 19 | 37 | 41 | 42 | -------------------------------------------------------------------------------- /docs/_static/tutorial.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | 17 | 18 | 19 | 20 | 21 | 26 | 27 | 28 | 29 | 30 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /docs/_templates/footer-links/legal-notice.html: -------------------------------------------------------------------------------- 1 | Legal notice -------------------------------------------------------------------------------- /docs/_templates/footer-links/linkedin.html: -------------------------------------------------------------------------------- 1 | LinkedIn -------------------------------------------------------------------------------- /docs/_templates/footer-links/x.html: -------------------------------------------------------------------------------- 1 | Twitter/X -------------------------------------------------------------------------------- /docs/_templates/legal-notice-content.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/_templates/page.html: -------------------------------------------------------------------------------- 1 | {% extends "!page.html" %} 2 | {% block body %} 3 | {% if current_version and latest_version and current_version != latest_version and current_version != release and current_version.name != latest_version.release %} 4 |

5 | 6 | {% if current_version.is_released %} 7 | {% if latest_version.release.replace('v', '').split('.') | map('int') | list > current_version.name.replace('v', '').split('.') | map('int') | list %} 8 | You're reading an old version of this documentation. 9 | If you want up-to-date information, please have a look at {{latest_version.name}}. 10 | {% endif %} 11 | {% else %} 12 | You're reading the documentation for a development version. 13 | For the latest stable version, please have a look at {{latest_version.name}}. 14 | {% endif %} 15 | 16 |

17 | {% endif %} 18 | {{ super() }} 19 | {% endblock %}% -------------------------------------------------------------------------------- /docs/_templates/version-switcher.html: -------------------------------------------------------------------------------- 1 | {# As the version switcher will only work when JavaScript is enabled, we add it through JavaScript. 2 | #} 3 | 41 | 46 | -------------------------------------------------------------------------------- /docs/_templates/versioning.html: -------------------------------------------------------------------------------- 1 | {% if versions %} 2 |

{{ _('Versions') }}

3 | 17 | {% endif %} -------------------------------------------------------------------------------- /docs/api/datasets/base.rst: -------------------------------------------------------------------------------- 1 | flair.datasets.base 2 | =================== 3 | 4 | .. currentmodule:: flair.datasets.base 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/datasets/biomedical.rst: -------------------------------------------------------------------------------- 1 | flair.datasets.biomedical 2 | ========================= 3 | 4 | .. currentmodule:: flair.datasets.biomedical 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/datasets/document_classification.rst: -------------------------------------------------------------------------------- 1 | flair.datasets.document_classification 2 | ====================================== 3 | 4 | .. currentmodule:: flair.datasets.document_classification 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/datasets/entity_linking.rst: -------------------------------------------------------------------------------- 1 | flair.datasets.entity_linking 2 | ============================= 3 | 4 | .. currentmodule:: flair.datasets.entity_linking 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/datasets/ocr.rst: -------------------------------------------------------------------------------- 1 | flair.datasets.ocr 2 | ================== 3 | 4 | .. currentmodule:: flair.datasets.ocr 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/datasets/relation_extraction.rst: -------------------------------------------------------------------------------- 1 | flair.datasets.relation_extraction 2 | ================================== 3 | 4 | .. currentmodule:: flair.datasets.relation_extraction 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/datasets/sequence_labeling.rst: -------------------------------------------------------------------------------- 1 | flair.datasets.sequence_labeling 2 | ================================ 3 | 4 | .. currentmodule:: flair.datasets.sequence_labeling 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/datasets/text_image.rst: -------------------------------------------------------------------------------- 1 | flair.datasets.text_image 2 | ========================= 3 | 4 | .. currentmodule:: flair.datasets.text_image 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/datasets/text_text.rst: -------------------------------------------------------------------------------- 1 | flair.datasets.text_text 2 | ======================== 3 | 4 | .. currentmodule:: flair.datasets.text_text 5 | 6 | .. 
autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | 10 | 11 | -------------------------------------------------------------------------------- /docs/api/datasets/treebanks.rst: -------------------------------------------------------------------------------- 1 | flair.datasets.treebanks 2 | ======================== 3 | 4 | .. currentmodule:: flair.datasets.treebanks 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/embeddings/base.rst: -------------------------------------------------------------------------------- 1 | flair.embeddings.base 2 | ===================== 3 | 4 | .. currentmodule:: flair.embeddings.base 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/embeddings/document.rst: -------------------------------------------------------------------------------- 1 | flair.embeddings.document 2 | ========================= 3 | 4 | .. currentmodule:: flair.embeddings.document 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/embeddings/image.rst: -------------------------------------------------------------------------------- 1 | flair.embeddings.image 2 | ====================== 3 | 4 | .. currentmodule:: flair.embeddings.image 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/embeddings/legacy.rst: -------------------------------------------------------------------------------- 1 | flair.embeddings.legacy 2 | ======================= 3 | 4 | .. currentmodule:: flair.embeddings.legacy 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/embeddings/token.rst: -------------------------------------------------------------------------------- 1 | flair.embeddings.token 2 | ====================== 3 | 4 | .. currentmodule:: flair.embeddings.token 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/embeddings/transformer.rst: -------------------------------------------------------------------------------- 1 | flair.embeddings.transformer 2 | ============================ 3 | 4 | .. currentmodule:: flair.embeddings.transformer 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/flair.data.rst: -------------------------------------------------------------------------------- 1 | flair.data 2 | ========== 3 | 4 | .. currentmodule:: flair.data 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/flair.datasets.rst: -------------------------------------------------------------------------------- 1 | flair.datasets 2 | ============== 3 | 4 | .. currentmodule:: flair.datasets 5 | 6 | .. 
toctree:: 7 | :glob: 8 | :maxdepth: 2 9 | 10 | datasets/* 11 | -------------------------------------------------------------------------------- /docs/api/flair.embeddings.rst: -------------------------------------------------------------------------------- 1 | flair.embeddings 2 | ================ 3 | 4 | .. currentmodule:: flair.embeddings 5 | 6 | .. toctree:: 7 | :glob: 8 | :maxdepth: 2 9 | 10 | embeddings/* -------------------------------------------------------------------------------- /docs/api/flair.models.rst: -------------------------------------------------------------------------------- 1 | flair.models 2 | ============ 3 | 4 | .. currentmodule:: flair.models 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/flair.nn.rst: -------------------------------------------------------------------------------- 1 | flair.nn 2 | ======== 3 | 4 | .. currentmodule:: flair.nn 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/flair.rst: -------------------------------------------------------------------------------- 1 | flair 2 | ===== 3 | 4 | .. currentmodule:: flair 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/flair.splitter.rst: -------------------------------------------------------------------------------- 1 | flair.splitter 2 | ============== 3 | 4 | .. currentmodule:: flair.splitter 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | 10 | -------------------------------------------------------------------------------- /docs/api/flair.tokenization.rst: -------------------------------------------------------------------------------- 1 | flair.tokenization 2 | ================== 3 | 4 | .. currentmodule:: flair.tokenization 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/flair.trainers.plugins.rst: -------------------------------------------------------------------------------- 1 | flair.trainers.plugins 2 | ====================== 3 | 4 | .. currentmodule:: flair.trainers.plugins 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/flair.trainers.rst: -------------------------------------------------------------------------------- 1 | flair.trainers 2 | ============== 3 | 4 | .. currentmodule:: flair.trainers 5 | 6 | .. autosummary:: 7 | :toctree: generated 8 | :nosignatures: 9 | -------------------------------------------------------------------------------- /docs/api/index.rst: -------------------------------------------------------------------------------- 1 | API Docs 2 | ======== 3 | 4 | .. toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | flair 9 | flair.* -------------------------------------------------------------------------------- /docs/contributing/index.rst: -------------------------------------------------------------------------------- 1 | Contributing 2 | ============ 3 | 4 | .. 
toctree:: 5 | :maxdepth: 1 6 | 7 | writing_a_good_issue 8 | local_development 9 | making_a_pull_request 10 | updating_documentation 11 | -------------------------------------------------------------------------------- /docs/contributing/local_development.md: -------------------------------------------------------------------------------- 1 | # Local Development 2 | 3 | For contributors looking to get deeper into the API, we suggest cloning the repository and checking out the unit 4 | tests for examples of how to call methods. Most classes and methods are documented, so finding your way around 5 | the code should hopefully be easy. 6 | 7 | ## Setup 8 | 9 | Flair requires python-3.9 or higher. To make sure our code also runs on the oldest supported 10 | python version, it is recommended to use python-3.9.x for flair development. 11 | 12 | Create a python environment of your preference and run: 13 | ```bash 14 | pip install -r requirements-dev.txt 15 | pip install -e . 16 | ``` 17 | 18 | ## Tests 19 | 20 | To run only typechecks and code formatting checks, execute: 21 | 22 | ```bash 23 | pytest flair 24 | ``` 25 | 26 | To run all basic tests, execute: 27 | 28 | ```bash 29 | pytest 30 | ``` 31 | 32 | To run integration tests, execute: 33 | 34 | ```bash 35 | pytest --runintegration 36 | ``` 37 | 38 | The integration tests will train small models and therefore take more time. 39 | In general, it is recommended to ensure all basic tests pass before running the integration tests. 40 | 41 | ## Code Formatting 42 | 43 | To ensure a standardized code style, we use the formatter [black](https://github.com/ambv/black), and for standardizing imports we use [ruff](https://github.com/charliermarsh/ruff). 44 | If your code is not formatted properly, the tests will fail. 45 | 46 | We recommend configuring your IDE to run these formatters for you, but you can also always run them manually via 47 | `black . && ruff --fix .` in the flair root folder. -------------------------------------------------------------------------------- /docs/contributing/making_a_pull_request.md: -------------------------------------------------------------------------------- 1 | # Making a pull request 2 | 3 | We are happy to accept your contributions to make `flair` better and more awesome! To avoid unnecessary work on either 4 | side, please stick to the following process: 5 | 6 | 1. Check if there is already [an issue](https://github.com/flairNLP/flair/issues) for your concern. 7 | 2. If there is not, open a new one to start a discussion. We hate to close finished PRs! 8 | 3. If we decide your concern needs code changes, we would be happy to accept a pull request. Please consider the 9 | commit guidelines below. 10 | 11 | 12 | ## Git Commit Guidelines 13 | 14 | If there is already a ticket, use this number at the start of your commit message. 15 | Use meaningful commit messages that describe what you did. 16 | 17 | **Example:** `GH-42: Added new type of embeddings: DocumentEmbedding.` -------------------------------------------------------------------------------- /docs/contributing/updating_documentation.md: -------------------------------------------------------------------------------- 1 | # Updating documentation 2 | 3 | 4 | ## What is good documentation? 5 | 6 | Good Documentation 7 | * Always refers to the end user. Do not document *why* something is the way it is, but rather *how* to use it. 8 | * Doesn't lie and is always up-to-date. 
Whenever code is updated, consider if the documentation needs to change accordingly to reflect reality. 9 | * Provides useful links wherever applicable. Do not reference another object without linking it. 10 | 11 | 12 | ## Tutorials 13 | 14 | All tutorials are markdown files stored at [the tutorial folder](https://github.com/flairNLP/flair/tree/master/docs/tutorial). 15 | When adding a new tutorial, you must add its name to the `index.rst` file in the respective folder. 16 | We are using the [MyST parser](https://myst-parser.readthedocs.io/en/latest/syntax/typography.html) which adds 17 | some additional syntax over markdown. 18 | 19 | A tutorial should always be easy to understand, and reference API documentation for further reading. 20 | 21 | ```{note} 22 | You can reference symbols by defining links 23 | e.g.: ``[`flair.set_seed`](#flair.set_seed)`` for a function 24 | e.g.: `[entity-linking](project:../tutorial/tutorial-basics/entity-linking.md)` for another tutorial 25 | ``` 26 | 27 | ## Docstrings 28 | 29 | For docstrings, we follow the [Google docstring](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) format. 30 | We do not need to specify types or default values, as those will be extracted from the function signature. 31 | 32 | Docstrings usually have a one-liner giving a simple explanation of the object, followed by a more detailed explanation **if required**. 33 | Ensure that you always use cross-references instead of just mentioning another object, 34 | e.g. ``:class:`flair.models.SequenceTagger` `` can be used to reference the SequenceTagger. 35 | 36 | 37 | ## Building the local docs 38 | 39 | For building the docs: 40 | 41 | * Ensure that you have everything committed. Local changes won't be used for building. 42 | * Install the build dependencies via `pip install -r docs/requirements.txt`. 43 | * In `docs/conf.py` temporarily add your local branch name to the `smv_branch_whitelist` pattern. 44 | E.g. if your branch is called `doc-page`, `smv_branch_whitelist` needs to have the value `r"^master|doc-page$"` 45 | * run `sphinx-multiversion docs doc_build/` to generate the docs. 46 | * open `doc_build/<your-branch>/index.html` to view the docs. 47 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. _flair_docs_mainpage: 2 | 3 | .. title:: Home 4 | 5 | .. raw:: html 6 | :file: _templates/landing_page_styles.html 7 | 8 | .. raw:: html 9 | :file: _templates/landing-page-banner.html 10 | 11 | .. raw:: html 12 | :file: _templates/landing-page-illustrations.html 13 | 14 | .. toctree:: 15 | :maxdepth: 3 16 | :hidden: 17 | 18 | Tutorials <tutorial/index> 19 | API <api/index> 20 | Contributing <contributing/index> -------------------------------------------------------------------------------- /docs/legal-notice/index.rst: -------------------------------------------------------------------------------- 1 | Legal Notice 2 | ============ 3 | 4 | .. title:: Legal Notice 5 | 6 | .. raw:: html 7 | :file: ../_templates/legal-notice-content.html 8 | 9 | .. 
toctree:: 10 | :maxdepth: 3 11 | :hidden: 12 | 13 | Tutorials <../tutorial/index> 14 | API <../api/index> 15 | Contributing <../contributing/index> -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-github-style<=1.0.2 # 1.0.3 changes logic that breaks with sphinx-multiversion 2 | sphinx-autodoc-typehints 3 | myst-parser 4 | sphinx<8.0.0 5 | importlib-metadata 6 | sphinx-multiversion 7 | pydata-sphinx-theme<0.14 8 | sphinx_design 9 | sphinx-autosummary-autocollect 10 | 11 | # previous dependencies that are required to build docs for later versions too. 12 | semver 13 | gensim 14 | bpemb -------------------------------------------------------------------------------- /docs/tutorial/index.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ========= 3 | 4 | 5 | .. _flair_tutorials: 6 | 7 | .. toctree:: 8 | :maxdepth: 2 9 | 10 | intro 11 | tutorial-basics/index 12 | tutorial-training/index 13 | tutorial-embeddings/index 14 | tutorial-hunflair2/index -------------------------------------------------------------------------------- /docs/tutorial/intro.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 1 3 | --- 4 | 5 | (getting_started)= 6 | 7 | # Quick Start 8 | 9 | Let's discover **Flair in less than 5 minutes**. 10 | 11 | ## Requirements and Installation 12 | 13 | In your favorite virtual environment, simply do: 14 | 15 | ``` 16 | pip install flair 17 | ``` 18 | 19 | Flair requires Python 3.9+. 20 | 21 | ## Example 1: Tag Entities in Text 22 | 23 | Let's run **named entity recognition** (NER) over the following example sentence: "_I love Berlin and New York._" 24 | 25 | Our goal is to identify names in this sentence, and their types. 26 | 27 | To do this, all you need is to make a [`Sentence`](#flair.data.Sentence) for this text, load a pre-trained model and use it to predict tags for the sentence: 28 | 29 | 30 | ```python 31 | from flair.data import Sentence 32 | from flair.nn import Classifier 33 | 34 | # make a sentence 35 | sentence = Sentence('I love Berlin and New York.') 36 | 37 | # load the NER tagger 38 | tagger = Classifier.load('ner') 39 | 40 | # run NER over sentence 41 | tagger.predict(sentence) 42 | 43 | # print the sentence with all annotations 44 | print(sentence) 45 | ``` 46 | 47 | This should print: 48 | 49 | ```console 50 | Sentence[7]: "I love Berlin and New York." → ["Berlin"/LOC, "New York"/LOC] 51 | ``` 52 | 53 | The output shows that both "Berlin" and "New York" were tagged as **location entities** (LOC) in this sentence. 54 | 55 | 56 | ## Example 2: Detect Sentiment 57 | 58 | Let's run **sentiment analysis** over the same sentence to determine whether it is POSITIVE or NEGATIVE. 59 | 60 | You can do this with essentially the same code as above. 
The only difference is that you now load the 'sentiment' model instead of the 'ner' model: 61 | 62 | 63 | ```python 64 | from flair.data import Sentence 65 | from flair.nn import Classifier 66 | 67 | # make a sentence 68 | sentence = Sentence('I love Berlin and New York.') 69 | 70 | # load the sentiment tagger 71 | tagger = Classifier.load('sentiment') 72 | 73 | # run sentiment analysis over sentence 74 | tagger.predict(sentence) 75 | 76 | # print the sentence with all annotations 77 | print(sentence) 78 | 79 | ``` 80 | 81 | This should print: 82 | 83 | ```console 84 | Sentence[7]: "I love Berlin and New York." → POSITIVE (0.9982) 85 | ``` 86 | 87 | The output shows that the sentence "_I love Berlin and New York._" was tagged as having **POSITIVE** sentiment. 88 | 89 | 90 | ## Summary 91 | 92 | Congrats, you now know how to use Flair to find entities and detect sentiment! 93 | 94 | ## Next steps 95 | 96 | If you want to know more about Flair, check out [Tutorial 1](tutorial-basics/) next, which introduces the basics of Flair! -------------------------------------------------------------------------------- /docs/tutorial/tutorial-basics/how-predictions-work.md: -------------------------------------------------------------------------------- 1 | # How predictions work 2 | 3 | All taggers in Flair make predictions. This tutorial helps you understand what information you can get out of each prediction. 4 | 5 | ## Running example 6 | 7 | Let's use our standard NER example to illustrate how annotations work: 8 | 9 | ```python 10 | from flair.nn import Classifier 11 | from flair.data import Sentence 12 | 13 | # load the model 14 | tagger = Classifier.load('ner') 15 | 16 | # make a sentence 17 | sentence = Sentence('George Washington went to Washington.') 18 | 19 | # predict NER tags 20 | tagger.predict(sentence) 21 | 22 | # print the sentence with the tags 23 | print(sentence) 24 | ``` 25 | 26 | This should print: 27 | ```console 28 | Sentence: "George Washington went to Washington ." → ["George Washington"/PER, "Washington"/LOC] 29 | ``` 30 | 31 | This shows us that two entities are labeled in this sentence: "George Washington" as PER (person) and "Washington" 32 | as LOC (location). 33 | 34 | ## Getting the predictions 35 | 36 | A common question is **how to access these predictions directly**. You can do this by using 37 | the [`get_labels()`](#flair.data.Sentence.get_labels) method to iterate over all predictions: 38 | 39 | ```python 40 | for label in sentence.get_labels(): 41 | print(label) 42 | ``` 43 | This should print the two NER predictions: 44 | 45 | ```console 46 | Span[0:2]: "George Washington" → PER (0.9989) 47 | Span[4:5]: "Washington" → LOC (0.9942) 48 | ``` 49 | 50 | As you can see, each entity is printed together with the predicted class. 51 | The confidence of the prediction is indicated as a score in brackets.
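These scores make it easy to, for instance, keep only predictions the model is confident about. The following minimal sketch filters with an arbitrarily chosen threshold of 0.8, using only the [`get_labels()`](#flair.data.Sentence.get_labels) method introduced above:

```python
# keep only predictions whose confidence score exceeds 0.8 (threshold chosen arbitrarily)
confident_labels = [label for label in sentence.get_labels() if label.score > 0.8]

for label in confident_labels:
    print(label)
```

With the running example, both predictions clear this threshold and are printed unchanged.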
52 | 53 | ## Values for each prediction 54 | 55 | For each prediction, you can even **directly access** the label value, and all other attributes of the [`Label`](#flair.data.Label) class: 56 | 57 | ```python 58 | # iterate over all labels in the sentence 59 | for label in sentence.get_labels(): 60 | # print label value and score 61 | print(f'label.value is: "{label.value}"') 62 | print(f'label.score is: "{label.score}"') 63 | # access the data point to which label attaches and print its text 64 | print(f'the text of label.data_point is: "{label.data_point.text}"\n') 65 | ``` 66 | 67 | This should print: 68 | ```console 69 | label.value is: "PER" 70 | label.score is: "0.998886227607727" 71 | the text of label.data_point is: "George Washington" 72 | 73 | label.value is: "LOC" 74 | label.score is: "0.9942097663879395" 75 | the text of label.data_point is: "Washington" 76 | ``` 77 | 78 | 79 | ### Next 80 | 81 | Congrats, you've made your first predictions with Flair and accessed value and confidence scores of each prediction. 82 | 83 | Next, let's discuss specifically how to [predict named entities with Flair](tagging-entities.md). 84 | -------------------------------------------------------------------------------- /docs/tutorial/tutorial-basics/how-to-tag-corpus.md: -------------------------------------------------------------------------------- 1 | # How to tag a whole corpus 2 | 3 | Often, you may want to tag an entire text corpus. In this case, you need to split the corpus into sentences and pass a 4 | list of [`Sentence`](#flair.data.Sentence) objects to the [`Classifier.predict()`](#flair.nn.Classifier.predict) method. 5 | 6 | For instance, you can use a [`SentenceSplitter`](#flair.splitter.SentenceSplitter) to split your text: 7 | 8 | ```python 9 | from flair.nn import Classifier 10 | from flair.splitter import SegtokSentenceSplitter 11 | 12 | # example text with many sentences 13 | text = "This is a sentence. This is another sentence. I love Berlin." 14 | 15 | # initialize sentence splitter 16 | splitter = SegtokSentenceSplitter() 17 | 18 | # use splitter to split text into list of sentences 19 | sentences = splitter.split(text) 20 | 21 | # predict tags for sentences 22 | tagger = Classifier.load('ner') 23 | tagger.predict(sentences) 24 | 25 | # iterate through sentences and print predicted labels 26 | for sentence in sentences: 27 | print(sentence) 28 | ``` 29 | 30 | Using the `mini_batch_size` parameter of the [`Classifier.predict()`](#flair.nn.Classifier.predict) method, you can set the size of mini batches passed to the 31 | tagger. Depending on your resources, you might want to play around with this parameter to optimize speed. 32 | 33 | ### Next 34 | 35 | That's it - you completed tutorial 1! Congrats! 36 | 37 | You've learned how basic classes work and how to use Flair to make various predictions. 38 | 39 | Next, you can check out our tutorial on how to [train your own model](../tutorial-training/how-model-training-works.md). 40 | -------------------------------------------------------------------------------- /docs/tutorial/tutorial-basics/index.rst: -------------------------------------------------------------------------------- 1 | Tutorial 1: Basic Tagging 2 | ========================= 3 | 4 | This tutorial shows you in more detail how to tag your text and access predictions, 5 | and showcases various models we ship with Flair. 6 | 7 | .. 
toctree:: 8 | :maxdepth: 1 9 | 10 | basic-types 11 | how-predictions-work 12 | tagging-entities 13 | tagging-sentiment 14 | entity-linking 15 | entity-mention-linking 16 | part-of-speech-tagging 17 | other-models 18 | how-to-tag-corpus 19 | -------------------------------------------------------------------------------- /docs/tutorial/tutorial-basics/tagging-sentiment.md: -------------------------------------------------------------------------------- 1 | # Tagging sentiment 2 | 3 | This tutorial shows you how to do sentiment analysis in Flair. 4 | 5 | ## Tagging sentiment with our standard model 6 | 7 | Our standard sentiment analysis model uses distilBERT embeddings and was trained over a mix of corpora, notably 8 | the Amazon review corpus, and can thus handle a variety of domains and languages. 9 | 10 | Let's use an example sentence: 11 | 12 | ```python 13 | from flair.nn import Classifier 14 | from flair.data import Sentence 15 | 16 | # load the model 17 | tagger = Classifier.load('sentiment') 18 | 19 | # make a sentence 20 | sentence = Sentence('This movie is not at all bad.') 21 | 22 | # predict sentiment 23 | tagger.predict(sentence) 24 | 25 | # print sentence with predicted tags 26 | print(sentence) 27 | ``` 28 | 29 | This should print: 30 | ```console 31 | Sentence[8]: "This movie is not at all bad." → POSITIVE (0.9929) 32 | ``` 33 | 34 | This shows us that the sentence overall is tagged as having POSITIVE sentiment. 35 | 36 | ## Tagging sentiment with our fast model 37 | 38 | We also offer an RNN-based variant which is faster but less accurate. Use it like this: 39 | 40 | 41 | ```python 42 | from flair.nn import Classifier 43 | from flair.data import Sentence 44 | 45 | # load the model 46 | tagger = Classifier.load('sentiment-fast') 47 | 48 | # make a sentence 49 | sentence = Sentence('This movie is very bad.') 50 | 51 | # predict sentiment 52 | tagger.predict(sentence) 53 | 54 | # print sentence with predicted tags 55 | print(sentence) 56 | ``` 57 | 58 | This should print: 59 | ```console 60 | Sentence[6]: "This movie is very bad." → NEGATIVE (0.9999) 61 | ``` 62 | 63 | This indicates that the sentence is of NEGATIVE sentiment. As you can see, it's the same code as above, just loading the 64 | '**sentiment-fast**' model instead of '**sentiment**'. 65 | 66 | 67 | ### List of Sentiment Models 68 | 69 | We end this section with a list of all sentiment models we currently ship with Flair: 70 | 71 | | ID | Language | Task | Training Dataset | Accuracy | 72 | | ------------- | ---- | ------------- |------------- |------------- | 73 | | 'sentiment' | English | detecting positive and negative sentiment (transformer-based) | movie and product reviews | **98.87** | 74 | | 'sentiment-fast' | English | detecting positive and negative sentiment (RNN-based) | movie and product reviews | **96.83**| 75 | | 'de-offensive-language' | German | detecting offensive language | [GermEval 2018 Task 1](https://projects.fzai.h-da.de/iggsa/projekt/) | **75.71** (Macro F1) | 76 | 77 | 78 | ### Next 79 | 80 | Congrats, you learned how to predict sentiment with Flair! 81 | 82 | Next, let's discuss how to [link entities to Wikipedia with Flair](entity-linking.md). 83 | 84 | -------------------------------------------------------------------------------- /docs/tutorial/tutorial-embeddings/index.rst: -------------------------------------------------------------------------------- 1 | Tutorial 3: Embeddings 2 | ====================== 3 | 4 | This tutorial shows you how to use Flair to produce embeddings for words and documents.
5 | Embeddings are vector representations that are useful for a variety of downstream tasks. 6 | All Flair models are trained on top of embeddings, so if you want to train your own models, 7 | you should understand how embeddings work. 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | 12 | embeddings 13 | transformer-embeddings 14 | flair-embeddings 15 | classic-word-embeddings 16 | other-embeddings 17 | -------------------------------------------------------------------------------- /docs/tutorial/tutorial-hunflair2/index.rst: -------------------------------------------------------------------------------- 1 | Tutorial: HunFlair2 2 | =================== 3 | 4 | *HunFlair2* is a state-of-the-art named entity tagger and linker for biomedical texts. It comes with 5 | models for genes/proteins, chemicals, diseases, species and cell lines. *HunFlair2* 6 | builds on pretrained domain-specific language models and outperforms other biomedical 7 | NER tools on unseen corpora. 8 | 9 | .. toctree:: 10 | :glob: 11 | :maxdepth: 1 12 | 13 | overview 14 | tagging 15 | linking 16 | training-ner-models 17 | customize-linking 18 | -------------------------------------------------------------------------------- /docs/tutorial/tutorial-training/how-to-train-text-classifier.md: -------------------------------------------------------------------------------- 1 | # Train a Text Classifier 2 | 3 | This tutorial shows you how to train your own text classifier models with Flair. For instance, you 4 | could train your own sentiment analysis model, or an offensive language detection model. 5 | 6 | 7 | ## Training a text classification model with transformers 8 | 9 | For text classification, you reach state-of-the-art scores by fine-tuning a transformer. 10 | 11 | Training a model is easy: load the appropriate corpus, make a label dictionary, then fine-tune a [`TextClassifier`](#flair.models.TextClassifier) 12 | model using the [`ModelTrainer.fine_tune()`](#flair.trainers.ModelTrainer.fine_tune) method. See the example script below: 13 | 14 | ```python 15 | from flair.data import Corpus 16 | from flair.datasets import TREC_6 17 | from flair.embeddings import TransformerDocumentEmbeddings 18 | from flair.models import TextClassifier 19 | from flair.trainers import ModelTrainer 20 | 21 | # 1. get the corpus 22 | corpus: Corpus = TREC_6() 23 | 24 | # 2. what label do we want to predict? 25 | label_type = 'question_class' 26 | 27 | # 3. create the label dictionary 28 | label_dict = corpus.make_label_dictionary(label_type=label_type) 29 | 30 | # 4. initialize transformer document embeddings (many models are available) 31 | document_embeddings = TransformerDocumentEmbeddings('distilbert-base-uncased', fine_tune=True) 32 | 33 | # 5. create the text classifier 34 | classifier = TextClassifier(document_embeddings, label_dictionary=label_dict, label_type=label_type) 35 | 36 | # 6. initialize trainer 37 | trainer = ModelTrainer(classifier, corpus) 38 | 39 | # 7. run training with fine-tuning 40 | trainer.fine_tune('resources/taggers/question-classification-with-transformer', 41 | learning_rate=5.0e-5, 42 | mini_batch_size=4, 43 | max_epochs=10, 44 | ) 45 | ``` 46 | 47 | Once the model is trained, you can load it to predict the class of new sentences. Just call the [`predict`](#flair.nn.DefaultClassifier.predict) method of the model.
48 | 49 | ```python 50 | from flair.data import Sentence 51 | from flair.models import TextClassifier 52 | 53 | classifier = TextClassifier.load('resources/taggers/question-classification-with-transformer/final-model.pt') 54 | 55 | # create example sentence 56 | sentence = Sentence('Who built the Eiffel Tower ?') 57 | 58 | # predict class and print 59 | classifier.predict(sentence) 60 | 61 | print(sentence.labels) 62 | ``` 63 | 64 | 65 | ## Next 66 | 67 | Next, learn [how to train an entity linker](how-to-train-span-classifier.md). -------------------------------------------------------------------------------- /docs/tutorial/tutorial-training/index.rst: -------------------------------------------------------------------------------- 1 | Tutorial 2: Training models 2 | =========================== 3 | 4 | This tutorial illustrates how you can train your own state-of-the-art NLP models with Flair. 5 | 6 | .. toctree:: 7 | :glob: 8 | :maxdepth: 1 9 | 10 | how-model-training-works 11 | train-vs-fine-tune 12 | how-to-load-prepared-dataset 13 | how-to-load-custom-dataset 14 | how-to-train-sequence-tagger 15 | how-to-train-text-classifier 16 | how-to-train-span-classifier 17 | how-to-train-multitask-model 18 | -------------------------------------------------------------------------------- /docs/tutorial/tutorial-training/train-vs-fine-tune.md: -------------------------------------------------------------------------------- 1 | # Training vs fine-tuning 2 | 3 | There are two broad ways to train a model: the "classic" approach and the fine-tuning approach. This section 4 | explains the differences. 5 | 6 | 7 | ## Fine-Tuning 8 | 9 | Fine-tuning is the current state-of-the-art approach. The main idea is that you take a pre-trained language model that 10 | consists of (hundreds of) millions of trained parameters. To this language model you add a simple prediction head with 11 | randomly initialized weights. 12 | 13 | Since in this case, the vast majority of parameters in the model is already trained, you only need to "fine-tune" this 14 | model. This means: Very small learning rate (LR) and just a few epochs. You are essentially just minimally modifying 15 | the model to adapt it to the task you want to solve. 16 | 17 | Use this method by calling [`ModelTrainer.fine_tune()`](#flair.trainers.ModelTrainer.fine_tune). 18 | Since most models in Flair were trained this way, this is likely the approach you'll want to use. 19 | 20 | 21 | ## Training 22 | 23 | On the other hand, you should use the classic training approach if the majority of the trainable parameters in your 24 | model is randomly initialized. This can happen for instance if you freeze the model weights of the pre-trained language 25 | model, leaving only the randomly initialized prediction head as trainable parameters. This training approach is also 26 | referred to as "feature-based" or "probing" in some papers. 27 | 28 | Since the majority of parameters is randomly initialized, you need to fully train the model. This means: high learning 29 | rate and many epochs. 30 | 31 | Use this method by calling [`ModelTrainer.train()`](#flair.trainers.ModelTrainer.train). 32 | 33 | ```{note} 34 | Another application of classic training is for linear probing of pre-trained language models. In this scenario, you 35 | "freeze" the weights of the language model (meaning that they cannot be changed) and add a prediction head that is 36 | trained from scratch. So, even though a language model is involved, its parameters are not trainable.
This means that 37 | all trainable parameters in this scenario are randomly initialized, therefore necessitating the use of the classic 38 | training approach. 39 | ``` 40 | 41 | 42 | ## Paper 43 | 44 | If you are interested in an experimental comparison of the two above-mentioned approaches, check out [our paper](https://arxiv.org/pdf/2011.06993) 45 | that compares fine-tuning to the feature-based approach. 46 | 47 | 48 | ## Next 49 | 50 | Next, learn how to load a [training dataset](how-to-load-prepared-dataset.md). -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | This folder contains actively maintained examples of how to use Flair, organized by NLP task. 4 | 5 | ## Table of Tasks 6 | 7 | | Task | Documentation 8 | | ------------------------------ | ------------- 9 | | Named Entity Recognition (NER) | [Here](ner/) 10 | | Multi GPU | [Here](multi_gpu/) 11 | -------------------------------------------------------------------------------- /examples/multi_gpu/README.md: -------------------------------------------------------------------------------- 1 | # Multi GPU 2 | 3 | Training can be distributed across multiple GPUs on a local machine when using 4 | [`ModelTrainer`](#flair.trainers.trainer.ModelTrainer). 5 | 6 | ## Example 7 | 8 | See the script `run_multi_gpu.py` and its comments. 9 | 10 | ## Tutorial 11 | 12 | There are two changes that are always required, as well as a few things to consider. 13 | 14 | Always required: 15 | 1) Pass the argument `multi_gpu=True` to your [`.train()`](#flair.trainers.trainer.ModelTrainer.train) or `.fine_tune()` call. 16 | 2) Wrap your code in [`launch_distributed`](#flair.distributed_utils.launch_distributed), e.g. 17 | `launch_distributed(main, *args)`. This spawns multiple processes, each driving a GPU (a minimal sketch follows at the end of this page). 18 | 19 | Other considerations: 20 | - The corpus and other preprocessing must be the same on all processes. For example, if corpus initialization involves 21 | anything random, you should either 22 | - Set the random seed before initializing the corpus (e.g. `flair.set_seed(42)`) OR 23 | - Initialize the corpus before calling `launch_distributed` and pass the corpus as an argument so it's serialized to 24 | all processes 25 | - The effective batch size will be larger by a factor of num_gpus 26 | - Each GPU will now process `mini_batch_size` examples before the optimizer steps, resulting in fewer total steps 27 | taken relative to training with a single device. To obtain comparable results between single/multi GPU, 28 | both mathematically and in terms of wall time, consider the method in the example script. 29 | - Large batch sizes may be necessary to see faster runs; otherwise, the communication overhead may dominate 30 | 31 | Only the parameter updates in the training process will be distributed across multiple GPUs. Evaluation and prediction 32 | are still done on a single device.
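Putting the two required changes together, a minimal sketch could look as follows. The dataset, model, and output path here are only illustrative choices that mirror `run_multi_gpu.py`; see that script for the complete, maintained example:

```python
import flair
from flair.datasets import IMDB
from flair.distributed_utils import launch_distributed
from flair.embeddings import TransformerDocumentEmbeddings
from flair.models import TextClassifier
from flair.trainers import ModelTrainer


def main():
    flair.set_seed(42)  # same seed on every process, so all processes build the same corpus

    corpus = IMDB().downsample(0.1)
    label_type = "sentiment"
    label_dictionary = corpus.make_label_dictionary(label_type)

    embeddings = TransformerDocumentEmbeddings("distilbert-base-uncased")
    model = TextClassifier(embeddings, label_type, label_dictionary=label_dictionary)

    trainer = ModelTrainer(model, corpus)
    trainer.fine_tune(
        "resources/taggers/multi-gpu-sketch",
        multi_gpu=True,  # required change 1
    )


if __name__ == "__main__":
    launch_distributed(main)  # required change 2: spawns one process per GPU
```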
33 | -------------------------------------------------------------------------------- /examples/multi_gpu/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/examples/multi_gpu/__init__.py -------------------------------------------------------------------------------- /examples/multi_gpu/run_multi_gpu.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import flair 4 | from flair.datasets import IMDB 5 | from flair.distributed_utils import launch_distributed 6 | from flair.embeddings import TransformerDocumentEmbeddings 7 | from flair.models import TextClassifier 8 | from flair.trainers import ModelTrainer 9 | 10 | 11 | def main(multi_gpu): 12 | # Note: Multi-GPU can affect corpus loading 13 | # This code will run multiple times -- each GPU gets its own process and each process runs this code. We need to 14 | # ensure that the corpus has the same elements and order on all processes, despite sampling. We do that by using 15 | # the same seed on all processes. 16 | flair.set_seed(42) 17 | 18 | corpus = IMDB() 19 | corpus.downsample(0.1) 20 | label_type = "sentiment" 21 | label_dictionary = corpus.make_label_dictionary(label_type) 22 | 23 | embeddings = TransformerDocumentEmbeddings(model="distilbert-base-uncased") 24 | model = TextClassifier(embeddings, label_type, label_dictionary=label_dictionary) 25 | 26 | # Note: Multi-GPU can affect choice of batch size. 27 | # In order to compare batch updates fairly between single and multi-GPU training, we should: 28 | # 1) Step the optimizer after the same number of examples to achieve comparable updates 29 | # 2) Process the same number of examples in each forward pass 30 | mini_batch_chunk_size = 32 # Make this as large as possible without running out of GPU memory to fully utilize each device 31 | num_devices_when_distributing = max(torch.cuda.device_count(), 1) 32 | mini_batch_size = mini_batch_chunk_size if multi_gpu else mini_batch_chunk_size * num_devices_when_distributing 33 | # e.g. Suppose your machine has 2 GPUs. If multi_gpu=False, the first gpu will process 32 examples, then the 34 | # first gpu will process another 32 examples, then the optimizer will step. If multi_gpu=True, each gpu will 35 | # process 32 examples at the same time, then the optimizer will step.
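    # Either way, the optimizer in this 2-GPU example steps once per 64 examples:
    # 2 GPUs x 32 examples each when multi_gpu=True, or 1 GPU x 2 chunks of 32 examples when multi_gpu=False.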
36 | 37 | trainer = ModelTrainer(model, corpus) 38 | trainer.fine_tune( 39 | "resources/taggers/multi-gpu", 40 | multi_gpu=multi_gpu, # Required for multi-gpu 41 | max_epochs=2, 42 | mini_batch_chunk_size=mini_batch_chunk_size, 43 | mini_batch_size=mini_batch_size, 44 | ) 45 | 46 | 47 | if __name__ == "__main__": 48 | """Minimal example demonstrating how to train a model on multiple GPUs.""" 49 | multi_gpu = True 50 | 51 | if multi_gpu: 52 | launch_distributed(main, multi_gpu) # Required for multi-gpu 53 | else: 54 | main(multi_gpu) 55 | -------------------------------------------------------------------------------- /examples/ner/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/examples/ner/__init__.py -------------------------------------------------------------------------------- /flair/class_utils.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import inspect 3 | from collections.abc import Iterable 4 | from types import ModuleType 5 | from typing import Any, Optional, Protocol, TypeVar, Union, overload 6 | 7 | T = TypeVar("T") 8 | 9 | 10 | class StringLike(Protocol): 11 | def __str__(self) -> str: ... 12 | 13 | 14 | def get_non_abstract_subclasses(cls: type[T]) -> Iterable[type[T]]: 15 | for subclass in cls.__subclasses__(): 16 | yield from get_non_abstract_subclasses(subclass) 17 | if inspect.isabstract(subclass): 18 | continue 19 | yield subclass 20 | 21 | 22 | def get_state_subclass_by_name(cls: type[T], cls_name: Optional[str]) -> type[T]: 23 | for sub_cls in get_non_abstract_subclasses(cls): 24 | if sub_cls.__name__ == cls_name: 25 | return sub_cls 26 | raise ValueError(f"Could not find any class with name '{cls_name}'") 27 | 28 | 29 | @overload 30 | def lazy_import(group: str, module: str, first_symbol: None) -> ModuleType: ... 31 | 32 | 33 | @overload 34 | def lazy_import(group: str, module: str, first_symbol: str, *symbols: str) -> list[Any]: ... 35 | 36 | 37 | def lazy_import( 38 | group: str, module: str, first_symbol: Optional[str] = None, *symbols: str 39 | ) -> Union[list[Any], ModuleType]: 40 | try: 41 | imported_module = importlib.import_module(module) 42 | except ImportError: 43 | raise ImportError( 44 | f"Could not import {module}. Please install the optional '{group}' dependency. 
Via 'pip install flair[{group}]'" 45 | ) 46 | if first_symbol is None: 47 | return imported_module 48 | symbols = (first_symbol, *symbols) 49 | 50 | return [getattr(imported_module, symbol) for symbol in symbols] 51 | -------------------------------------------------------------------------------- /flair/embeddings/__init__.py: -------------------------------------------------------------------------------- 1 | # Expose base classes 2 | from flair.embeddings.transformer import ( 3 | TransformerEmbeddings, 4 | TransformerJitDocumentEmbeddings, 5 | TransformerJitWordEmbeddings, 6 | TransformerOnnxDocumentEmbeddings, 7 | TransformerOnnxWordEmbeddings, 8 | ) 9 | 10 | from .base import Embeddings, ScalarMix 11 | 12 | # Expose document embedding classes 13 | from .document import ( 14 | DocumentCNNEmbeddings, 15 | DocumentEmbeddings, 16 | DocumentLMEmbeddings, 17 | DocumentPoolEmbeddings, 18 | DocumentRNNEmbeddings, 19 | DocumentTFIDFEmbeddings, 20 | SentenceTransformerDocumentEmbeddings, 21 | TransformerDocumentEmbeddings, 22 | ) 23 | 24 | # Expose image embedding classes 25 | from .image import ( 26 | ConvTransformNetworkImageEmbeddings, 27 | IdentityImageEmbeddings, 28 | ImageEmbeddings, 29 | NetworkImageEmbeddings, 30 | PrecomputedImageEmbeddings, 31 | ) 32 | 33 | # Expose legacy embedding classes 34 | from .legacy import ( 35 | CharLMEmbeddings, 36 | DocumentLSTMEmbeddings, 37 | DocumentMeanEmbeddings, 38 | ELMoEmbeddings, 39 | ) 40 | 41 | # Expose token embedding classes 42 | from .token import ( 43 | BytePairEmbeddings, 44 | CharacterEmbeddings, 45 | FastTextEmbeddings, 46 | FlairEmbeddings, 47 | HashEmbeddings, 48 | MuseCrosslingualEmbeddings, 49 | NILCEmbeddings, 50 | OneHotEmbeddings, 51 | PooledFlairEmbeddings, 52 | StackedEmbeddings, 53 | TokenEmbeddings, 54 | TransformerWordEmbeddings, 55 | WordEmbeddings, 56 | ) 57 | 58 | __all__ = [ 59 | "BPEmbSerializable", 60 | "BytePairEmbeddings", 61 | "CharLMEmbeddings", 62 | "CharacterEmbeddings", 63 | "ConvTransformNetworkImageEmbeddings", 64 | "DocumentCNNEmbeddings", 65 | "DocumentEmbeddings", 66 | "DocumentLMEmbeddings", 67 | "DocumentLSTMEmbeddings", 68 | "DocumentMeanEmbeddings", 69 | "DocumentPoolEmbeddings", 70 | "DocumentRNNEmbeddings", 71 | "DocumentTFIDFEmbeddings", 72 | "ELMoEmbeddings", 73 | "Embeddings", 74 | "FastTextEmbeddings", 75 | "FlairEmbeddings", 76 | "HashEmbeddings", 77 | "IdentityImageEmbeddings", 78 | "ImageEmbeddings", 79 | "MuseCrosslingualEmbeddings", 80 | "NILCEmbeddings", 81 | "NetworkImageEmbeddings", 82 | "OneHotEmbeddings", 83 | "PooledFlairEmbeddings", 84 | "PrecomputedImageEmbeddings", 85 | "ScalarMix", 86 | "SentenceTransformerDocumentEmbeddings", 87 | "StackedEmbeddings", 88 | "TokenEmbeddings", 89 | "TransformerDocumentEmbeddings", 90 | "TransformerEmbeddings", 91 | "TransformerJitDocumentEmbeddings", 92 | "TransformerJitWordEmbeddings", 93 | "TransformerOnnxDocumentEmbeddings", 94 | "TransformerOnnxWordEmbeddings", 95 | "TransformerWordEmbeddings", 96 | "WordEmbeddings", 97 | ] 98 | -------------------------------------------------------------------------------- /flair/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .entity_linker_model import SpanClassifier 2 | from .entity_mention_linking import EntityMentionLinker 3 | from .language_model import LanguageModel 4 | from .lemmatizer_model import Lemmatizer 5 | from .multitask_model import MultitaskModel 6 | from .pairwise_classification_model import TextPairClassifier 7 | from .pairwise_regression_model import TextPairRegressor
8 | from .prefixed_tagger import PrefixedSequenceTagger  # This import has to be after SequenceTagger! 9 | from .regexp_tagger import RegexpTagger 10 | from .relation_classifier_model import RelationClassifier 11 | from .relation_extractor_model import RelationExtractor 12 | from .sequence_tagger_model import SequenceTagger 13 | from .tars_model import FewshotClassifier, TARSClassifier, TARSTagger 14 | from .text_classification_model import TextClassifier 15 | from .text_regression_model import TextRegressor 16 | from .triple_classification_model import TextTripleClassifier 17 | from .word_tagger_model import TokenClassifier, WordTagger 18 | 19 | __all__ = [ 20 | "EntityMentionLinker", 21 | "FewshotClassifier", 22 | "LanguageModel", 23 | "Lemmatizer", 24 | "MultitaskModel", 25 | "PrefixedSequenceTagger", 26 | "RegexpTagger", 27 | "RelationClassifier", 28 | "RelationExtractor", 29 | "SequenceTagger", 30 | "SpanClassifier", 31 | "TARSClassifier", 32 | "TARSTagger", 33 | "TextClassifier", 34 | "TextPairClassifier", 35 | "TextPairRegressor", 36 | "TextRegressor", 37 | "TextTripleClassifier", 38 | "TokenClassifier", 39 | "WordTagger", 40 | ] 41 | -------------------------------------------------------------------------------- /flair/models/sequence_tagger_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/flair/models/sequence_tagger_utils/__init__.py -------------------------------------------------------------------------------- /flair/models/sequence_tagger_utils/crf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import flair 4 | 5 | START_TAG: str = "<START>" 6 | STOP_TAG: str = "<STOP>" 7 | 8 | 9 | class CRF(torch.nn.Module): 10 | """Conditional Random Field. 11 | 12 | Conditional Random Field implementation according to sgrvinod (https://github.com/sgrvinod). 13 | A classifier that predicts a single tag / class / label for a given word, based not just on the word itself 14 | but also on previously seen annotations. 15 | """ 16 | 17 | def __init__(self, tag_dictionary, tagset_size: int, init_from_state_dict: bool) -> None: 18 | """Initialize the Conditional Random Field. 19 | 20 | Args: 21 | tag_dictionary: tag dictionary used to find the IDs of the start and stop tags 22 | tagset_size: number of tags in the tag dictionary 23 | init_from_state_dict: whether we load a pretrained model from a state dict 24 | """ 25 | super().__init__() 26 | 27 | self.tagset_size = tagset_size 28 | # Transitions are used in the following way: transitions[to, from]. 29 | self.transitions = torch.nn.Parameter(torch.randn(tagset_size, tagset_size)) 30 | # If we are not using a pretrained model and train a fresh one, we need to set transitions from any tag 31 | # to START-tag and from STOP-tag to any other tag to -10000. 32 | if not init_from_state_dict: 33 | self.transitions.detach()[tag_dictionary.get_idx_for_item(START_TAG), :] = -10000 34 | 35 | self.transitions.detach()[:, tag_dictionary.get_idx_for_item(STOP_TAG)] = -10000 36 | self.to(flair.device) 37 | 38 | def forward(self, features: torch.Tensor) -> torch.Tensor: 39 | """Forward propagation of the Conditional Random Field.
40 | 41 | Args: 42 | features: output from RNN / Linear layer in shape (batch size, seq len, hidden size) 43 | 44 | Returns: CRF scores (emission scores for each token + transition probabilities from the previous state) in shape (batch_size, seq len, tagset size, tagset size) 45 | """ 46 | batch_size, seq_len = features.size()[:2] 47 | 48 | emission_scores = features 49 | emission_scores = emission_scores.unsqueeze(-1).expand(batch_size, seq_len, self.tagset_size, self.tagset_size) 50 | 51 | crf_scores = emission_scores + self.transitions.unsqueeze(0).unsqueeze(0) 52 | return crf_scores 53 | -------------------------------------------------------------------------------- /flair/nn/__init__.py: -------------------------------------------------------------------------------- 1 | from .decoder import DeepNCMDecoder, LabelVerbalizerDecoder, PrototypicalDecoder 2 | from .dropout import LockedDropout, WordDropout 3 | from .model import Classifier, DefaultClassifier, Model 4 | 5 | __all__ = [ 6 | "Classifier", 7 | "DeepNCMDecoder", 8 | "DefaultClassifier", 9 | "LabelVerbalizerDecoder", 10 | "LockedDropout", 11 | "Model", 12 | "PrototypicalDecoder", 13 | "WordDropout", 14 | ] 15 | -------------------------------------------------------------------------------- /flair/nn/distance/__init__.py: -------------------------------------------------------------------------------- 1 | from .cosine import CosineDistance, LogitCosineDistance, NegativeScaledDotProduct 2 | from .euclidean import EuclideanDistance, EuclideanMean 3 | from .hyperbolic import HyperbolicDistance, HyperbolicMean 4 | 5 | __all__ = [ 6 | "CosineDistance", 7 | "EuclideanDistance", 8 | "EuclideanMean", 9 | "HyperbolicDistance", 10 | "HyperbolicMean", 11 | "LogitCosineDistance", 12 | "NegativeScaledDotProduct", 13 | ] 14 | -------------------------------------------------------------------------------- /flair/nn/distance/cosine.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | # Source: https://github.com/UKPLab/sentence-transformers/blob/master/sentence_transformers/util.py#L23 4 | 5 | 6 | def dot_product(a: torch.Tensor, b: torch.Tensor, normalize=False): 7 | """Computes the dot product for pairs of vectors. 8 | 9 | Args: 10 | a: the left tensor 11 | b: the right tensor 12 | normalize: Vectors are normalized (leads to cosine similarity) 13 | 14 | Returns: Matrix with res[i][j] = dot_product(a[i], b[j]) 15 | """ 16 | if len(a.shape) == 1: 17 | a = a.unsqueeze(0) 18 | 19 | if len(b.shape) == 1: 20 | b = b.unsqueeze(0) 21 | 22 | if normalize: 23 | a = torch.nn.functional.normalize(a, p=2, dim=1) 24 | b = torch.nn.functional.normalize(b, p=2, dim=1) 25 | 26 | return torch.mm(a, b.transpose(0, 1)) 27 | 28 | 29 | class CosineDistance(torch.nn.Module): 30 | def forward(self, a, b): 31 | return -dot_product(a, b, normalize=True) 32 | 33 | 34 | class LogitCosineDistance(torch.nn.Module): 35 | def forward(self, a, b): 36 | return torch.logit(0.5 - 0.5 * dot_product(a, b, normalize=True)) 37 | 38 | 39 | class NegativeScaledDotProduct(torch.nn.Module): 40 | def forward(self, a, b): 41 | sqrt_d = torch.sqrt(torch.tensor(a.size(-1))) 42 | return -dot_product(a, b, normalize=False) / sqrt_d 43 | -------------------------------------------------------------------------------- /flair/nn/distance/euclidean.py: -------------------------------------------------------------------------------- 1 | """Euclidean distances implemented in PyTorch.
2 | 3 | This module was copied from the following repository: 4 | https://github.com/asappresearch/dynamic-classification 5 | 6 | It contains the code from the paper "Metric Learning for Dynamic Text 7 | Classification". 8 | 9 | https://arxiv.org/abs/1911.01026 10 | 11 | In case this file is modified, please consider contributing to the original 12 | repository. 13 | 14 | It was published under MIT License: 15 | https://github.com/asappresearch/dynamic-classification/blob/master/LICENSE.md 16 | 17 | Source: https://github.com/asappresearch/dynamic-classification/blob/55beb5a48406c187674bea40487c011e8fa45aab/distance/euclidean.py 18 | """ 19 | 20 | import torch 21 | from torch import Tensor, nn 22 | 23 | 24 | class EuclideanDistance(nn.Module): 25 | """Implements an EuclideanDistance object.""" 26 | 27 | def forward(self, mat_1: Tensor, mat_2: Tensor) -> Tensor: 28 | """Returns the squared Euclidean distance between each element in mat_1 and each element in mat_2. 29 | 30 | Parameters 31 | ---------- 32 | mat_1: torch.Tensor 33 | matrix of shape (n_1, n_features) 34 | mat_2: torch.Tensor 35 | matrix of shape (n_2, n_features) 36 | 37 | Returns: 38 | ------- 39 | dist: torch.Tensor 40 | distance matrix of shape (n_1, n_2) 41 | 42 | """ 43 | return torch.cdist(mat_1, mat_2).pow(2) 44 | 45 | 46 | class EuclideanMean(nn.Module): 47 | """Implements an EuclideanMean object.""" 48 | 49 | def forward(self, data: Tensor) -> Tensor: 50 | """Performs a forward pass through the network. 51 | 52 | Parameters 53 | ---------- 54 | data : torch.Tensor 55 | The input data, as a float tensor 56 | 57 | Returns: 58 | ------- 59 | torch.Tensor 60 | The encoded output, as a float tensor 61 | 62 | """ 63 | return data.mean(0) 64 | -------------------------------------------------------------------------------- /flair/nn/dropout.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class LockedDropout(torch.nn.Module): 5 | """Implementation of locked (or variational) dropout. 6 | 7 | Randomly drops out entire parameters in embedding space. 8 | """ 9 | 10 | def __init__(self, dropout_rate=0.5, batch_first=True, inplace=False) -> None: 11 | super().__init__() 12 | self.dropout_rate = dropout_rate 13 | self.batch_first = batch_first 14 | self.inplace = inplace 15 | 16 | def forward(self, x): 17 | if not self.training or not self.dropout_rate: 18 | return x 19 | 20 | if not self.batch_first: 21 | m = x.data.new(1, x.size(1), x.size(2)).bernoulli_(1 - self.dropout_rate) 22 | else: 23 | m = x.data.new(x.size(0), 1, x.size(2)).bernoulli_(1 - self.dropout_rate) 24 | 25 | mask = torch.autograd.Variable(m, requires_grad=False) / (1 - self.dropout_rate) 26 | mask = mask.expand_as(x) 27 | return mask * x 28 | 29 | def extra_repr(self): 30 | inplace_str = ", inplace" if self.inplace else "" 31 | return f"p={self.dropout_rate}{inplace_str}" 32 | 33 | 34 | class WordDropout(torch.nn.Module): 35 | """Implementation of word dropout. 36 | 37 | Randomly drops out entire words (or characters) in embedding space.
38 | """ 39 | 40 | def __init__(self, dropout_rate=0.05, inplace=False) -> None: 41 | super().__init__() 42 | self.dropout_rate = dropout_rate 43 | self.inplace = inplace 44 | 45 | def forward(self, x): 46 | if not self.training or not self.dropout_rate: 47 | return x 48 | 49 | m = x.data.new(x.size(0), x.size(1), 1).bernoulli_(1 - self.dropout_rate) 50 | 51 | mask = torch.autograd.Variable(m, requires_grad=False) 52 | return mask * x 53 | 54 | def extra_repr(self): 55 | inplace_str = ", inplace" if self.inplace else "" 56 | return f"p={self.dropout_rate}{inplace_str}" 57 | -------------------------------------------------------------------------------- /flair/nn/multitask.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Iterable 2 | from typing import Union 3 | 4 | from flair.data import Corpus, MultiCorpus 5 | from flair.models import MultitaskModel 6 | from flair.nn import Classifier, Model 7 | 8 | 9 | def make_multitask_model_and_corpus( 10 | mapping: Iterable[Union[tuple[Classifier, Corpus], tuple[Classifier, Corpus, float]]] 11 | ) -> tuple[Model, Corpus]: 12 | models = [] 13 | corpora = [] 14 | loss_factors = [] 15 | ids = [] 16 | 17 | for task_id, _map in enumerate(mapping): 18 | models.append(_map[0]) 19 | corpora.append(_map[1]) 20 | if len(_map) == 3: 21 | loss_factors.append(_map[2]) 22 | else: 23 | loss_factors.append(1.0) 24 | 25 | ids.append(f"Task_{task_id}") 26 | 27 | return MultitaskModel(models=models, task_ids=ids, loss_factors=loss_factors), MultiCorpus(corpora, ids) 28 | -------------------------------------------------------------------------------- /flair/nn/recurrent.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | rnn_layers = {"lstm": (nn.LSTM, 2), "gru": (nn.GRU, 1)} 4 | 5 | 6 | def create_recurrent_layer(layer_type, initial_size, hidden_size, nlayers, dropout=0, **kwargs): 7 | layer_type = layer_type.lower() 8 | assert layer_type in rnn_layers 9 | module, hidden_count = rnn_layers[layer_type] 10 | 11 | if nlayers == 1: 12 | dropout = 0 13 | 14 | return module(initial_size, hidden_size, nlayers, dropout=dropout, **kwargs), hidden_count 15 | -------------------------------------------------------------------------------- /flair/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/flair/py.typed -------------------------------------------------------------------------------- /flair/trainers/__init__.py: -------------------------------------------------------------------------------- 1 | from .language_model_trainer import LanguageModelTrainer, TextCorpus 2 | from .trainer import ModelTrainer 3 | 4 | __all__ = ["LanguageModelTrainer", "ModelTrainer", "TextCorpus"] 5 | -------------------------------------------------------------------------------- /flair/trainers/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BasePlugin, Pluggable, TrainerPlugin, TrainingInterrupt 2 | from .functional.anneal_on_plateau import AnnealingPlugin 3 | from .functional.checkpoints import CheckpointPlugin 4 | from .functional.deepncm_trainer_plugin import DeepNCMPlugin 5 | from .functional.linear_scheduler import LinearSchedulerPlugin 6 | from .functional.reduce_transformer_vocab import ReduceTransformerVocabPlugin 7 | from .functional.weight_extractor 
import WeightExtractorPlugin 8 | from .loggers.clearml_logger import ClearmlLoggerPlugin 9 | from .loggers.log_file import LogFilePlugin 10 | from .loggers.loss_file import LossFilePlugin 11 | from .loggers.metric_history import MetricHistoryPlugin 12 | from .loggers.tensorboard import TensorboardLogger 13 | from .metric_records import MetricName, MetricRecord 14 | 15 | __all__ = [ 16 | "AnnealingPlugin", 17 | "BasePlugin", 18 | "CheckpointPlugin", 19 | "ClearmlLoggerPlugin", 20 | "DeepNCMPlugin", 21 | "LinearSchedulerPlugin", 22 | "LogFilePlugin", 23 | "LossFilePlugin", 24 | "MetricHistoryPlugin", 25 | "MetricName", 26 | "MetricRecord", 27 | "Pluggable", 28 | "ReduceTransformerVocabPlugin", 29 | "TensorboardLogger", 30 | "TrainerPlugin", 31 | "TrainingInterrupt", 32 | "WeightExtractorPlugin", 33 | ] 34 | -------------------------------------------------------------------------------- /flair/trainers/plugins/functional/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/flair/trainers/plugins/functional/__init__.py -------------------------------------------------------------------------------- /flair/trainers/plugins/functional/checkpoints.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Any 3 | 4 | from flair.trainers.plugins.base import TrainerPlugin 5 | 6 | log = logging.getLogger("flair") 7 | 8 | 9 | class CheckpointPlugin(TrainerPlugin): 10 | def __init__( 11 | self, 12 | save_model_each_k_epochs, 13 | save_optimizer_state, 14 | base_path, 15 | ) -> None: 16 | super().__init__() 17 | self.save_optimizer_state = save_optimizer_state 18 | self.save_model_each_k_epochs = save_model_each_k_epochs 19 | self.base_path = base_path 20 | 21 | @TrainerPlugin.hook 22 | def after_training_epoch(self, epoch, **kw): 23 | """Saves the model each k epochs.""" 24 | if self.save_model_each_k_epochs > 0 and epoch % self.save_model_each_k_epochs == 0: 25 | log.info( 26 | f"Saving model at current epoch since 'save_model_each_k_epochs={self.save_model_each_k_epochs}' " 27 | f"was set" 28 | ) 29 | model_name = "model_epoch_" + str(epoch) + ".pt" 30 | 31 | # Use trainer's _save_model method - we have access to trainer through self.trainer 32 | self.trainer._save_model(self.base_path / model_name, save_optimizer_state=self.save_optimizer_state) 33 | 34 | @property 35 | def attach_to_all_processes(self) -> bool: 36 | return False 37 | 38 | def get_state(self) -> dict[str, Any]: 39 | return { 40 | **super().get_state(), 41 | "base_path": str(self.base_path), 42 | "save_model_each_k_epochs": self.save_model_each_k_epochs, 43 | "save_optimizer_state": self.save_optimizer_state, 44 | } 45 | -------------------------------------------------------------------------------- /flair/trainers/plugins/functional/deepncm_trainer_plugin.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Iterable 2 | 3 | import torch 4 | 5 | from flair.models import MultitaskModel 6 | from flair.nn import DeepNCMDecoder 7 | from flair.trainers.plugins.base import TrainerPlugin 8 | 9 | 10 | class DeepNCMPlugin(TrainerPlugin): 11 | """Plugin for training DeepNCMClassifier. 12 | 13 | Handles both multitask and single-task scenarios. 
14 | """ 15 | 16 | @property 17 | def decoders(self) -> Iterable[DeepNCMDecoder]: 18 | """Iterator over all DeepNCMDecoder instances in the trainer.""" 19 | model = self.trainer.model 20 | 21 | models = model.tasks.values() if isinstance(model, MultitaskModel) else [model] 22 | 23 | for sub_model in models: 24 | if hasattr(sub_model, "decoder") and isinstance(sub_model.decoder, DeepNCMDecoder): 25 | yield sub_model.decoder 26 | 27 | @TrainerPlugin.hook 28 | def after_training_epoch(self, **kwargs): 29 | """Reset class counts after each training epoch.""" 30 | for decoder in self.decoders: 31 | if decoder.mean_update_method == "condensation": 32 | decoder.class_counts.data = torch.ones_like(decoder.class_counts) 33 | 34 | @TrainerPlugin.hook 35 | def after_training_batch(self, **kwargs): 36 | """Update prototypes after each training batch.""" 37 | for decoder in self.decoders: 38 | decoder.update_prototypes() 39 | 40 | def __str__(self) -> str: 41 | return "DeepNCMPlugin" 42 | -------------------------------------------------------------------------------- /flair/trainers/plugins/functional/weight_extractor.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from flair.trainers.plugins.base import TrainerPlugin 4 | from flair.training_utils import WeightExtractor 5 | 6 | 7 | class WeightExtractorPlugin(TrainerPlugin): 8 | """Simple plugin for weight extraction.""" 9 | 10 | def __init__(self, base_path) -> None: 11 | super().__init__() 12 | self.base_path = base_path 13 | self.weight_extractor = WeightExtractor(base_path) 14 | 15 | @TrainerPlugin.hook 16 | def after_training_batch(self, batch_no, epoch, total_number_of_batches, **kw): 17 | """Extracts weights.""" 18 | modulo = max(1, int(total_number_of_batches / 10)) 19 | iteration = epoch * total_number_of_batches + batch_no 20 | 21 | if (iteration + 1) % modulo == 0: 22 | self.weight_extractor.extract_weights(self.model.state_dict(), iteration) 23 | 24 | @property 25 | def attach_to_all_processes(self) -> bool: 26 | return False 27 | 28 | def get_state(self) -> dict[str, Any]: 29 | return { 30 | **super().get_state(), 31 | "base_path": str(self.base_path), 32 | } 33 | -------------------------------------------------------------------------------- /flair/trainers/plugins/loggers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/flair/trainers/plugins/loggers/__init__.py -------------------------------------------------------------------------------- /flair/trainers/plugins/loggers/clearml_logger.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from flair.trainers.plugins.base import TrainerPlugin 4 | from flair.trainers.plugins.metric_records import MetricRecord 5 | 6 | 7 | class ClearmlLoggerPlugin(TrainerPlugin): 8 | def __init__(self, task_id_or_task: Any): 9 | if isinstance(task_id_or_task, str): 10 | self.task_id = task_id_or_task 11 | self.task = None 12 | else: 13 | self.task = task_id_or_task 14 | self.task_id = self.task.task_id 15 | super().__init__() 16 | 17 | @property 18 | def logger(self): 19 | try: 20 | import clearml 21 | except ImportError: 22 | raise ImportError( 23 | "Please install clearml 1.11.0 or higher before using the clearml plugin, " 24 | "otherwise you can remove the clearml plugin from the training or model card."
25 | ) 26 | if self.task is None: 27 | self.task = clearml.Task.get_task(task_id=self.task_id) 28 | return self.task.get_logger() 29 | 30 | @TrainerPlugin.hook 31 | def metric_recorded(self, record: MetricRecord) -> None: 32 | record_name = ".".join(record.name) 33 | 34 | if record.is_scalar: 35 | self.logger.report_scalar(record_name, record_name, record.value, record.global_step) 36 | elif record.is_scalar_list: 37 | for i, v in enumerate(record.value): 38 | self.logger.report_scalar(record_name, f"{record_name}_{i}", v, record.global_step) 39 | elif record.is_string: 40 | self.logger.report_text(record.value, print_console=False) 41 | elif record.is_histogram: 42 | self.logger.report_histogram(record_name, record_name, record.value, record.global_step) 43 | 44 | @property 45 | def attach_to_all_processes(self) -> bool: 46 | return False 47 | -------------------------------------------------------------------------------- /flair/trainers/plugins/loggers/log_file.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from typing import Any 4 | 5 | from flair.trainers.plugins.base import TrainerPlugin 6 | from flair.training_utils import add_file_handler 7 | 8 | log = logging.getLogger("flair") 9 | 10 | 11 | class LogFilePlugin(TrainerPlugin): 12 | """Plugin for the training.log file.""" 13 | 14 | def __init__(self, base_path) -> None: 15 | super().__init__() 16 | self.base_path = base_path 17 | self.log_handler = add_file_handler(log, Path(base_path) / "training.log") 18 | 19 | @TrainerPlugin.hook("_training_exception", "after_training") 20 | def close_file_handler(self, **kw): 21 | self.log_handler.close() 22 | log.removeHandler(self.log_handler) 23 | 24 | @property 25 | def attach_to_all_processes(self) -> bool: 26 | return False 27 | 28 | def get_state(self) -> dict[str, Any]: 29 | return {**super().get_state(), "base_path": str(self.base_path)} 30 | -------------------------------------------------------------------------------- /flair/trainers/plugins/loggers/metric_history.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from collections.abc import Mapping 3 | from typing import Any 4 | 5 | from flair.trainers.plugins.base import TrainerPlugin 6 | 7 | log = logging.getLogger("flair") 8 | 9 | 10 | default_metrics_to_collect = { 11 | ("train", "loss"): "train_loss_history", 12 | ("dev", "score"): "dev_score_history", 13 | ("dev", "loss"): "dev_loss_history", 14 | } 15 | 16 | 17 | class MetricHistoryPlugin(TrainerPlugin): 18 | def __init__(self, metrics_to_collect: Mapping = default_metrics_to_collect) -> None: 19 | super().__init__() 20 | 21 | self.metric_history: dict[str, list] = {} 22 | self.metrics_to_collect: Mapping = metrics_to_collect 23 | for target in self.metrics_to_collect.values(): 24 | self.metric_history[target] = [] 25 | 26 | @TrainerPlugin.hook 27 | def metric_recorded(self, record): 28 | if tuple(record.name) in self.metrics_to_collect: 29 | target = self.metrics_to_collect[tuple(record.name)] 30 | self.metric_history[target].append(record.value) 31 | 32 | @TrainerPlugin.hook 33 | def after_training(self, **kw): 34 | """Returns metric history.""" 35 | self.trainer.return_values.update(self.metric_history) 36 | 37 | @property 38 | def attach_to_all_processes(self) -> bool: 39 | return False 40 | 41 | def get_state(self) -> dict[str, Any]: 42 | return { 43 | **super().get_state(), 44 | "metrics_to_collect": 
dict(self.metrics_to_collect), 45 | } 46 | -------------------------------------------------------------------------------- /flair/visual/__init__.py: -------------------------------------------------------------------------------- 1 | from .activations import Highlighter 2 | from .manifold import Visualizer 3 | 4 | __all__ = ["Highlighter", "Visualizer"] 5 | -------------------------------------------------------------------------------- /flair/visual/activations.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | 4 | class Highlighter: 5 | def __init__(self) -> None: 6 | self.color_map = [ 7 | "#ff0000", 8 | "#ff4000", 9 | "#ff8000", 10 | "#ffbf00", 11 | "#ffff00", 12 | "#bfff00", 13 | "#80ff00", 14 | "#40ff00", 15 | "#00ff00", 16 | "#00ff40", 17 | "#00ff80", 18 | "#00ffbf", 19 | "#00ffff", 20 | "#00bfff", 21 | "#0080ff", 22 | "#0040ff", 23 | "#0000ff", 24 | "#4000ff", 25 | "#8000ff", 26 | "#bf00ff", 27 | "#ff00ff", 28 | "#ff00bf", 29 | "#ff0080", 30 | "#ff0040", 31 | "#ff0000", 32 | ] 33 | 34 | def highlight(self, activation, text): 35 | activation = activation.detach().cpu().numpy() 36 | 37 | step_size = (max(activation) - min(activation)) / len(self.color_map) 38 | 39 | lookup = numpy.array(list(numpy.arange(min(activation), max(activation), step_size))) 40 | 41 | colors = [] 42 | 43 | for _i, act in enumerate(activation): 44 | try: 45 | colors.append(self.color_map[numpy.where(act > lookup)[0][-1]]) 46 | except IndexError: 47 | colors.append(self.color_map[-1]) 48 | 49 | str_ = "<br><br>"
50 | 51 | for i, (char, color) in enumerate(zip(list(text), colors)): 52 | str_ += self._render(char, color) 53 | 54 | if i % 100 == 0 and i > 0: 55 | str_ += "<br>" 56 | 57 | return str_ 58 | 59 | def highlight_selection(self, activations, text, file_="resources/data/highlight.html", n=10): 60 | ix = numpy.random.default_rng().choice(activations.shape[1], size=n) 61 | 62 | rendered = "" 63 | 64 | for i in ix: 65 | rendered += self.highlight(activations[:, i], text) 66 | 67 | with open(file_, "w") as f: 68 | f.write(rendered) 69 | 70 | @staticmethod 71 | def _render(char, color): 72 | return f'<span style="background-color: {color}">{char}</span>' 73 | -------------------------------------------------------------------------------- /flair/visual/tree_printer.py: -------------------------------------------------------------------------------- 1 | from pptree import print_tree 2 | 3 | from flair.data import Sentence, Token 4 | 5 | 6 | class NodeToken: 7 | def __init__(self, token: Token, tag_type: str) -> None: 8 | self.token: Token = token 9 | self.tag_type: str = tag_type 10 | self.children: list[NodeToken] = [] 11 | 12 | def set_head(self, parent): 13 | parent.children.append(self) 14 | 15 | def __str__(self) -> str: 16 | return f" {self.token.text}({self.token.get_labels(self.tag_type)[0].value}) " 17 | 18 | 19 | def tree_printer(sentence: Sentence, tag_type: str): 20 | tree: list[NodeToken] = [NodeToken(token, tag_type) for token in sentence] 21 | for x in tree: 22 | if x.token.head_id != 0: 23 | head_token = x.token.get_head() 24 | 25 | for y in tree: 26 | if y.token == head_token: 27 | x.set_head(y) 28 | else: 29 | root_node = x 30 | print_tree(root_node, "children") 31 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | black[jupyter]==24.2.* 2 | konoha[janome]<6.0.0 3 | mypy>=1.2.0 4 | pytest>=7.3.1 5 | pytest-black-ng==0.4.* 6 | pytest-github-actions-annotate-failures>=0.1.8 7 | pytest-mypy>=0.10.3 8 | pytest-ruff==0.3.* 9 | ruff==0.7.* 10 | types-dataclasses>=0.6.6 11 | types-Deprecated>=1.2.9.2 12 | types-requests>=2.28.11.17 13 | types-tabulate>=0.9.0.2 14 | pyab3p 15 | transformers!=4.40.1,!=4.40.0 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | boto3>=1.20.27 2 | conllu>=4.0,<5.0.0 3 | deprecated>=1.2.13 4 | ftfy>=6.1.0 5 | gdown>=4.4.0 6 | huggingface-hub>=0.10.0 7 | langdetect>=1.0.9 8 | lxml>=4.8.0 9 | matplotlib>=2.2.3 10 | more-itertools>=8.13.0 11 | mpld3>=0.3 12 | pptree>=3.1 13 | python-dateutil>=2.8.2 14 | pytorch_revgrad>=0.2.0 15 | regex>=2022.1.18 16 | scikit-learn>=1.0.2 17 | segtok>=1.5.11 18 | sqlitedict>=2.0.0 19 | tabulate>=0.8.10 20 | torch>=1.13.1 21 | tqdm>=4.63.0 22 | transformer-smaller-training-vocab>=0.2.3 23 | transformers[sentencepiece]>=4.25.0,<5.0.0 24 | wikipedia-api>=0.5.7 25 | bioc<3.0.0,>=2.0.0 26 | -------------------------------------------------------------------------------- /resources/docs/HUNFLAIR_TUTORIAL_3_ENTITY_LINKING.md: -------------------------------------------------------------------------------- 1 | # HunFlair Tutorial 3: Entity Linking 2 | 3 | After adding named entity recognition tags to your sentence, you can run named entity linking on these annotations.
4 | 5 | ```python 6 | from flair.models import EntityMentionLinker 7 | from flair.nn import Classifier 8 | from flair.tokenization import SciSpacyTokenizer 9 | from flair.data import Sentence 10 | 11 | sentence = Sentence( 12 | "The mutation in the ABCD1 gene causes X-linked adrenoleukodystrophy, " 13 | "a neurodegenerative disease, which is exacerbated by exposure to high " 14 | "levels of mercury in dolphin populations.", 15 | use_tokenizer=SciSpacyTokenizer() 16 | ) 17 | 18 | ner_tagger = Classifier.load("hunflair") 19 | ner_tagger.predict(sentence) 20 | 21 | nen_tagger = EntityMentionLinker.load("disease-linker") 22 | nen_tagger.predict(sentence) 23 | 24 | nen_tagger = EntityMentionLinker.load("gene-linker") 25 | nen_tagger.predict(sentence) 26 | 27 | nen_tagger = EntityMentionLinker.load("chemical-linker") 28 | nen_tagger.predict(sentence) 29 | 30 | nen_tagger = EntityMentionLinker.load("species-linker") 31 | nen_tagger.predict(sentence) 32 | 33 | for tag in sentence.get_labels(): 34 | print(tag) 35 | ``` 36 | 37 | This should print: 38 | 39 | ``` 40 | Span[4:5]: "ABCD1" → Gene (0.9575) 41 | Span[4:5]: "ABCD1" → abcd1 - NCBI-GENE-HUMAN:215 (14.5503) 42 | Span[7:11]: "X-linked adrenoleukodystrophy" → Disease (0.9867) 43 | Span[7:11]: "X-linked adrenoleukodystrophy" → x linked adrenoleukodystrophy - CTD-DISEASES:MESH:D000326 (13.9717) 44 | Span[13:15]: "neurodegenerative disease" → Disease (0.8865) 45 | Span[13:15]: "neurodegenerative disease" → neurodegenerative disease - CTD-DISEASES:MESH:D019636 (14.2779) 46 | Span[25:26]: "mercury" → Chemical (0.9456) 47 | Span[25:26]: "mercury" → mercury - CTD-CHEMICALS:MESH:D008628 (14.9185) 48 | Span[27:28]: "dolphin" → Species (0.8082) 49 | Span[27:28]: "dolphin" → marine dolphins - NCBI-TAXONOMY:9726 (14.473) 50 | ``` 51 | 52 | The output contains both the NER annotations and their entity / concept identifiers according to 53 | a knowledge base or ontology. We have pre-configured combinations of models and dictionaries for 54 | "disease", "chemical", "gene" and "species". 55 | 56 | You can also provide your own model and dictionary: 57 | 58 | ```python 59 | from flair.models import EntityMentionLinker 60 | 61 | nen_tagger = EntityMentionLinker.build("name_or_path_to_your_model", 62 | dictionary_names_or_path="name_or_path_to_your_dictionary") 63 | nen_tagger = EntityMentionLinker.build("path_to_custom_disease_model", dictionary_names_or_path="disease") 64 | ``` 65 | 66 | You can use any combination of provided models, provided dictionaries and your own. 67 | -------------------------------------------------------------------------------- /resources/docs/TUTORIAL_8_MODEL_OPTIMIZATION.md: -------------------------------------------------------------------------------- 1 | # Tutorial 8: Model Tuning 2 | 3 | **Important**: This tutorial has been removed.
4 | 5 | All Flair documentation is now found at: https://flairnlp.github.io/ -------------------------------------------------------------------------------- /resources/docs/TUTORIAL_CORPUS_CUSTOM.md: -------------------------------------------------------------------------------- 1 | # Tutorial 4.3: Loading a Custom Corpus 2 | 3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-training/how-to-load-custom-dataset 4 | 5 | All Flair documentation is now found at: https://flairnlp.github.io/ 6 | -------------------------------------------------------------------------------- /resources/docs/TUTORIAL_CORPUS_PREPARED.md: -------------------------------------------------------------------------------- 1 | # Tutorial 4.1: Loading a Prepared Corpus 2 | 3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-training/how-to-load-prepared-dataset 4 | 5 | All Flair documentation is now found at: https://flairnlp.github.io/ 6 | 7 | -------------------------------------------------------------------------------- /resources/docs/TUTORIAL_EMBEDDINGS_OVERVIEW.md: -------------------------------------------------------------------------------- 1 | # Tutorial 3: Embeddings 2 | 3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/category/tutorial-3-embeddings 4 | 5 | All Flair documentation is now found at: https://flairnlp.github.io/ -------------------------------------------------------------------------------- /resources/docs/TUTORIAL_FLAIR_BASICS.md: -------------------------------------------------------------------------------- 1 | # Tutorial 1: NLP Base Types 2 | 3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/basic-types 4 | 5 | All Flair documentation is now found at: https://flairnlp.github.io/ -------------------------------------------------------------------------------- /resources/docs/TUTORIAL_TAGGING_CIRCUS.md: -------------------------------------------------------------------------------- 1 | # Tutorial 2.6: Other Crazy Models in Flair 2 | 3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/other-models 4 | 5 | All Flair documentation is now found at: https://flairnlp.github.io/ -------------------------------------------------------------------------------- /resources/docs/TUTORIAL_TAGGING_LINKING.md: -------------------------------------------------------------------------------- 1 | # Tutorial 2.3: Entity Linking on Your Text 2 | 3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/entity-linking 4 | 5 | All Flair documentation is now found at: https://flairnlp.github.io/ -------------------------------------------------------------------------------- /resources/docs/TUTORIAL_TAGGING_NER.md: -------------------------------------------------------------------------------- 1 | # Tutorial 2.1: Tagging Entities in your Text 2 | 3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/tagging-entities 4 | 5 | All Flair documentation is now found at: https://flairnlp.github.io/ -------------------------------------------------------------------------------- /resources/docs/TUTORIAL_TAGGING_OVERVIEW.md: -------------------------------------------------------------------------------- 1 | # Tutorial 2: Tagging your Text 2 | 3 | **Important**: This tutorial has been moved to 
https://flairnlp.github.io/docs/category/tutorial-1-basic-tagging 4 | 5 | All Flair documentation is now found at: https://flairnlp.github.io/ -------------------------------------------------------------------------------- /resources/docs/TUTORIAL_TAGGING_POS.md: -------------------------------------------------------------------------------- 1 | # Tutorial 2.4: Tagging Parts of Speech in your Text 2 | 3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/part-of-speech-tagging 4 | 5 | All Flair documentation is now found at: https://flairnlp.github.io/ -------------------------------------------------------------------------------- /resources/docs/TUTORIAL_TAGGING_RELATIONS.md: -------------------------------------------------------------------------------- 1 | # Tutorial 2.5: Relation Extraction on Your Text 2 | 3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/other-models 4 | 5 | All Flair documentation is now found at: https://flairnlp.github.io/ -------------------------------------------------------------------------------- /resources/docs/TUTORIAL_TAGGING_SENTIMENT.md: -------------------------------------------------------------------------------- 1 | # Tutorial 2.2: Sentiment Analysis on Your Text 2 | 3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/tagging-sentiment 4 | 5 | All Flair documentation is now found at: https://flairnlp.github.io/ -------------------------------------------------------------------------------- /resources/docs/TUTORIAL_TRAINING_MODELS.md: -------------------------------------------------------------------------------- 1 | # Tutorial 4.1: How Model Training works in Flair 2 | 3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-training/how-model-training-works 4 | 5 | All Flair documentation is now found at: https://flairnlp.github.io/ 6 | 7 | -------------------------------------------------------------------------------- /resources/docs/TUTORIAL_TRAINING_OVERVIEW.md: -------------------------------------------------------------------------------- 1 | # Tutorial 4: Training your own Models 2 | 3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/category/tutorial-2-training-models 4 | 5 | All Flair documentation is now found at: https://flairnlp.github.io/ 6 | -------------------------------------------------------------------------------- /resources/docs/TUTORIAL_TRAINING_SEQUENCE_LABELER.md: -------------------------------------------------------------------------------- 1 | # Tutorial 4.4: Training Sequence Labeling Models 2 | 3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-training/how-to-train-sequence-tagger 4 | 5 | All Flair documentation is now found at: https://flairnlp.github.io/ 6 | -------------------------------------------------------------------------------- /resources/docs/TUTORIAL_TRAINING_TEXT_CLASSIFIER.md: -------------------------------------------------------------------------------- 1 | # Tutorial 4.5: Training Text Classification Models 2 | 3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-training/how-to-train-text-classifier 4 | 5 | All Flair documentation is now found at: https://flairnlp.github.io/ -------------------------------------------------------------------------------- /resources/docs/embeddings/BYTE_PAIR_EMBEDDINGS.md: 
-------------------------------------------------------------------------------- 1 | # Byte Pair Embeddings 2 | 3 | `BytePairEmbeddings` are word embeddings that are precomputed on the subword level. This means that they are able to 4 | embed any word by splitting words into subwords and looking up their embeddings. `BytePairEmbeddings` were proposed 5 | and computed by [Heinzerling and Strube (2018)](https://www.aclweb.org/anthology/L18-1473), who found that they offer nearly the same accuracy as word embeddings, but at a fraction 6 | of the model size. So they are a great choice if you want to train small models. 7 | 8 | You initialize with a language code (275 languages supported), a number of 'syllables' (the BPE vocabulary size: one of 1000, 3000, 5000, 10000, 25000, 50000, 100000 or 200000) and 9 | a number of dimensions (one of 50, 100, 200 or 300). The following initializes and uses byte pair embeddings 10 | for English: 11 | 12 | ```python 13 | from flair.data import Sentence 14 | from flair.embeddings import BytePairEmbeddings 15 | 16 | # init embedding 17 | embedding = BytePairEmbeddings('en') 18 | 19 | # create a sentence 20 | sentence = Sentence('The grass is green .') 21 | 22 | # embed words in sentence 23 | embedding.embed(sentence) 24 | ``` 25 | 26 | More information can be found 27 | on the [byte pair embeddings](https://nlp.h-its.org/bpemb/) web page. 28 | 29 | `BytePairEmbeddings` also offer a multilingual model capable of embedding any word in any language. 30 | You can instantiate it with: 31 | 32 | ```python 33 | # init embedding 34 | embedding = BytePairEmbeddings('multi') 35 | ``` 36 | 37 | You can also load custom `BytePairEmbeddings` by passing paths to the `model_file_path` and `embedding_file_path` arguments. They correspond, respectively, to a SentencePiece model file and to an embedding file (Word2Vec plain text or Gensim binary). For example: 38 | 39 | ```python 40 | # init custom embedding 41 | embedding = BytePairEmbeddings(model_file_path='your/path/m.model', embedding_file_path='your/path/w2v.txt') 42 | ``` 43 | -------------------------------------------------------------------------------- /resources/docs/embeddings/CHARACTER_EMBEDDINGS.md: -------------------------------------------------------------------------------- 1 | # Character Embeddings 2 | 3 | `CharacterEmbeddings` allow you to add character-level word embeddings during model training. Note that these embeddings 4 | are randomly initialized when you initialize the class, so they are not meaningful unless you train them on a specific 5 | downstream task. 6 | 7 | For instance, the standard sequence labeling architecture used by [Lample et al. (2016)](https://www.aclweb.org/anthology/N16-1030) is a combination of classic word embeddings with task-trained character features. Normally this would require you to implement a [hierarchical embedding architecture](http://neuroner.com/NeuroNERengine_with_caption_no_figure.png) in which character-level embeddings for each word are computed using an RNN and then concatenated with word embeddings. 8 | 9 | In Flair, we simplify this by treating `CharacterEmbeddings` just like any other embedding class.
To reproduce the 10 | Lample architecture, you need only combine them with standard `WordEmbeddings` in an embedding stack: 11 | 12 | ```python 13 | from flair.embeddings import CharacterEmbeddings, StackedEmbeddings, WordEmbeddings 14 | 15 | # init embedding stack 16 | embedding = StackedEmbeddings( 17 | [ 18 | # standard word embeddings 19 | WordEmbeddings('glove'), 20 | 21 | # character-level features 22 | CharacterEmbeddings(), 23 | ] 24 | ) 25 | ``` 26 | 27 | If you pass this stacked embedding to a train method, the character-level features will now automatically be trained 28 | for your downstream task. 29 | -------------------------------------------------------------------------------- /resources/docs/embeddings/ELMO_EMBEDDINGS.md: -------------------------------------------------------------------------------- 1 | # ELMo Embeddings 2 | 3 | [ELMo embeddings](http://www.aclweb.org/anthology/N18-1202) were presented by Peters et al. in 2018. They use 4 | a bidirectional recurrent neural network trained to predict the next word in a text (and, in the backward direction, the previous word). 5 | We use the implementation of [AllenNLP](https://allennlp.org/elmo). As this implementation comes with a lot of 6 | sub-dependencies, which we don't want to include in Flair, you first need to install the library via 7 | `pip install allennlp==0.9.0` before you can use it in Flair. 8 | Using the embeddings is as simple as using any other embedding type: 9 | 10 | ```python 11 | from flair.data import Sentence 12 | from flair.embeddings import ELMoEmbeddings 13 | 14 | # init embedding 15 | embedding = ELMoEmbeddings() 16 | 17 | # create a sentence 18 | sentence = Sentence('The grass is green .') 19 | 20 | # embed words in sentence 21 | embedding.embed(sentence) 22 | ``` 23 | 24 | ELMo word embeddings can be constructed by combining ELMo layers in different ways. The available combination strategies are: 25 | - `"all"`: Use the concatenation of the three ELMo layers. 26 | - `"top"`: Use the top ELMo layer. 27 | - `"average"`: Use the average of the three ELMo layers. 28 | 29 | By default (`"all"`), the three ELMo layers are concatenated to form the word embedding. 30 | 31 | AllenNLP provides the following pre-trained models. To use any of the following models inside Flair, 32 | simply specify the embedding id when initializing the `ELMoEmbeddings`. 33 | 34 | | ID | Language | Embedding | 35 | | ------------- | ------------- | ------------- | 36 | | 'small' | English | 1024-hidden, 1 layer, 14.6M parameters | 37 | | 'medium' | English | 2048-hidden, 1 layer, 28.0M parameters | 38 | | 'original' | English | 4096-hidden, 2 layers, 93.6M parameters | 39 | | 'large' | English | | 40 | | 'pt' | Portuguese | | 41 | | 'pubmed' | English biomedical data | [more information](https://allennlp.org/elmo) | 42 | -------------------------------------------------------------------------------- /resources/docs/embeddings/FASTTEXT_EMBEDDINGS.md: -------------------------------------------------------------------------------- 1 | # FastText Embeddings 2 | 3 | FastText embeddings can give you vectors for out-of-vocabulary (OOV) words by using subword information. 4 | To use this functionality with Flair, use the `FastTextEmbeddings` class as shown: 5 | 6 | ```python 7 | from flair.data import Sentence 8 | from flair.embeddings import FastTextEmbeddings 9 | 10 | # init embedding 11 | embedding = FastTextEmbeddings('/path/to/local/custom_fasttext_embeddings.bin') 12 | 13 | # create a sentence 14 | sentence = Sentence('The grass is green .') 15 | 16 | # embed words in sentence 17 | embedding.embed(sentence) 18 | ``` 19 | 20 | You can initialize the class by passing a remote downloadable URL as well.
21 | 22 | ```python 23 | embedding = FastTextEmbeddings('/path/to/remote/downloadable/custom_fasttext_embeddings.bin', use_local=False) 24 | ``` 25 | 26 | Note that FastText embeddings typically come with huge model files, which result in equally huge models for downstream tasks. 27 | 28 | Alternatively, you can use FastText embeddings without the OOV functionality by using normal `WordEmbeddings`, which are smaller, and get 29 | the OOV functionality from the tiny `BytePairEmbeddings`. So, instead of using English `FastTextEmbeddings` 30 | with OOV handling, you could use this stack: 31 | 32 | ```python 33 | from flair.data import Sentence 34 | from flair.embeddings import WordEmbeddings, BytePairEmbeddings, StackedEmbeddings 35 | 36 | # init embedding 37 | embedding = StackedEmbeddings( 38 | [ 39 | # standard FastText word embeddings for English 40 | WordEmbeddings('en'), 41 | # Byte pair embeddings for English 42 | BytePairEmbeddings('en'), 43 | ] 44 | ) 45 | 46 | # create a sentence 47 | sentence = Sentence('The grass is green .') 48 | 49 | # embed words in sentence 50 | embedding.embed(sentence) 51 | ``` 52 | -------------------------------------------------------------------------------- /resources/docs/flair_logo_2020.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/resources/docs/flair_logo_2020.png -------------------------------------------------------------------------------- /resources/docs/flair_logo_2020_FINAL_day_dpi72.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/resources/docs/flair_logo_2020_FINAL_day_dpi72.png -------------------------------------------------------------------------------- /resources/docs/flair_logo_2020_FINAL_night_dpi72.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/resources/docs/flair_logo_2020_FINAL_night_dpi72.png -------------------------------------------------------------------------------- /resources/docs/flair_logo_2020_FINAL_night_light_dpi72.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/resources/docs/flair_logo_2020_FINAL_night_light_dpi72.png -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from setuptools import find_packages, setup 4 | 5 | required = Path("requirements.txt").read_text(encoding="utf-8").split("\n") 6 | 7 | setup( 8 | name="flair", 9 | version="0.15.1", 10 | description="A very simple framework for state-of-the-art NLP", 11 | long_description=Path("README.md").read_text(encoding="utf-8"), 12 | long_description_content_type="text/markdown", 13 | author="Alan Akbik", 14 | author_email="alan.akbik@gmail.com", 15 | url="https://github.com/flairNLP/flair", 16 | packages=find_packages(exclude=["tests", "tests.*"]), # same as name 17 | license="MIT", 18 | install_requires=required, 19 | extras_require={ 20 | "word-embeddings": ["gensim>=4.2.0", "bpemb>=0.3.5"], 21 | }, 22 | include_package_data=True, 23 | python_requires=">=3.9", 24 | ) 25 |
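The `extras_require` entry in the setup script above makes the gensim-backed embeddings an opt-in install (`pip install "flair[word-embeddings]"`). A minimal, hypothetical smoke test (not a file in this repo) to confirm that the core package and the extra's dependencies are importable after installation:

```python
# Hypothetical post-install smoke test (not part of the repo): checks that
# flair itself and the optional "word-embeddings" extra (gensim, bpemb) import cleanly.
import importlib

for module_name in ("flair", "gensim", "bpemb"):
    try:
        importlib.import_module(module_name)
        print(f"{module_name}: OK")
    except ImportError as exc:
        print(f"{module_name}: missing ({exc})")
```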
-------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | import torch 5 | 6 | import flair 7 | 8 | 9 | @pytest.fixture(scope="module") 10 | def resources_path(): 11 | return Path(__file__).parent / "resources" 12 | 13 | 14 | @pytest.fixture(scope="module") 15 | def tasks_base_path(resources_path): 16 | return resources_path / "tasks" 17 | 18 | 19 | @pytest.fixture() 20 | def results_base_path(resources_path): 21 | path = resources_path / "results" 22 | try: 23 | yield path 24 | finally: 25 | for p in reversed(list(path.rglob("*"))): 26 | if p.is_file(): 27 | p.unlink() 28 | else: 29 | p.rmdir() 30 | if path.is_dir(): 31 | path.rmdir() 32 | 33 | 34 | @pytest.fixture(autouse=True) 35 | def set_cpu(force_cpu): 36 | if force_cpu: 37 | flair.device = torch.device("cpu") 38 | 39 | 40 | def pytest_addoption(parser): 41 | parser.addoption( 42 | "--runintegration", 43 | action="store_true", 44 | default=False, 45 | help="run integration tests", 46 | ) 47 | parser.addoption( 48 | "--force-cpu", 49 | action="store_true", 50 | default=False, 51 | help="use cpu for tests even when gpu is available", 52 | ) 53 | 54 | 55 | def pytest_collection_modifyitems(config, items): 56 | if not config.getoption("--runintegration"): 57 | skip_integration = pytest.mark.skip(reason="need --runintegration option to run") 58 | for item in items: 59 | if "integration" in item.keywords: 60 | item.add_marker(skip_integration) 61 | 62 | 63 | def pytest_generate_tests(metafunc): 64 | option_value = metafunc.config.getoption("--force-cpu") 65 | if "force_cpu" in metafunc.fixturenames and option_value is not None: 66 | metafunc.parametrize("force_cpu", [option_value]) 67 | -------------------------------------------------------------------------------- /tests/embeddings/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/tests/embeddings/__init__.py -------------------------------------------------------------------------------- /tests/embeddings/test_byte_pair_embeddings.py: -------------------------------------------------------------------------------- 1 | from flair.embeddings import BytePairEmbeddings 2 | from tests.embedding_test_utils import BaseEmbeddingsTest 3 | 4 | 5 | class TestBytePairEmbeddings(BaseEmbeddingsTest): 6 | embedding_cls = BytePairEmbeddings 7 | is_token_embedding = True 8 | is_document_embedding = False 9 | default_args = {"language": "en"} 10 | -------------------------------------------------------------------------------- /tests/embeddings/test_document_transform_word_embeddings.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from flair.embeddings import ( 4 | DocumentCNNEmbeddings, 5 | DocumentLMEmbeddings, 6 | DocumentPoolEmbeddings, 7 | DocumentRNNEmbeddings, 8 | FlairEmbeddings, 9 | TokenEmbeddings, 10 | WordEmbeddings, 11 | ) 12 | from tests.embedding_test_utils import BaseEmbeddingsTest 13 | 14 | word: TokenEmbeddings = 
WordEmbeddings("turian") 15 | flair_embedding: TokenEmbeddings = FlairEmbeddings("news-forward-fast") 16 | flair_embedding_back: TokenEmbeddings = FlairEmbeddings("news-backward-fast") 17 | 18 | 19 | class BaseDocumentsViaWordEmbeddingsTest(BaseEmbeddingsTest): 20 | is_document_embedding = True 21 | is_token_embedding = False 22 | base_embeddings: list[TokenEmbeddings] = [word, flair_embedding] 23 | 24 | def create_embedding_from_name(self, name: str): 25 | """Overwrite this method if it is more complex to load an embedding by name.""" 26 | assert self.name_field is not None 27 | kwargs = dict(self.default_args) 28 | kwargs.pop(self.name_field) 29 | return self.embedding_cls(name, **kwargs) # type: ignore[call-arg] 30 | 31 | def create_embedding_with_args(self, args: dict[str, Any]): 32 | kwargs = dict(self.default_args) 33 | for k, v in args.items(): 34 | kwargs[k] = v 35 | return self.embedding_cls(self.base_embeddings, **kwargs) # type: ignore[call-arg] 36 | 37 | 38 | class TestDocumentLstmEmbeddings(BaseDocumentsViaWordEmbeddingsTest): 39 | embedding_cls = DocumentRNNEmbeddings 40 | default_args = { 41 | "hidden_size": 128, 42 | "bidirectional": False, 43 | } 44 | valid_args = [{"bidirectional": False}, {"bidirectional": True}] 45 | 46 | 47 | class TestDocumentPoolEmbeddings(BaseDocumentsViaWordEmbeddingsTest): 48 | embedding_cls = DocumentPoolEmbeddings 49 | default_args = { 50 | "fine_tune_mode": "nonlinear", 51 | } 52 | valid_args = [{"pooling": "mean"}, {"pooling": "max"}, {"pooling": "min"}] 53 | 54 | 55 | class TestDocumentCNNEmbeddings(BaseDocumentsViaWordEmbeddingsTest): 56 | embedding_cls = DocumentCNNEmbeddings 57 | default_args = { 58 | "kernels": ((50, 2), (50, 3)), 59 | } 60 | valid_args = [{"reproject_words_dimension": None}, {"reproject_words_dimension": 100}] 61 | 62 | 63 | class TestDocumentLMEmbeddings(BaseDocumentsViaWordEmbeddingsTest): 64 | embedding_cls = DocumentLMEmbeddings 65 | base_embeddings = [flair_embedding, flair_embedding_back] 66 | default_args: dict[str, Any] = {} 67 | -------------------------------------------------------------------------------- /tests/embeddings/test_flair_embeddings.py: -------------------------------------------------------------------------------- 1 | from flair.data import Dictionary, Sentence 2 | from flair.embeddings import ( 3 | DocumentLMEmbeddings, 4 | DocumentRNNEmbeddings, 5 | FlairEmbeddings, 6 | ) 7 | from flair.models import LanguageModel 8 | from tests.embedding_test_utils import BaseEmbeddingsTest 9 | 10 | 11 | class TestFlairEmbeddings(BaseEmbeddingsTest): 12 | embedding_cls = FlairEmbeddings 13 | is_token_embedding = True 14 | is_document_embedding = False 15 | default_args = {"model": "news-forward-fast"} 16 | 17 | name_field = "model" 18 | invalid_names = ["other", "not/existing/path/to/embeddings"] 19 | 20 | def test_fine_tunable_flair_embedding(self): 21 | language_model_forward = LanguageModel(Dictionary.load("chars"), is_forward_lm=True, hidden_size=32, nlayers=1) 22 | 23 | embeddings: DocumentRNNEmbeddings = DocumentRNNEmbeddings( 24 | [FlairEmbeddings(language_model_forward, fine_tune=True)], 25 | hidden_size=128, 26 | bidirectional=False, 27 | ) 28 | 29 | sentence: Sentence = Sentence("I love Berlin.") 30 | 31 | embeddings.embed(sentence) 32 | 33 | assert len(sentence.get_embedding()) == 128 34 | assert len(sentence.get_embedding()) == embeddings.embedding_length 35 | 36 | sentence.clear_embeddings() 37 | 38 | assert len(sentence.get_embedding()) == 0 39 | 40 | embeddings: DocumentLMEmbeddings = 
DocumentLMEmbeddings( 41 | [FlairEmbeddings(language_model_forward, fine_tune=True)] 42 | ) 43 | 44 | sentence: Sentence = Sentence("I love Berlin.") 45 | 46 | embeddings.embed(sentence) 47 | 48 | assert len(sentence.get_embedding()) == 32 49 | assert len(sentence.get_embedding()) == embeddings.embedding_length 50 | 51 | sentence.clear_embeddings() 52 | 53 | assert len(sentence.get_embedding()) == 0 54 | del embeddings 55 | -------------------------------------------------------------------------------- /tests/embeddings/test_simple_token_embeddings.py: -------------------------------------------------------------------------------- 1 | from flair.data import Dictionary 2 | from flair.embeddings import CharacterEmbeddings, HashEmbeddings, OneHotEmbeddings 3 | from tests.embedding_test_utils import BaseEmbeddingsTest 4 | 5 | vocab_dictionary = Dictionary(add_unk=True) 6 | vocab_dictionary.add_item("I") 7 | vocab_dictionary.add_item("love") 8 | vocab_dictionary.add_item("berlin") 9 | 10 | 11 | class TestCharacterEmbeddings(BaseEmbeddingsTest): 12 | embedding_cls = CharacterEmbeddings 13 | is_token_embedding = True 14 | is_document_embedding = False 15 | default_args = {"path_to_char_dict": None} 16 | 17 | 18 | class TestOneHotEmbeddings(BaseEmbeddingsTest): 19 | embedding_cls = OneHotEmbeddings 20 | is_token_embedding = True 21 | is_document_embedding = False 22 | default_args = {"vocab_dictionary": vocab_dictionary} 23 | 24 | 25 | class TestHashEmbeddings(BaseEmbeddingsTest): 26 | embedding_cls = HashEmbeddings 27 | is_token_embedding = True 28 | is_document_embedding = False 29 | default_args = {"num_embeddings": 10} 30 | -------------------------------------------------------------------------------- /tests/embeddings/test_stacked_embeddings.py: -------------------------------------------------------------------------------- 1 | from flair.data import Sentence 2 | from flair.embeddings import ( 3 | FlairEmbeddings, 4 | StackedEmbeddings, 5 | TokenEmbeddings, 6 | WordEmbeddings, 7 | ) 8 | from flair.embeddings.base import load_embeddings 9 | 10 | 11 | def test_stacked_embeddings(): 12 | glove: TokenEmbeddings = WordEmbeddings("turian") 13 | flair_embedding: TokenEmbeddings = FlairEmbeddings("news-forward-fast") 14 | embeddings: StackedEmbeddings = StackedEmbeddings([glove, flair_embedding]) 15 | 16 | sentence: Sentence = Sentence("I love Berlin. 
Berlin is a great place to live.") 17 | embeddings.embed(sentence) 18 | 19 | for token in sentence.tokens: 20 | assert len(token.get_embedding()) == 1074 21 | 22 | token.clear_embeddings() 23 | 24 | assert len(token.get_embedding()) == 0 25 | del embeddings 26 | 27 | 28 | def test_stacked_embeddings_stay_the_same_after_saving_and_loading(): 29 | glove: TokenEmbeddings = WordEmbeddings("turian") 30 | flair_embedding: TokenEmbeddings = FlairEmbeddings("news-forward-fast") 31 | embeddings: StackedEmbeddings = StackedEmbeddings([glove, flair_embedding]) 32 | 33 | assert not embeddings.training 34 | 35 | sentence_old: Sentence = Sentence("I love Berlin") 36 | embeddings.embed(sentence_old) 37 | names_old = embeddings.get_names() 38 | embedding_length_old = embeddings.embedding_length 39 | 40 | save_data = embeddings.save_embeddings(use_state_dict=True) 41 | new_embeddings = load_embeddings(save_data) 42 | 43 | sentence_new: Sentence = Sentence("I love Berlin") 44 | new_embeddings.embed(sentence_new) 45 | names_new = new_embeddings.get_names() 46 | embedding_length_new = new_embeddings.embedding_length 47 | 48 | assert not new_embeddings.training 49 | assert names_old == names_new 50 | assert embedding_length_old == embedding_length_new 51 | 52 | for token_old, token_new in zip(sentence_old, sentence_new): 53 | assert (token_old.get_embedding(names_old) == token_new.get_embedding(names_new)).all() 54 | -------------------------------------------------------------------------------- /tests/embeddings/test_tfidf_embeddings.py: -------------------------------------------------------------------------------- 1 | from flair.data import Sentence 2 | from flair.embeddings import DocumentTFIDFEmbeddings 3 | from tests.embedding_test_utils import BaseEmbeddingsTest 4 | 5 | 6 | class TFIDFEmbeddingsTest(BaseEmbeddingsTest): 7 | embedding_cls = DocumentTFIDFEmbeddings 8 | is_document_embedding = True 9 | is_token_embedding = False 10 | 11 | default_args = { 12 | "train_dataset": [ 13 | Sentence("This is a sentence"), 14 | Sentence("This is another sentence"), 15 | Sentence("another a This I Berlin"), 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /tests/embeddings/test_transformer_document_embeddings.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from flair.data import Dictionary, Sentence 4 | from flair.embeddings import TransformerDocumentEmbeddings 5 | from flair.models import TextClassifier 6 | from flair.nn import Classifier 7 | from tests.embedding_test_utils import BaseEmbeddingsTest 8 | 9 | 10 | class TestTransformerDocumentEmbeddings(BaseEmbeddingsTest): 11 | embedding_cls = TransformerDocumentEmbeddings 12 | is_document_embedding = True 13 | is_token_embedding = False 14 | default_args = {"model": "distilbert-base-uncased", "allow_long_sentences": False} 15 | valid_args = [ 16 | {"layers": "-1,-2,-3,-4", "layer_mean": False}, 17 | {"layers": "all", "layer_mean": True}, 18 | {"layers": "all", "layer_mean": False}, 19 | ] 20 | 21 | name_field = "embeddings" 22 | invalid_names = ["other", "not/existing/path/to/embeddings"] 23 | 24 | 25 | def test_if_loaded_embeddings_have_all_attributes(tasks_base_path): 26 | # dummy model with embeddings 27 | embeddings = TransformerDocumentEmbeddings( 28 | "distilbert-base-uncased", 29 | use_context=True, 30 | use_context_separator=False, 31 | ) 32 | 33 | model = TextClassifier(label_type="ner", label_dictionary=Dictionary(), embeddings=embeddings) 
34 | 35 | # save the dummy and load it again 36 | model.save(tasks_base_path / "single.pt") 37 | loaded_single_task = Classifier.load(tasks_base_path / "single.pt") 38 | 39 | # check that context_length and use_context_separator is the same for both 40 | assert model.embeddings.context_length == loaded_single_task.embeddings.context_length 41 | assert model.embeddings.use_context_separator == loaded_single_task.embeddings.use_context_separator 42 | 43 | 44 | @pytest.mark.parametrize("cls_pooling", ["cls", "mean", "max"]) 45 | def test_cls_pooling(cls_pooling): 46 | embeddings = TransformerDocumentEmbeddings( 47 | model="distilbert-base-uncased", 48 | layers="-1", 49 | cls_pooling=cls_pooling, 50 | allow_long_sentences=True, 51 | ) 52 | sentence = Sentence("Today is a good day.") 53 | embeddings.embed(sentence) 54 | assert sentence.embedding is not None 55 | -------------------------------------------------------------------------------- /tests/embeddings/test_word_embeddings.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from flair.embeddings import MuseCrosslingualEmbeddings, NILCEmbeddings, WordEmbeddings 4 | from tests.embedding_test_utils import BaseEmbeddingsTest 5 | 6 | 7 | class TestWordEmbeddings(BaseEmbeddingsTest): 8 | embedding_cls = WordEmbeddings 9 | is_token_embedding = True 10 | is_document_embedding = False 11 | default_args = {"embeddings": "turian"} 12 | 13 | name_field = "embeddings" 14 | invalid_names = ["other", "not/existing/path/to/embeddings"] 15 | 16 | 17 | class TestMuseCrosslingualEmbeddings(BaseEmbeddingsTest): 18 | embedding_cls = MuseCrosslingualEmbeddings 19 | is_token_embedding = True 20 | is_document_embedding = False 21 | default_args: dict[str, Any] = {} 22 | 23 | 24 | class TestNILCEmbeddings(BaseEmbeddingsTest): 25 | embedding_cls = NILCEmbeddings 26 | is_token_embedding = True 27 | is_document_embedding = False 28 | default_args = {"embeddings": "fasttext", "model": "cbow", "size": 50} 29 | valid_args = [{"embeddings": "glove"}] 30 | 31 | name_field = "embeddings" 32 | invalid_names = ["other", "not/existing/path/to/embeddings"] 33 | -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/tests/models/__init__.py -------------------------------------------------------------------------------- /tests/models/test_entity_linker.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from flair.data import Sentence 4 | from flair.datasets import NEL_ENGLISH_AIDA 5 | from flair.embeddings import TransformerWordEmbeddings 6 | from flair.models import SpanClassifier 7 | from tests.model_test_utils import BaseModelTest 8 | 9 | 10 | class TestEntityLinker(BaseModelTest): 11 | model_cls = SpanClassifier 12 | train_label_type = "nel" 13 | training_args = {"max_epochs": 2} 14 | 15 | @pytest.fixture() 16 | def embeddings(self): 17 | return TransformerWordEmbeddings(model="distilbert-base-uncased", layers="-1", fine_tune=True) 18 | 19 | @pytest.fixture() 20 | def corpus(self, tasks_base_path): 21 | return NEL_ENGLISH_AIDA().downsample(0.01) 22 | 23 | @pytest.fixture() 24 | def train_test_sentence(self): 25 | sentence = Sentence("I love NYC and hate OYC") 26 | 27 | sentence[2:3].add_label("nel", "New York City") 28 | 
sentence[5:6].add_label("nel", "Old York City") 29 | return sentence 30 | 31 | @pytest.fixture() 32 | def labeled_sentence(self): 33 | sentence = Sentence("I love NYC and hate OYC") 34 | 35 | sentence[2:3].add_label("nel", "New York City") 36 | sentence[5:6].add_label("nel", "Old York City") 37 | return sentence 38 | -------------------------------------------------------------------------------- /tests/models/test_model_license.py: -------------------------------------------------------------------------------- 1 | from flair.nn import Model 2 | 3 | 4 | def test_model_license_persistence(tmp_path): 5 | """Test setting and persisting license information for a model.""" 6 | # Create temporary file path using pytest's tmp_path fixture 7 | model_path = tmp_path / "test_model_license.pt" 8 | 9 | # Load a base model 10 | model = Model.load("ner-fast") 11 | 12 | # Check initial license (should be none/default) 13 | assert model.license_info == "No license information available" 14 | 15 | # Set a new license 16 | test_license = "MIT License - Copyright (c) 2024" 17 | model.license_info = test_license 18 | assert model.license_info == test_license 19 | 20 | # Save the model with the new license 21 | model.save(str(model_path)) 22 | 23 | # Load the saved model and check license persists 24 | loaded_model = Model.load(model_path) 25 | assert loaded_model.license_info == test_license 26 | -------------------------------------------------------------------------------- /tests/models/test_regexp_tagger.py: -------------------------------------------------------------------------------- 1 | from flair.data import Sentence 2 | from flair.models import RegexpTagger 3 | 4 | 5 | def test_regexp_tagger(): 6 | 7 | sentence = Sentence('Der sagte: "das ist durchaus interessant"') 8 | 9 | tagger = RegexpTagger( 10 | mapping=[(r'["„»]((?:(?=(\\?))\2.)*?)[”"“«]', "quote_part", 1), (r'["„»]((?:(?=(\\?))\2.)*?)[”"“«]', "quote")] 11 | ) 12 | 13 | tagger.predict(sentence) 14 | 15 | assert sentence.get_label("quote_part").data_point.text == "das ist durchaus interessant" 16 | assert sentence.get_label("quote").data_point.text == '"das ist durchaus interessant"' 17 | -------------------------------------------------------------------------------- /tests/models/test_relation_extractor.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from flair.data import Sentence 4 | from flair.datasets import ColumnCorpus 5 | from flair.embeddings import TransformerWordEmbeddings 6 | from flair.models import RelationExtractor 7 | from tests.model_test_utils import BaseModelTest 8 | 9 | 10 | class TestRelationExtractor(BaseModelTest): 11 | model_cls = RelationExtractor 12 | train_label_type = "relation" 13 | pretrained_model = "relations" 14 | model_args = { 15 | "entity_label_type": "ner", 16 | "train_on_gold_pairs_only": True, 17 | "entity_pair_filters": { # Define valid entity pair combinations, used as relation candidates 18 | ("ORG", "PER"), # founded_by 19 | ("LOC", "PER"), # place_of_birth 20 | }, 21 | } 22 | training_args = { 23 | "max_epochs": 4, 24 | "mini_batch_size": 4, 25 | "learning_rate": 0.1, 26 | } 27 | 28 | @pytest.fixture() 29 | def corpus(self, tasks_base_path): 30 | return ColumnCorpus( 31 | data_folder=tasks_base_path / "conllu", 32 | train_file="train.conllup", 33 | dev_file="train.conllup", 34 | test_file="train.conllup", 35 | column_format={1: "text", 2: "pos", 3: "ner"}, 36 | ) 37 | 38 | @pytest.fixture() 39 | def example_sentence(self): 40 | 
sentence = Sentence(["Microsoft", "was", "founded", "by", "Bill", "Gates"]) 41 | sentence[:1].add_label(typename="ner", value="ORG", score=1.0) 42 | sentence[4:].add_label(typename="ner", value="PER", score=1.0) 43 | return sentence 44 | 45 | @pytest.fixture() 46 | def train_test_sentence(self): 47 | sentence = Sentence(["Apple", "was", "founded", "by", "Steve", "Jobs", "."]) 48 | sentence[0:1].add_label("ner", "ORG") 49 | sentence[4:6].add_label("ner", "PER") 50 | return sentence 51 | 52 | @pytest.fixture() 53 | def embeddings(self): 54 | return TransformerWordEmbeddings(model="distilbert-base-uncased", fine_tune=True) 55 | 56 | def assert_training_example(self, predicted_training_example): 57 | relations = predicted_training_example.get_relations("relation") 58 | assert len(relations) == 1 59 | assert relations[0].tag == "founded_by" 60 | 61 | def has_embedding(self, sentence): 62 | return all(token.get_embedding().cpu().numpy().size != 0 for token in sentence) 63 | -------------------------------------------------------------------------------- /tests/models/test_text_regressor.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import flair 4 | from flair.embeddings import DocumentRNNEmbeddings, WordEmbeddings 5 | from flair.models.text_regression_model import TextRegressor 6 | from tests.model_test_utils import BaseModelTest 7 | 8 | 9 | class TestTextRegressor(BaseModelTest): 10 | model_cls = TextRegressor 11 | train_label_type = "regression" 12 | training_args = { 13 | "max_epochs": 3, 14 | "mini_batch_size": 2, 15 | "learning_rate": 0.1, 16 | "main_evaluation_metric": ("correlation", "pearson"), 17 | } 18 | 19 | def build_model(self, embeddings, label_dict, **kwargs): 20 | # no need for label_dict 21 | return self.model_cls(embeddings, self.train_label_type) 22 | 23 | @pytest.fixture() 24 | def embeddings(self): 25 | glove_embedding = WordEmbeddings("turian") 26 | return DocumentRNNEmbeddings([glove_embedding], 128, 1, False, 64, False, False) 27 | 28 | @pytest.fixture() 29 | def corpus(self, tasks_base_path): 30 | return flair.datasets.ClassificationCorpus(tasks_base_path / "regression", label_type=self.train_label_type) 31 | -------------------------------------------------------------------------------- /tests/models/test_word_tagger.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import flair 4 | from flair.embeddings import TransformerWordEmbeddings 5 | from flair.models import TokenClassifier 6 | from tests.model_test_utils import BaseModelTest 7 | 8 | 9 | class TestWordTagger(BaseModelTest): 10 | model_cls = TokenClassifier 11 | train_label_type = "pos" 12 | training_args = { 13 | "max_epochs": 2, 14 | "learning_rate": 0.1, 15 | "mini_batch_size": 2, 16 | } 17 | 18 | def has_embedding(self, sentence): 19 | for token in sentence: 20 | if token.get_embedding().cpu().numpy().size == 0: 21 | return False 22 | return True 23 | 24 | def build_model(self, embeddings, label_dict, **kwargs): 25 | model_args = dict(self.model_args) 26 | for k in kwargs: 27 | if k in model_args: 28 | del model_args[k] 29 | return self.model_cls( 30 | embeddings=embeddings, 31 | label_dictionary=label_dict, 32 | label_type=self.train_label_type, 33 | **model_args, 34 | **kwargs, 35 | ) 36 | 37 | @pytest.fixture() 38 | def corpus(self, tasks_base_path): 39 | return flair.datasets.UD_ENGLISH(tasks_base_path) 40 | 41 | @pytest.fixture() 42 | def embeddings(self): 43 | return
TransformerWordEmbeddings("distilbert-base-uncased") 44 | -------------------------------------------------------------------------------- /tests/resources/corpora/lorem_ipsum/test.txt: -------------------------------------------------------------------------------- 1 | Adipiscing commodo elit at imperdiet. Consequat interdum varius sit amet mattis vulputate enim nulla. Nulla aliquet porttitor lacus luctus accumsan tortor. Curabitur gravida arcu ac tortor. Adipiscing elit pellentesque habitant morbi. Sed viverra tellus in hac habitasse platea dictumst. Turpis cursus in hac habitasse. Pharetra vel turpis nunc eget. Enim facilisis gravida neque convallis a cras semper auctor neque. Interdum posuere lorem ipsum dolor sit amet consectetur adipiscing elit. 2 | 3 | Mauris sit amet massa vitae tortor condimentum lacinia. Neque gravida in fermentum et sollicitudin. Blandit volutpat maecenas volutpat blandit aliquam. Gravida neque convallis a cras semper auctor neque vitae. Viverra aliquet eget sit amet tellus cras adipiscing enim eu. Risus sed vulputate odio ut enim blandit volutpat maecenas. Amet tellus cras adipiscing enim eu. Viverra tellus in hac habitasse platea dictumst vestibulum rhoncus est. Magna etiam tempor orci eu lobortis elementum. Leo vel fringilla est ullamcorper eget. Nisl nisi scelerisque eu ultrices. Eros donec ac odio tempor orci dapibus ultrices in. Nisl nisi scelerisque eu ultrices vitae auctor eu augue. Hac habitasse platea dictumst vestibulum rhoncus est pellentesque elit. Habitasse platea dictumst vestibulum rhoncus est pellentesque elit. In ornare quam viverra orci sagittis. Morbi quis commodo odio aenean. Nam at lectus urna duis convallis convallis tellus id interdum. -------------------------------------------------------------------------------- /tests/resources/corpora/lorem_ipsum/valid.txt: -------------------------------------------------------------------------------- 1 | Nulla at volutpat diam ut venenatis tellus in metus vulputate. Porttitor leo a diam sollicitudin tempor. Tincidunt vitae semper quis lectus nulla at volutpat diam. Ornare aenean euismod elementum nisi quis eleifend quam adipiscing. Tortor pretium viverra suspendisse potenti. Arcu risus quis varius quam quisque id. Non sodales neque sodales ut etiam sit amet nisl. Porttitor lacus luctus accumsan tortor posuere ac ut consequat. Diam sit amet nisl suscipit. Ut sem nulla pharetra diam sit amet nisl suscipit adipiscing. Varius quam quisque id diam. Elementum tempus egestas sed sed risus pretium quam vulputate. Eu ultrices vitae auctor eu augue ut lectus. Tincidunt id aliquet risus feugiat in ante metus dictum at. Mauris cursus mattis molestie a iaculis at erat pellentesque. Leo urna molestie at elementum eu. 2 | 3 | Posuere morbi leo urna molestie. Tincidunt nunc pulvinar sapien et. Mattis molestie a iaculis at erat pellentesque. Arcu cursus euismod quis viverra nibh cras pulvinar mattis nunc. Phasellus vestibulum lorem sed risus ultricies tristique nulla aliquet enim. Aenean et tortor at risus viverra. Ut placerat orci nulla pellentesque dignissim. Est lorem ipsum dolor sit amet. Eros donec ac odio tempor. Elementum integer enim neque volutpat ac tincidunt vitae. -------------------------------------------------------------------------------- /tests/resources/tasks/ag_news/README.md: -------------------------------------------------------------------------------- 1 | ## AG_NEWS 2 | 3 | Data is taken from [here](https://www.di.unipi.it/~gulli/AG_corpus_of_news_articles.html). 
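The sample files in this folder use the FastText-style `__label__` prefix described below; a minimal sketch of loading such a folder with flair's `ClassificationCorpus` (the folder path and `label_type` here are illustrative assumptions) looks like this:

```python
from flair.datasets import ClassificationCorpus

# Minimal sketch: loads FastText-formatted files, one "__label__<class> <text>" per line.
# Folder path and label_type are illustrative; a dev split is sampled from train if absent.
corpus = ClassificationCorpus(
    "tests/resources/tasks/ag_news",
    train_file="train.txt",
    test_file="test.txt",
    label_type="topic",
)
print(corpus)
```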
4 | 5 | The dataset contains a collection of news articles grouped into different categories. 6 | We took a small random sample and converted it to the expected format of our data fetcher: 7 | ``` 8 | __label__<class_name> <text> 9 | ``` 10 | 11 | #### Publications Using the Dataset 12 | 13 | * G. M. Del Corso, A. Gulli, and F. Romani. Ranking a stream of news. In Proceedings of 14th International World Wide Web Conference, pages 97–106, Chiba, Japan, 2005. 14 | * A. Gulli. The anatomy of a news search engine. In Proceedings of 14th International World Wide Web Conference, pages 880–881, Chiba, Japan, 2005. 15 | -------------------------------------------------------------------------------- /tests/resources/tasks/ag_news/test.txt: -------------------------------------------------------------------------------- 1 | __label__World Libya Seems Honest About Nuke Program -- UN Report VIENNA, Austria (Reuters) - The U.N. nuclear watchdog said in a confidential report circulated Monday that Libya appears to have been telling the truth in its declarations on the covert atomic weapons program that it agreed to abandon last year. 2 | __label__Business Judge Orders Parmalat Auditors to Trial MILAN (Reuters) - An Italian judge opened preliminary hearings on Tuesday into the 14-billion-euro collapse of Parmalat and immediately ruled that two former auditors of the food group should stand trial in one of Europe's biggest fraud cases. 3 | __label__Business Market Turmoil Saps Confidence \N 4 | __label__Sci/Tech Repairing airplane wings with nanotubes in flight An electrical pulse through nanotubes and wires helps find the crack. 5 | __label__Sports US lacks golden touch LAKE PLACID, N.Y. -- After Team USA racked up six goals in a 6-3 victory over the Swedes Saturday, coach Ben Smith said he hoped his players saved a few because goals were going to be hard to get yesterday against Canada. 6 | __label__World Experts Doubt Drop In Violence in Iraq The U.S. military's claim that violence has decreased sharply in Iraq in recent months has come under scrutiny from many experts within and outside the government, who contend that some of the underlying statistics are questionable and selectively ignore negative trends.
7 | __label__Sports Passing the torch Calgary Sun. Dan Marino wouldn't swap his collection of NFL records and Hall-of-Fame nomination for the world. Not even for that elusive Super Bowl victory. 8 | __label__Sci/Tech Saved, and Enslaved, by the Cell A growing number of experts say cellphone use may be making us less autonomous and less capable of solving problems on our own. 9 | __label__Sports Kolzig helps Capitals slide by Avalanche WASHINGTON (Reuters) - The Washington Capitals stayed in the hunt for a playoff spot after Olaf Kolzig made 18 saves in a 2-1 win over the struggling Colorado Avalanche on Wednesday. 10 | __label__World Pension Fund of New York Files Suit Against Merck The main pension fund of New York State filed a lawsuit against Merck & Company, accusing it of misleading shareholders about the safety of its pain drug Vioxx. 11 | -------------------------------------------------------------------------------- /tests/resources/tasks/ag_news/train.txt: -------------------------------------------------------------------------------- 1 | __label__World Light relay 'should be dropped' A leading astronomy group in the US voices opposition to a global 'light relay' planned for next year. 2 | __label__World LA City Council Ordered to Pay Attention (AP) AP - During public hearings, members of the City Council talk on cell phones, chat among themselves, read mail or wander around the room. A state appeals court says they should be doing something else: paying attention. 3 | __label__Business Tribune Profit Declines on Lower Newspaper Ad Revenue Tribune said its third-quarter earnings dropped 7 percent, beating expectations, adding that the housing slump and lower consumer spending worsened advertising revenue. 4 | __label__World Blair is warned about an attack on Iran LONDON -- Foreign policy specialists warned Prime Minister Tony Blair yesterday that military action against Iran could worsen violence across the Middle East and urged him to persuade the United States to hold talks with Tehran. 5 | __label__World Bangladesh Awakes in Shock as Blast Toll Hits 16 Extra armed police patrolled the streets of the Bangladeshi capital and traffic was light on Sunday, a working day, as shocked Bangladeshis woke up to the aftermath of grenade blasts that killed at least 16 people. 6 | __label__Business Wall St. Turns to the Time Out as Punishment Regulators are wielding a new weapon against Wall Street firms instead of multimillion-dollar fines: temporarily shutting down certain business lines. 7 | __label__World Legal move over halted BAE probe Campaigners threaten the government with legal action after a probe into arms deals with Saudi Arabia is dropped. 8 | __label__Sports Gamecocks, Tigers Say "No" To Bowl Bids COLUMBIA, SC -- Clemson and South Carolina will not accept bowl bids, punishment for a brawl between players toward the end of Saturday's game, the schools announced Monday. 9 | __label__Sports Capitals Have Budget to Attract Free Agents The NHL free agent signing period begins Sunday at noon, and Capitals General Manager George McPhee is shopping with a bigger-than-usual budget.
10 | __label__Sci/Tech Hobbit-sized Humans Called Homo floresiensis Discovered by ... Long live the real Bilbo Baggins, the first Little People of the World, Homo floresiensis and Homo sapien archeologists Michael Morwood, Peter Brown and Professor Soejono! 11 | -------------------------------------------------------------------------------- /tests/resources/tasks/column_corpus_options/eng.testa: -------------------------------------------------------------------------------- 1 | WORD TAG 2 | This O 3 | is O 4 | Coca Cola O 5 | -------------------------------------------------------------------------------- /tests/resources/tasks/column_corpus_options/eng.testb: -------------------------------------------------------------------------------- 1 | WORD TAG 2 | This O 3 | is O 4 | New York O 5 | -------------------------------------------------------------------------------- /tests/resources/tasks/column_corpus_options/eng.train: -------------------------------------------------------------------------------- 1 | WORD TAG 2 | This O 3 | is O 4 | New Berlin LOC 5 | -------------------------------------------------------------------------------- /tests/resources/tasks/column_with_whitespaces/eng.testa: -------------------------------------------------------------------------------- 1 | It O + 2 | is O + 3 | a O + 4 | French B-LOC - 5 | - O - 6 | speaking O + 7 | town O - 8 | . O + -------------------------------------------------------------------------------- /tests/resources/tasks/column_with_whitespaces/eng.testb: -------------------------------------------------------------------------------- 1 | It O + 2 | is O + 3 | a O + 4 | US B-LOC - 5 | - O - 6 | based O + 7 | company O - 8 | . O + -------------------------------------------------------------------------------- /tests/resources/tasks/column_with_whitespaces/eng.train: -------------------------------------------------------------------------------- 1 | It O + 2 | is O + 3 | a O + 4 | German B-LOC - 5 | - O - 6 | owned O + 7 | firm O - 8 | . O + -------------------------------------------------------------------------------- /tests/resources/tasks/conllu/train.conllu: -------------------------------------------------------------------------------- 1 | # text = Larry Page and Sergey Brin founded Google. 2 | # relations = 7;7;1;2;founded_by|7;7;4;5;founded_by 3 | 1 Larry PROPN B-PER _ 4 | 2 Page PROPN I-PER _ 5 | 3 and CCONJ O _ 6 | 4 Sergey PROPN B-PER _ 7 | 5 Brin PROPN I-PER _ 8 | 6 founded VERB O _ 9 | 7 Google PROPN B-ORG SpaceAfter=No 10 | 8 . PUNCT O _ 11 | 12 | # text = Microsoft was founded by Bill Gates. 13 | # relations = 1;1;5;6;founded_by 14 | 1 Microsoft PROPN B-ORG _ 15 | 2 was AUX O _ 16 | 3 founded VERB O _ 17 | 4 by ADP O _ 18 | 5 Bill PROPN B-PER _ 19 | 6 Gates PROPN I-PER SpaceAfter=No 20 | 7 . PUNCT O _ 21 | 22 | # text = Konrad Zuse was born in Berlin on 22 June 1910. 23 | # relations = 6;6;1;2;place_of_birth 24 | 1 Konrad PROPN B-PER _ 25 | 2 Zuse PROPN I-PER _ 26 | 3 was AUX O _ 27 | 4 born VERB O _ 28 | 5 in ADP O _ 29 | 6 Berlin PROPN B-LOC _ 30 | 7 on ADP O _ 31 | 8 22 NUM B-DATE _ 32 | 9 June PROPN I-DATE _ 33 | 10 1910 NUM I-DATE SpaceAfter=No 34 | 11 . PUNCT O _ 35 | 36 | # text = Joseph Weizenbaum was born in Berlin, Germany. 37 | # relations = 6;6;1;2;place_of_birth 38 | 1 Joseph PROPN B-PER _ 39 | 2 Weizenbaum PROPN I-PER _ 40 | 3 was AUX O _ 41 | 4 born VERB O _ 42 | 5 in ADP O _ 43 | 6 Berlin PROPN B-LOC _ 44 | 7 , PUNCT O _ 45 | 8 Germany PROPN B-LOC SpaceAfter=No 46 | 9 . 
PUNCT O _ 47 | -------------------------------------------------------------------------------- /tests/resources/tasks/conllu/train.conllup: -------------------------------------------------------------------------------- 1 | # global.columns = id form upos ner misc 2 | # text = Larry Page and Sergey Brin founded Google. 3 | # relations = 7;7;1;2;founded_by|7;7;4;5;founded_by 4 | 1 Larry PROPN B-PER _ 5 | 2 Page PROPN I-PER _ 6 | 3 and CCONJ O _ 7 | 4 Sergey PROPN B-PER _ 8 | 5 Brin PROPN I-PER _ 9 | 6 founded VERB O _ 10 | 7 Google PROPN B-ORG SpaceAfter=No 11 | 8 . PUNCT O _ 12 | 13 | # text = Microsoft was founded by Bill Gates. 14 | # relations = 1;1;5;6;founded_by 15 | 1 Microsoft PROPN B-ORG _ 16 | 2 was AUX O _ 17 | 3 founded VERB O _ 18 | 4 by ADP O _ 19 | 5 Bill PROPN B-PER _ 20 | 6 Gates PROPN I-PER SpaceAfter=No 21 | 7 . PUNCT O _ 22 | 23 | # text = Konrad Zuse was born in Berlin on 22 June 1910. 24 | # relations = 6;6;1;2;place_of_birth 25 | 1 Konrad PROPN B-PER _ 26 | 2 Zuse PROPN I-PER _ 27 | 3 was AUX O _ 28 | 4 born VERB O _ 29 | 5 in ADP O _ 30 | 6 Berlin PROPN B-LOC _ 31 | 7 on ADP O _ 32 | 8 22 NUM B-DATE _ 33 | 9 June PROPN I-DATE _ 34 | 10 1910 NUM I-DATE SpaceAfter=No 35 | 11 . PUNCT O _ 36 | 37 | # text = Joseph Weizenbaum, a professor at MIT, was born in Berlin, Germany. 38 | # relations = 12;12;1;2;place_of_birth|14;14;1;2;place_of_birth 39 | 1 Joseph PROPN B-PER _ 40 | 2 Weizenbaum PROPN I-PER SpaceAfter=No 41 | 3 , PUNCT O _ 42 | 4 a DET O _ 43 | 5 professor NOUN O _ 44 | 6 at ADP O _ 45 | 7 MIT PROPN B-ORG SpaceAfter=No 46 | 8 , PUNCT O _ 47 | 9 was AUX O _ 48 | 10 born VERB O _ 49 | 11 in ADP O _ 50 | 12 Berlin PROPN B-LOC SpaceAfter=No 51 | 13 , PUNCT O _ 52 | 14 Germany PROPN B-LOC SpaceAfter=No 53 | 15 . PUNCT O _ 54 | 55 | # text = The German-American computer scientist Joseph Weizenbaum (8 January 1923 - 5 March 2008) was born in Berlin. 56 | # relations = 21;21;7;8;place_of_birth 57 | 1 The DET O _ 58 | 2 German PROPN O SpaceAfter=No 59 | 3 - PUNCT O SpaceAfter=No 60 | 4 American PROPN O _ 61 | 5 computer PROPN O _ 62 | 6 scientist NOUN O _ 63 | 7 Joseph PROPN B-PER _ 64 | 8 Weizenbaum PROPN I-PER _ 65 | 9 ( PUNCT O SpaceAfter=No 66 | 10 8 NUM O _ 67 | 11 January PROPN O _ 68 | 12 1923 NUM O _ 69 | 13 - SYM O _ 70 | 14 5 NUM O _ 71 | 15 March PROPN O _ 72 | 16 2008 NUM O SpaceAfter=No 73 | 17 ) PUNCT O _ 74 | 18 was PRON O _ 75 | 19 born ADV O _ 76 | 20 in ADP O _ 77 | 21 Berlin PROPN B-LOC SpaceAfter=No 78 | 22 . PUNCT O _ 79 | -------------------------------------------------------------------------------- /tests/resources/tasks/conllu/universal_dependencies.conllu: -------------------------------------------------------------------------------- 1 | # text = They buy and sell books. 2 | 1 They they PRON PRP Case=Nom|Number=Plur 2 nsubj 2:nsubj|4:nsubj _ 3 | 2 buy buy VERB VBP Number=Plur|Person=3|Tense=Pres 0 root 0:root _ 4 | 3 and and CONJ CC _ 4 cc 4:cc _ 5 | 4 sell sell VERB VBP Number=Plur|Person=3|Tense=Pres 2 conj 0:root|2:conj _ 6 | 5 books book NOUN NNS Number=Plur 2 obj 2:obj|4:obj SpaceAfter=No 7 | 6 . . PUNCT . 
_ 2 punct 2:punct _ 8 | -------------------------------------------------------------------------------- /tests/resources/tasks/example_images/i_love_berlin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/tests/resources/tasks/example_images/i_love_berlin.png -------------------------------------------------------------------------------- /tests/resources/tasks/fashion/eng.testa: -------------------------------------------------------------------------------- 1 | Most _ _ O 2 | wedding _ _ B-Occasion 3 | dresses _ _ B-NominalProduct 4 | , _ _ O 5 | for _ _ O 6 | example _ _ O 7 | , _ _ O 8 | are _ _ O 9 | simply _ _ O 10 | too _ _ O 11 | enormous _ _ O 12 | and _ _ O 13 | terrifyingly _ _ O 14 | loaded _ _ O 15 | with _ _ O 16 | sentimental _ _ O 17 | value _ _ O 18 | for _ _ O 19 | DIY _ _ B-ProductDesign 20 | dyeing _ _ I-ProductDesign 21 | . _ _ O -------------------------------------------------------------------------------- /tests/resources/tasks/fashion/eng.testb: -------------------------------------------------------------------------------- 1 | For _ _ O 2 | my _ _ O 3 | Nurse _ _ B-NamedOrganizationBrand 4 | Ratched _ _ I-NamedOrganizationBrand 5 | dress _ _ B-NominalProduct 6 | , _ _ O 7 | I _ _ O 8 | had _ _ O 9 | brought _ _ O 10 | two _ _ O 11 | dyeing _ _ O 12 | options _ _ O 13 | — _ _ O 14 | one _ _ O 15 | more _ _ O 16 | ambitious _ _ O 17 | than _ _ O 18 | the _ _ O 19 | other _ _ O 20 | . _ _ O -------------------------------------------------------------------------------- /tests/resources/tasks/fashion/eng.train: -------------------------------------------------------------------------------- 1 | From _ _ O 2 | the _ _ O 3 | charming _ _ O 4 | Arlésienne _ _ B-NamedPerson 5 | to _ _ O 6 | the _ _ O 7 | shepherdess _ _ B-NominalProduct 8 | in _ _ O 9 | a _ _ O 10 | fairy _ _ O 11 | tale _ _ O 12 | , _ _ O 13 | with _ _ O 14 | faille _ _ B-ProductPart 15 | , _ _ O 16 | piqué _ _ B-ProductPart 17 | , _ _ O 18 | taffeta _ _ B-ProductPart 19 | , _ _ O 20 | tulle _ _ B-ProductPart 21 | , _ _ O 22 | embroidery _ _ B-ProductPart 23 | , _ _ O 24 | lace _ _ B-ProductPart 25 | , _ _ O 26 | the _ _ O 27 | repertoire _ _ B-ProductDesign 28 | is _ _ O 29 | inexhaustible _ _ O 30 | . _ _ O 31 | 32 | 33 | 34 | 35 | Subscribe _ _ O 36 | to _ _ O 37 | Highsnobiety _ _ B-NamedOrganizationPublisher 38 | on _ _ O 39 | YouTube _ _ B-NamedOrganizationOther 40 | Eric _ _ B-NamedPerson 41 | Schoenborn _ _ I-NamedPerson 42 | and _ _ O 43 | Ed _ _ B-NamedPerson 44 | Selego _ _ I-NamedPerson 45 | have _ _ O 46 | joined _ _ O 47 | forces _ _ O 48 | with _ _ O 49 | Nocturnal _ _ B-NamedOrganizationBrand 50 | skate _ _ B-Activity 51 | shop _ _ O 52 | to _ _ O 53 | turn _ _ O 54 | Drexel _ _ B-NamedLocation 55 | University _ _ I-NamedLocation 56 | ’ _ _ O 57 | s _ _ O 58 | Leonard _ _ B-NamedLocation 59 | Pearlstein _ _ I-NamedLocation 60 | Gallery _ _ I-NamedLocation 61 | into _ _ O 62 | an _ _ O 63 | interactive _ _ O 64 | skate _ _ B-Activity 65 | pop _ _ O 66 | - _ _ O 67 | up _ _ O 68 | park _ _ O 69 | . 
_ _ O 70 | 71 | Philly _ _ B-NamedPerson 72 | Radness _ _ I-NamedPerson 73 | accounts _ _ O 74 | for _ _ O 75 | the _ _ O 76 | second _ _ O 77 | installment _ _ O 78 | in _ _ O 79 | the _ _ O 80 | Phenomenal _ _ O 81 | Radness _ _ O 82 | project _ _ O 83 | , _ _ O 84 | after _ _ O 85 | its _ _ O 86 | debut _ _ S-Occasion 87 | in _ _ O 88 | Miami _ _ B-NamedLocation 89 | a _ _ O 90 | few _ _ O 91 | years _ _ O 92 | ago _ _ O 93 | . _ _ O 94 | 95 | Milan _ _ B-NamedLocation 96 | was _ _ O 97 | all _ _ O 98 | the _ _ O 99 | really _ _ O 100 | big _ _ O 101 | girls _ _ O 102 | . _ _ O 103 | 104 | It _ _ O 105 | was _ _ O 106 | the _ _ O 107 | best _ _ O 108 | ! _ _ O 109 | 110 | We _ _ O 111 | go _ _ O 112 | to _ _ O 113 | flea _ _ O 114 | markets _ _ O 115 | together _ _ O 116 | when _ _ O 117 | we _ _ O 118 | ' _ _ O 119 | re _ _ O 120 | in _ _ O 121 | LA _ _ B-NamedLocation 122 | . _ _ O -------------------------------------------------------------------------------- /tests/resources/tasks/fashion_disjunct/eng.testa: -------------------------------------------------------------------------------- 1 | Most _ _ O 2 | wedding _ _ B-Occasion 3 | dresses _ _ B-NominalProduct 4 | , _ _ O 5 | for _ _ O 6 | example _ _ O 7 | , _ _ O 8 | are _ _ O 9 | simply _ _ O 10 | too _ _ O 11 | enormous _ _ O 12 | and _ _ O 13 | terrifyingly _ _ B-CreativeWord 14 | loaded _ _ O 15 | with _ _ O 16 | sentimental _ _ O 17 | value _ _ O 18 | for _ _ O 19 | DIY _ _ B-ProductDesign 20 | dyeing _ _ I-ProductDesign 21 | . _ _ O -------------------------------------------------------------------------------- /tests/resources/tasks/fashion_disjunct/eng.testb: -------------------------------------------------------------------------------- 1 | For _ _ O 2 | my _ _ O 3 | Nurse _ _ B-NamedOrganizationBrand 4 | Ratched _ _ I-NamedOrganizationBrand 5 | dress _ _ B-NominalProduct 6 | , _ _ O 7 | I _ _ O 8 | had _ _ O 9 | brought _ _ O 10 | two _ _ O 11 | dyeing _ _ O 12 | options _ _ O 13 | — _ _ O 14 | one _ _ O 15 | more _ _ O 16 | ambitious _ _ B-Ambitiousness 17 | than _ _ O 18 | the _ _ O 19 | other _ _ O 20 | . _ _ O -------------------------------------------------------------------------------- /tests/resources/tasks/fashion_disjunct/eng.train: -------------------------------------------------------------------------------- 1 | From _ _ O 2 | the _ _ O 3 | charming _ _ O 4 | Arlésienne _ _ B-NamedPerson 5 | to _ _ O 6 | the _ _ O 7 | shepherdess _ _ O 8 | in _ _ O 9 | a _ _ O 10 | fairy _ _ O 11 | tale _ _ O 12 | , _ _ O 13 | with _ _ O 14 | faille _ _ B-ProductPart 15 | , _ _ O 16 | piqué _ _ B-ProductPart 17 | , _ _ O 18 | taffeta _ _ B-ProductPart 19 | , _ _ O 20 | tulle _ _ B-ProductPart 21 | , _ _ O 22 | embroidery _ _ B-ProductPart 23 | , _ _ O 24 | lace _ _ B-ProductPart 25 | , _ _ O 26 | the _ _ O 27 | repertoire _ _ O 28 | is _ _ O 29 | inexhaustible _ _ O 30 | . 
_ _ O 31 | 32 | 33 | 34 | 35 | Subscribe _ _ O 36 | to _ _ O 37 | Highsnobiety _ _ B-NamedOrganizationPublisher 38 | on _ _ O 39 | YouTube _ _ B-NamedOrganizationOther 40 | Eric _ _ B-NamedPerson 41 | Schoenborn _ _ I-NamedPerson 42 | and _ _ O 43 | Ed _ _ B-NamedPerson 44 | Selego _ _ I-NamedPerson 45 | have _ _ O 46 | joined _ _ O 47 | forces _ _ O 48 | with _ _ O 49 | Nocturnal _ _ B-NamedOrganizationBrand 50 | skate _ _ B-Activity 51 | shop _ _ O 52 | to _ _ O 53 | turn _ _ O 54 | Drexel _ _ B-NamedLocation 55 | University _ _ I-NamedLocation 56 | ’ _ _ O 57 | s _ _ O 58 | Leonard _ _ B-NamedLocation 59 | Pearlstein _ _ I-NamedLocation 60 | Gallery _ _ I-NamedLocation 61 | into _ _ O 62 | an _ _ O 63 | interactive _ _ O 64 | skate _ _ B-Activity 65 | pop _ _ O 66 | - _ _ O 67 | up _ _ O 68 | park _ _ O 69 | . _ _ O 70 | 71 | Philly _ _ B-NamedPerson 72 | Radness _ _ I-NamedPerson 73 | accounts _ _ O 74 | for _ _ O 75 | the _ _ O 76 | second _ _ O 77 | installment _ _ O 78 | in _ _ O 79 | the _ _ O 80 | Phenomenal _ _ O 81 | Radness _ _ O 82 | project _ _ O 83 | , _ _ O 84 | after _ _ O 85 | its _ _ O 86 | debut _ _ O 87 | in _ _ O 88 | Miami _ _ B-NamedLocation 89 | a _ _ O 90 | few _ _ O 91 | years _ _ O 92 | ago _ _ O 93 | . _ _ O 94 | 95 | Milan _ _ B-NamedLocation 96 | was _ _ O 97 | all _ _ O 98 | the _ _ O 99 | really _ _ O 100 | big _ _ O 101 | girls _ _ O 102 | . _ _ O 103 | 104 | It _ _ O 105 | was _ _ O 106 | the _ _ O 107 | best _ _ O 108 | ! _ _ O 109 | 110 | We _ _ O 111 | go _ _ O 112 | to _ _ O 113 | flea _ _ O 114 | markets _ _ O 115 | together _ _ O 116 | when _ _ O 117 | we _ _ O 118 | ' _ _ O 119 | re _ _ O 120 | in _ _ O 121 | LA _ _ B-NamedLocation 122 | . _ _ O -------------------------------------------------------------------------------- /tests/resources/tasks/fashion_nodev/test.tsv: -------------------------------------------------------------------------------- 1 | Most _ _ O 2 | wedding _ _ B-Occasion 3 | dresses _ _ B-NominalProduct 4 | , _ _ O 5 | for _ _ O 6 | example _ _ O 7 | , _ _ O 8 | are _ _ O 9 | simply _ _ O 10 | too _ _ O 11 | enormous _ _ O 12 | and _ _ O 13 | terrifyingly _ _ O 14 | loaded _ _ O 15 | with _ _ O 16 | sentimental _ _ O 17 | value _ _ O 18 | for _ _ O 19 | DIY _ _ B-ProductDesign 20 | dyeing _ _ I-ProductDesign 21 | . _ _ O -------------------------------------------------------------------------------- /tests/resources/tasks/fashion_nodev/train.tsv: -------------------------------------------------------------------------------- 1 | From _ _ O 2 | the _ _ O 3 | charming _ _ O 4 | Arlésienne _ _ B-NamedPerson 5 | to _ _ O 6 | the _ _ O 7 | shepherdess _ _ O 8 | in _ _ O 9 | a _ _ O 10 | fairy _ _ O 11 | tale _ _ O 12 | , _ _ O 13 | with _ _ O 14 | faille _ _ B-ProductPart 15 | , _ _ O 16 | piqué _ _ B-ProductPart 17 | , _ _ O 18 | taffeta _ _ B-ProductPart 19 | , _ _ O 20 | tulle _ _ B-ProductPart 21 | , _ _ O 22 | embroidery _ _ B-ProductPart 23 | , _ _ O 24 | lace _ _ B-ProductPart 25 | , _ _ O 26 | the _ _ O 27 | repertoire _ _ O 28 | is _ _ O 29 | inexhaustible _ _ O 30 | . 
_ _ O 31 | 32 | 33 | 34 | 35 | Subscribe _ _ O 36 | to _ _ O 37 | Highsnobiety _ _ B-NamedOrganizationPublisher 38 | on _ _ O 39 | YouTube _ _ B-NamedOrganizationOther 40 | Eric _ _ B-NamedPerson 41 | Schoenborn _ _ I-NamedPerson 42 | and _ _ O 43 | Ed _ _ B-NamedPerson 44 | Selego _ _ I-NamedPerson 45 | have _ _ O 46 | joined _ _ O 47 | forces _ _ O 48 | with _ _ O 49 | Nocturnal _ _ B-NamedOrganizationBrand 50 | skate _ _ B-Activity 51 | shop _ _ O 52 | to _ _ O 53 | turn _ _ O 54 | Drexel _ _ B-NamedLocation 55 | University _ _ I-NamedLocation 56 | ’ _ _ O 57 | s _ _ O 58 | Leonard _ _ B-NamedLocation 59 | Pearlstein _ _ I-NamedLocation 60 | Gallery _ _ I-NamedLocation 61 | into _ _ O 62 | an _ _ O 63 | interactive _ _ O 64 | skate _ _ B-Activity 65 | pop _ _ O 66 | - _ _ O 67 | up _ _ O 68 | park _ _ O 69 | . _ _ O 70 | 71 | Philly _ _ B-NamedPerson 72 | Radness _ _ I-NamedPerson 73 | accounts _ _ O 74 | for _ _ O 75 | the _ _ O 76 | second _ _ O 77 | installment _ _ O 78 | in _ _ O 79 | the _ _ O 80 | Phenomenal _ _ O 81 | Radness _ _ O 82 | project _ _ O 83 | , _ _ O 84 | after _ _ O 85 | its _ _ O 86 | debut _ _ O 87 | in _ _ O 88 | Miami _ _ B-NamedLocation 89 | a _ _ O 90 | few _ _ O 91 | years _ _ O 92 | ago _ _ O 93 | . _ _ O 94 | 95 | Milan _ _ B-NamedLocation 96 | was _ _ O 97 | all _ _ O 98 | the _ _ O 99 | really _ _ O 100 | big _ _ O 101 | girls _ _ O 102 | . _ _ O 103 | 104 | It _ _ O 105 | was _ _ O 106 | the _ _ O 107 | best _ _ O 108 | ! _ _ O 109 | 110 | We _ _ O 111 | go _ _ O 112 | to _ _ O 113 | flea _ _ O 114 | markets _ _ O 115 | together _ _ O 116 | when _ _ O 117 | we _ _ O 118 | ' _ _ O 119 | re _ _ O 120 | in _ _ O 121 | LA _ _ B-NamedLocation 122 | . _ _ O -------------------------------------------------------------------------------- /tests/resources/tasks/fewshot_conll/1shot.txt: -------------------------------------------------------------------------------- 1 | Three O 2 | Russian B-MISC 3 | servicemen O 4 | were O 5 | killed O 6 | on O 7 | Saturday O 8 | when O 9 | unidentified O 10 | gunmen O 11 | attacked O 12 | guards O 13 | at O 14 | an O 15 | anti-aircraft O 16 | installation O 17 | outside O 18 | Moscow B-LOC 19 | , O 20 | Interfax B-ORG 21 | news O 22 | agency O 23 | said O 24 | . O 25 | 26 | " O 27 | I O 28 | think O 29 | that O 30 | , O 31 | on O 32 | balance O 33 | , O 34 | it O 35 | is O 36 | looking O 37 | a O 38 | little O 39 | bit O 40 | on O 41 | the O 42 | strong O 43 | side O 44 | , O 45 | " O 46 | Lindsey B-PER 47 | said O 48 | . O 49 | -------------------------------------------------------------------------------- /tests/resources/tasks/imdb/README.md: -------------------------------------------------------------------------------- 1 | ## IMDB 2 | 3 | Data is taken from [here](http://ai.stanford.edu/~amaas/data/sentiment/). 4 | 5 | The dataset contains data for binary sentiment classification. 6 | We took a small random sample and converted it to the expected format of our data fetcher: 7 | ``` 8 | __label__<class_name> <text> 9 | ``` 10 | 11 | #### Publications Using the Dataset 12 | 13 | * Andrew L. Maas, Raymond E. Daly, Peter T. Pham, Dan Huang, Andrew Y. Ng, and Christopher Potts. (2011). Learning Word Vectors for Sentiment Analysis. The 49th Annual Meeting of the Association for Computational Linguistics (ACL 2011).
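For orientation, here is a minimal sketch of how a folder in this fastText-style format is loaded, mirroring the usage in `tests/test_tars.py` further below. The `tasks_base_path` pytest fixture is an assumption taken from this test suite and points at `tests/resources/tasks`:

```python
from flair.datasets import ClassificationCorpus

# Minimal sketch (assumption: tasks_base_path is the pytest fixture used by
# this test suite and points at tests/resources/tasks).
# ClassificationCorpus auto-detects the train/dev/test files in the folder
# and reads one "__label__<label> <text>" document per line, storing each
# label under the default label type "class".
corpus = ClassificationCorpus(tasks_base_path / "imdb")
label_dict = corpus.make_label_dictionary(label_type="class")
print(corpus)
print(label_dict)
```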
14 | -------------------------------------------------------------------------------- /tests/resources/tasks/imdb_underscore/README.md: -------------------------------------------------------------------------------- 1 | ## IMDB 2 | 3 | Data is taken from [here](http://ai.stanford.edu/~amaas/data/sentiment/). 4 | 5 | The dataset contains data for binary sentiment classification. 6 | We took a small random sample and converted it to the expected format of our data fetcher: 7 | ``` 8 | __label__<class_name> <text> 9 | ``` 10 | 11 | #### Publications Using the Dataset 12 | 13 | * Andrew L. Maas, Raymond E. Daly, Peter T. Pham, Dan Huang, Andrew Y. Ng, and Christopher Potts. (2011). Learning Word Vectors for Sentiment Analysis. The 49th Annual Meeting of the Association for Computational Linguistics (ACL 2011). 14 | -------------------------------------------------------------------------------- /tests/resources/tasks/jsonl/testa.jsonl: -------------------------------------------------------------------------------- 1 | {"id": 101319, "data": "This is New Berlin", "label": [[8, 18, "LOC"]], "metadata": [["from", 123]]} 2 | {"id": 101320, "data": "EU rejects German call to boycott British lamb .", "label": [[0, 2, "ORG"], [11, 17, "MISC"], [34, 46, "MISC"]], "metadata": [["from", 124]]} 3 | {"id": 101321, "data": "Peter Blackburn", "label": [[0, 15, "PER"]], "metadata": [["from", 125]]} 4 | -------------------------------------------------------------------------------- /tests/resources/tasks/jsonl/testb.jsonl: -------------------------------------------------------------------------------- 1 | {"id": 101319, "data": "This is New Berlin", "label": [[8, 18, "LOC"]]} 2 | {"id": 101320, "data": "EU rejects German call to boycott British lamb .", "label": [[0, 2, "ORG"], [11, 17, "MISC"], [34, 46, "MISC"]]} 3 | {"id": 101321, "data": "Peter Blackburn", "label": [[0, 15, "PER"]]} 4 | -------------------------------------------------------------------------------- /tests/resources/tasks/jsonl/train.jsonl: -------------------------------------------------------------------------------- 1 | {"id": 101319, "data": "This is New Berlin", "label": [[8, 18, "LOC"]]} 2 | {"id": 101319, "data": "This is New Berlin.", "label": [[8, 18, "LOC"]]} 3 | {"id": 101319, "data": "This is New Berlin.", "label": [[8, 19, "LOC"]]} 4 | {"id": 101320, "data": "EU rejects German call to boycott British lamb .", "label": [[0, 2, "ORG"], [11, 17, "MISC"], [34, 46, "MISC"]]} 5 | {"id": 101321, "data": "Peter Blackburn", "label": [[0, 15, "PER"]]} 6 | -------------------------------------------------------------------------------- /tests/resources/tasks/multi_class/dev.txt: -------------------------------------------------------------------------------- 1 | __label__apple apple 2 | __label__tv tv 3 | __label__guitar guitar 4 | __label__apple __label__tv apple tv -------------------------------------------------------------------------------- /tests/resources/tasks/multi_class/test.txt: -------------------------------------------------------------------------------- 1 | __label__guitar guitar 2 | __label__apple apple 3 | __label__tv tv 4 | __label__apple __label__tv apple tv 5 | __label__apple __label__guitar apple tv -------------------------------------------------------------------------------- /tests/resources/tasks/multi_class/train.txt: -------------------------------------------------------------------------------- 1 | __label__tv tv 2 | __label__apple __label__tv apple tv 3 | __label__apple apple 4 | __label__tv tv 5 | __label__apple
__label__tv apple tv 6 | __label__guitar guitar 7 | __label__guitar guitar -------------------------------------------------------------------------------- /tests/resources/tasks/multi_class_negative_examples/dev.txt: -------------------------------------------------------------------------------- 1 | __label__apple apple 2 | __label__tv tv 3 | __label__guitar guitar 4 | __label__apple __label__tv apple tv 5 | dev example without labels 6 | -------------------------------------------------------------------------------- /tests/resources/tasks/multi_class_negative_examples/test.txt: -------------------------------------------------------------------------------- 1 | __label__guitar guitar 2 | __label__apple apple 3 | __label__tv tv 4 | __label__apple __label__tv apple tv 5 | __label__apple __label__guitar apple tv 6 | test example without labels 7 | -------------------------------------------------------------------------------- /tests/resources/tasks/multi_class_negative_examples/train.txt: -------------------------------------------------------------------------------- 1 | __label__tv tv 2 | __label__apple __label__tv apple tv 3 | __label__apple apple 4 | __label__tv tv 5 | __label__apple __label__tv apple tv 6 | __label__guitar guitar 7 | __label__guitar guitar 8 | train example without labels 9 | -------------------------------------------------------------------------------- /tests/resources/tasks/ner_german_germeval/NER-de-dev.tsv: -------------------------------------------------------------------------------- 1 | # http://de.wikipedia.org/wiki/Toyota_Crown [2009-08-13] 2 | 1 1980 O O 3 | 2 kam O O 4 | 3 der O O 5 | 4 Crown B-OTH O 6 | 5 als O O 7 | 6 Versuch O O 8 | 7 von O O 9 | 8 Toyota B-ORG O 10 | 9 , O O 11 | 10 sich O O 12 | 11 in O O 13 | 12 der O O 14 | 13 Oberen O O 15 | 14 Mittelklasse O O 16 | 15 zu O O 17 | 16 etablieren O O 18 | 17 , O O 19 | 18 auch O O 20 | 19 nach O O 21 | 20 Deutschland B-LOC O 22 | 21 . O O 23 | -------------------------------------------------------------------------------- /tests/resources/tasks/ner_german_germeval/NER-de-test.tsv: -------------------------------------------------------------------------------- 1 | # http://de.wikipedia.org/wiki/Schönburg_(Rhein) [2009-10-23] 2 | 1 1951 O O 3 | 2 bis O O 4 | 3 1953 O O 5 | 4 wurde O O 6 | 5 der O O 7 | 6 nördliche O O 8 | 7 Teil O O 9 | 8 als O O 10 | 9 Jugendburg O O 11 | 10 des O O 12 | 11 Kolpingwerkes B-OTH O 13 | 12 gebaut O O 14 | 13 . O O -------------------------------------------------------------------------------- /tests/resources/tasks/ner_german_germeval/NER-de-train.tsv: -------------------------------------------------------------------------------- 1 | # n-tv.de vom 26.02.2005 [2005-02-26] 2 | 1 Schartau B-PER O 3 | 2 sagte O O 4 | 3 dem O O 5 | 4 " O O 6 | 5 Tagesspiegel B-ORG O 7 | 6 " O O 8 | 7 vom O O 9 | 8 Freitag O O 10 | 9 , O O 11 | 10 Fischer B-PER O 12 | 11 sei O O 13 | 12 " O O 14 | 13 in O O 15 | 14 einer O O 16 | 15 Weise O O 17 | 16 aufgetreten O O 18 | 17 , O O 19 | 18 die O O 20 | 19 alles O O 21 | 20 andere O O 22 | 21 als O O 23 | 22 überzeugend O O 24 | 23 war O O 25 | 24 " O O 26 | 25 . 
O O 27 | 28 | # welt.de vom 29.10.2005 [2005-10-29] 29 | 1 Firmengründer O O 30 | 2 Wolf B-PER O 31 | 3 Peter I-PER O 32 | 4 Bree I-PER O 33 | 5 arbeitete O O 34 | 6 Anfang O O 35 | 7 der O O 36 | 8 siebziger O O 37 | 9 Jahre O O 38 | 10 als O O 39 | 11 Möbelvertreter O O 40 | 12 , O O 41 | 13 als O O 42 | 14 er O O 43 | 15 einen O O 44 | 16 fliegenden O O 45 | 17 Händler O O 46 | 18 aus O O 47 | 19 dem O O 48 | 20 Libanon B-LOC O 49 | 21 traf O O 50 | 22 . O O -------------------------------------------------------------------------------- /tests/resources/tasks/ontonotes/tiny-conll-2012.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/tests/resources/tasks/ontonotes/tiny-conll-2012.zip -------------------------------------------------------------------------------- /tests/resources/tasks/regression/README.md: -------------------------------------------------------------------------------- 1 | ## REGRESSION 2 | 3 | Data is taken from [here](http://saifmohammad.com/WebPages/EmotionIntensity-SharedTask.html). 4 | 5 | The dataset contains a collection of tweets with joy intensity values. 6 | We took the joy dataset and converted it to the expected format of our data fetcher: 7 | ``` 8 | __label__<value> <text> 9 | ``` 10 | 11 | #### Publications About the Dataset 12 | 13 | * Emotion Intensities in Tweets. Saif M. Mohammad and Felipe Bravo-Marquez. In Proceedings of the sixth joint conference on lexical and computational semantics (*Sem), August 2017, Vancouver, Canada. 14 | * WASSA-2017 Shared Task on Emotion Intensity. Saif M. Mohammad and Felipe Bravo-Marquez. In Proceedings of the EMNLP 2017 Workshop on Computational Approaches to Subjectivity, Sentiment, and Social Media (WASSA), September 2017, Copenhagen, Denmark. 15 | -------------------------------------------------------------------------------- /tests/resources/tasks/span_labels/span_first.txt: -------------------------------------------------------------------------------- 1 | Vgl. O 2 | Rundschreiben O 3 | RAB PARTA 4 | 1/2010 YEAR 5 | Rz MISC 6 | 8. MISC -------------------------------------------------------------------------------- /tests/resources/tasks/span_labels/span_second.txt: -------------------------------------------------------------------------------- 1 | -DOCSTART- 2 | 3 | Vgl. O 4 | Rundschreiben O 5 | RAB PARTA 6 | 1/2010 YEAR 7 | Rz MISC 8 | 8. MISC -------------------------------------------------------------------------------- /tests/resources/tasks/span_labels/span_third.txt: -------------------------------------------------------------------------------- 1 | -DOCSTART- 2 | 3 | Rundschreiben O 4 | 5 | Vgl. O 6 | Rundschreiben O 7 | RAB PARTA 8 | 1/2010 YEAR 9 | Rz MISC 10 | 8.
MISC -------------------------------------------------------------------------------- /tests/resources/tasks/trivial/trivial_bioes/dev.txt: -------------------------------------------------------------------------------- 1 | this O 2 | is O 3 | New B-LOC 4 | York I-LOC 5 | 6 | this O 7 | is O 8 | Berlin B-LOC 9 | 10 | here O 11 | is O 12 | New B-LOC 13 | York I-LOC 14 | 15 | here O 16 | is O 17 | Berlin B-LOC 18 | 19 | I O 20 | like O 21 | New B-LOC 22 | York I-LOC 23 | 24 | I O 25 | like O 26 | Berlin B-LOC 27 | 28 | we O 29 | like O 30 | New B-LOC 31 | York I-LOC 32 | 33 | we O 34 | like O 35 | Berlin B-LOC 36 | -------------------------------------------------------------------------------- /tests/resources/tasks/trivial/trivial_bioes/test.txt: -------------------------------------------------------------------------------- 1 | this O 2 | is O 3 | New B-LOC 4 | York I-LOC 5 | 6 | this O 7 | is O 8 | Berlin B-LOC 9 | 10 | here O 11 | is O 12 | New B-LOC 13 | York I-LOC 14 | 15 | here O 16 | is O 17 | Berlin B-LOC 18 | 19 | I O 20 | like O 21 | New B-LOC 22 | York I-LOC 23 | 24 | I O 25 | like O 26 | Berlin B-LOC 27 | 28 | we O 29 | like O 30 | New B-LOC 31 | York I-LOC 32 | 33 | we O 34 | like O 35 | Berlin B-LOC 36 | -------------------------------------------------------------------------------- /tests/resources/tasks/trivial/trivial_bioes/train.txt: -------------------------------------------------------------------------------- 1 | this O 2 | is O 3 | New B-LOC 4 | York I-LOC 5 | 6 | this O 7 | is O 8 | Berlin B-LOC 9 | 10 | here O 11 | is O 12 | New B-LOC 13 | York I-LOC 14 | 15 | here O 16 | is O 17 | Berlin B-LOC 18 | 19 | I O 20 | like O 21 | New B-LOC 22 | York I-LOC 23 | 24 | I O 25 | like O 26 | Berlin B-LOC 27 | 28 | we O 29 | like O 30 | New B-LOC 31 | York I-LOC 32 | 33 | we O 34 | like O 35 | Berlin B-LOC 36 | 37 | this O 38 | is O 39 | New B-LOC 40 | York I-LOC 41 | 42 | this O 43 | is O 44 | Berlin B-LOC 45 | 46 | here O 47 | is O 48 | New B-LOC 49 | York I-LOC 50 | 51 | here O 52 | is O 53 | Berlin B-LOC 54 | 55 | I O 56 | like O 57 | New B-LOC 58 | York I-LOC 59 | 60 | I O 61 | like O 62 | Berlin B-LOC 63 | 64 | we O 65 | like O 66 | New B-LOC 67 | York I-LOC 68 | 69 | we O 70 | like O 71 | Berlin B-LOC -------------------------------------------------------------------------------- /tests/resources/tasks/trivial/trivial_bioes_with_boundaries/dev.txt: -------------------------------------------------------------------------------- 1 | this O 2 | is O 3 | New B-LOC 4 | York I-LOC 5 | 6 | here O 7 | is O 8 | New B-LOC 9 | York I-LOC 10 | 11 | I O 12 | like O 13 | New B-LOC 14 | York I-LOC 15 | 16 | we O 17 | like O 18 | New B-LOC 19 | York I-LOC 20 | 21 | -DOCSTART- 22 | 23 | this O 24 | is O 25 | Berlin B-LOC 26 | 27 | here O 28 | is O 29 | Berlin B-LOC 30 | 31 | I O 32 | like O 33 | Berlin B-LOC 34 | 35 | we O 36 | like O 37 | Berlin B-LOC -------------------------------------------------------------------------------- /tests/resources/tasks/trivial/trivial_bioes_with_boundaries/test.txt: -------------------------------------------------------------------------------- 1 | this O 2 | is O 3 | New B-LOC 4 | York I-LOC 5 | 6 | here O 7 | is O 8 | New B-LOC 9 | York I-LOC 10 | 11 | I O 12 | like O 13 | New B-LOC 14 | York I-LOC 15 | 16 | we O 17 | like O 18 | New B-LOC 19 | York I-LOC 20 | 21 | -DOCSTART- 22 | 23 | this O 24 | is O 25 | Berlin B-LOC 26 | 27 | here O 28 | is O 29 | Berlin B-LOC 30 | 31 | I O 32 | like O 33 | Berlin B-LOC 34 | 35 | we O 36 | like O 37 | 
Berlin B-LOC 38 | 39 | -DOCSTART- -------------------------------------------------------------------------------- /tests/resources/tasks/trivial/trivial_bioes_with_boundaries/train.txt: -------------------------------------------------------------------------------- 1 | this O 2 | is O 3 | New B-LOC 4 | York I-LOC 5 | 6 | here O 7 | is O 8 | New B-LOC 9 | York I-LOC 10 | 11 | I O 12 | like O 13 | New B-LOC 14 | York I-LOC 15 | 16 | we O 17 | like O 18 | New B-LOC 19 | York I-LOC 20 | 21 | -DOCSTART- 22 | 23 | this O 24 | is O 25 | Berlin B-LOC 26 | 27 | here O 28 | is O 29 | Berlin B-LOC 30 | 31 | I O 32 | like O 33 | Berlin B-LOC 34 | 35 | we O 36 | like O 37 | Berlin B-LOC 38 | 39 | -DOCSTART- 40 | 41 | this O 42 | is O 43 | New B-LOC 44 | York I-LOC 45 | 46 | here O 47 | is O 48 | New B-LOC 49 | York I-LOC 50 | 51 | I O 52 | like O 53 | New B-LOC 54 | York I-LOC 55 | 56 | we O 57 | like O 58 | New B-LOC 59 | York I-LOC -------------------------------------------------------------------------------- /tests/resources/tasks/trivial/trivial_text_classification_multi/dev.txt: -------------------------------------------------------------------------------- 1 | __label__pizza this is pizza 2 | __label__Berlin this is Berlin 3 | __label__Berlin __label__pizza this is Berlin and pizza 4 | __label__pizza here is pizza 5 | __label__Berlin here is Berlin 6 | __label__Berlin __label__pizza here is Berlin and pizza 7 | __label__pizza I like pizza 8 | __label__Berlin I like Berlin 9 | __label__Berlin __label__pizza I like Berlin and pizza 10 | __label__pizza we like pizza 11 | __label__Berlin we like Berlin 12 | __label__Berlin __label__pizza we like Berlin and pizza -------------------------------------------------------------------------------- /tests/resources/tasks/trivial/trivial_text_classification_multi/test.txt: -------------------------------------------------------------------------------- 1 | __label__pizza this is pizza 2 | __label__Berlin this is Berlin 3 | __label__Berlin __label__pizza this is Berlin and pizza 4 | __label__pizza here is pizza 5 | __label__Berlin here is Berlin 6 | __label__Berlin __label__pizza here is Berlin and pizza 7 | __label__pizza I like pizza 8 | __label__Berlin I like Berlin 9 | __label__Berlin __label__pizza I like Berlin and pizza 10 | __label__pizza we like pizza 11 | __label__Berlin we like Berlin 12 | __label__Berlin __label__pizza we like Berlin and pizza -------------------------------------------------------------------------------- /tests/resources/tasks/trivial/trivial_text_classification_multi/train.txt: -------------------------------------------------------------------------------- 1 | __label__pizza this is pizza 2 | __label__Berlin this is Berlin 3 | __label__Berlin __label__pizza this is Berlin and pizza 4 | __label__pizza here is pizza 5 | __label__Berlin here is Berlin 6 | __label__Berlin __label__pizza here is Berlin and pizza 7 | __label__pizza I like pizza 8 | __label__Berlin I like Berlin 9 | __label__Berlin __label__pizza I like Berlin and pizza 10 | __label__pizza we like pizza 11 | __label__Berlin we like Berlin 12 | __label__Berlin __label__pizza we like Berlin and pizza 13 | __label__pizza this is pizza 14 | __label__Berlin this is Berlin 15 | __label__Berlin __label__pizza this is Berlin and pizza 16 | __label__pizza here is pizza 17 | __label__Berlin here is Berlin 18 | __label__Berlin __label__pizza here is Berlin and pizza 19 | __label__pizza I like pizza 20 | __label__Berlin I like Berlin 21 | __label__Berlin __label__pizza I 
like Berlin and pizza 22 | __label__pizza we like pizza 23 | __label__Berlin we like Berlin 24 | __label__Berlin __label__pizza we like Berlin and pizza -------------------------------------------------------------------------------- /tests/resources/tasks/trivial/trivial_text_classification_single/dev.txt: -------------------------------------------------------------------------------- 1 | __label__New_York this is New York 2 | __label__Berlin this is Berlin 3 | __label__New_York here is New York 4 | __label__Berlin here is Berlin 5 | __label__New_York I like New York 6 | __label__Berlin I like Berlin 7 | __label__New_York we like New York 8 | __label__Berlin we like Berlin 9 | -------------------------------------------------------------------------------- /tests/resources/tasks/trivial/trivial_text_classification_single/test.txt: -------------------------------------------------------------------------------- 1 | __label__New_York this is New York 2 | __label__Berlin this is Berlin 3 | __label__New_York here is New York 4 | __label__Berlin here is Berlin 5 | __label__New_York I like New York 6 | __label__Berlin I like Berlin 7 | __label__New_York we like New York 8 | __label__Berlin we like Berlin 9 | -------------------------------------------------------------------------------- /tests/resources/tasks/trivial/trivial_text_classification_single/train.txt: -------------------------------------------------------------------------------- 1 | __label__New_York this is New York 2 | __label__Berlin this is Berlin 3 | __label__New_York here is New York 4 | __label__Berlin here is Berlin 5 | __label__New_York I like New York 6 | __label__Berlin I like Berlin 7 | __label__New_York we like New York 8 | __label__Berlin we like Berlin 9 | __label__New_York this is New York 10 | __label__Berlin this is Berlin 11 | __label__New_York here is New York 12 | __label__Berlin here is Berlin 13 | __label__New_York I like New York 14 | __label__Berlin I like Berlin 15 | __label__New_York we like New York 16 | __label__Berlin we like Berlin -------------------------------------------------------------------------------- /tests/resources/tasks/ud_english/en_ewt-ud-dev.conllu: -------------------------------------------------------------------------------- 1 | # newdoc id = weblog-blogspot.com_nominations_20041117172713_ENG_20041117_172713 2 | # sent_id = weblog-blogspot.com_nominations_20041117172713_ENG_20041117_172713-0001 3 | # text = From the AP comes this story : 4 | 1 From from ADP IN _ 3 case 3:case _ 5 | 2 the the DET DT Definite=Def|PronType=Art 3 det 3:det _ 6 | 3 AP AP PROPN NNP Number=Sing 4 obl 4:obl:from _ 7 | 4 comes come VERB VBZ Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin 0 root 0:root _ 8 | 5 this this DET DT Number=Sing|PronType=Dem 6 det 6:det _ 9 | 6 story story NOUN NN Number=Sing 4 nsubj 4:nsubj _ 10 | 7 : : PUNCT : _ 4 punct 4:punct _ 11 | 12 | # sent_id = weblog-blogspot.com_nominations_20041117172713_ENG_20041117_172713-0002 13 | # text = President Bush on Tuesday nominated two individuals to replace retiring jurists on federal courts in the Washington area. 
14 | 1 President President PROPN NNP Number=Sing 5 nsubj 5:nsubj _ 15 | 2 Bush Bush PROPN NNP Number=Sing 1 flat 1:flat _ 16 | 3 on on ADP IN _ 4 case 4:case _ 17 | 4 Tuesday Tuesday PROPN NNP Number=Sing 5 obl 5:obl:on _ 18 | 5 nominated nominate VERB VBD Mood=Ind|Tense=Past|VerbForm=Fin 0 root 0:root _ 19 | 6 two two NUM CD NumType=Card 7 nummod 7:nummod _ 20 | 7 individuals individual NOUN NNS Number=Plur 5 obj 5:obj _ 21 | 8 to to PART TO _ 9 mark 9:mark _ 22 | 9 replace replace VERB VB VerbForm=Inf 5 advcl 5:advcl:to _ 23 | 10 retiring retire VERB VBG VerbForm=Ger 11 amod 11:amod _ 24 | 11 jurists jurist NOUN NNS Number=Plur 9 obj 9:obj _ 25 | 12 on on ADP IN _ 14 case 14:case _ 26 | 13 federal federal ADJ JJ Degree=Pos 14 amod 14:amod _ 27 | 14 courts court NOUN NNS Number=Plur 11 nmod 11:nmod:on _ 28 | 15 in in ADP IN _ 18 case 18:case _ 29 | 16 the the DET DT Definite=Def|PronType=Art 18 det 18:det _ 30 | 17 Washington Washington PROPN NNP Number=Sing 18 compound 18:compound _ 31 | 18 area area NOUN NN Number=Sing 14 nmod 14:nmod:in SpaceAfter=No 32 | 19 . . PUNCT . _ 5 punct 5:punct _ -------------------------------------------------------------------------------- /tests/resources/tasks/up_english/en_ewt-up-dev.conllu: -------------------------------------------------------------------------------- 1 | # newdoc id = weblog-blogspot.com_nominations_20041117172713_ENG_20041117_172713 2 | # sent_id = weblog-blogspot.com_nominations_20041117172713_ENG_20041117_172713-0001 3 | # text = From the AP comes this story : 4 | 1 From from ADP IN _ 3 case 3:case _ _ _ 5 | 2 the the DET DT Definite=Def|PronType=Art 3 det 3:det _ _ _ 6 | 3 AP AP PROPN NNP Number=Sing 4 obl 4:obl:from _ _ ARG2 7 | 4 comes come VERB VBZ Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin 0 root 0:root _ come.03 V 8 | 5 this this DET DT Number=Sing|PronType=Dem 6 det 6:det _ _ _ 9 | 6 story story NOUN NN Number=Sing 4 nsubj 4:nsubj _ _ ARG1 10 | 7 : : PUNCT : _ 4 punct 4:punct _ _ _ 11 | 12 | # sent_id = weblog-blogspot.com_nominations_20041117172713_ENG_20041117_172713-0002 13 | # text = President Bush on Tuesday nominated two individuals to replace retiring jurists on federal courts in the Washington area. 14 | 1 President President PROPN NNP Number=Sing 5 nsubj 5:nsubj _ _ ARG0 _ 15 | 2 Bush Bush PROPN NNP Number=Sing 1 flat 1:flat _ _ _ _ 16 | 3 on on ADP IN _ 4 case 4:case _ _ _ _ 17 | 4 Tuesday Tuesday PROPN NNP Number=Sing 5 obl 5:obl:on _ _ ARGM-TMP _ 18 | 5 nominated nominate VERB VBD Mood=Ind|Tense=Past|VerbForm=Fin 0 root 0:root _ nominate.01 V _ 19 | 6 two two NUM CD NumType=Card 7 nummod 7:nummod _ _ _ _ 20 | 7 individuals individual NOUN NNS Number=Plur 5 obj 5:obj _ _ ARG1 ARG0 21 | 8 to to PART TO _ 9 mark 9:mark _ _ _ _ 22 | 9 replace replace VERB VB VerbForm=Inf 5 advcl 5:advcl:to _ replace.01 ARG2 V 23 | 10 retiring retire VERB VBG VerbForm=Ger 11 amod 11:amod _ _ _ _ 24 | 11 jurists jurist NOUN NNS Number=Plur 9 obj 9:obj _ _ _ ARG1 25 | 12 on on ADP IN _ 14 case 14:case _ _ _ _ 26 | 13 federal federal ADJ JJ Degree=Pos 14 amod 14:amod _ _ _ _ 27 | 14 courts court NOUN NNS Number=Plur 11 nmod 11:nmod:on _ _ _ _ 28 | 15 in in ADP IN _ 18 case 18:case _ _ _ _ 29 | 16 the the DET DT Definite=Def|PronType=Art 18 det 18:det _ _ _ _ 30 | 17 Washington Washington PROPN NNP Number=Sing 18 compound 18:compound _ _ _ _ 31 | 18 area area NOUN NN Number=Sing 14 nmod 14:nmod:in SpaceAfter=No _ _ _ 32 | 19 . . PUNCT . 
_ 5 punct 5:punct _ _ _ _ -------------------------------------------------------------------------------- /tests/resources/tasks/up_english/en_ewt-up-test.conllu: -------------------------------------------------------------------------------- 1 | # newdoc id = weblog-blogspot.com_zentelligence_20040423000200_ENG_20040423_000200 2 | # sent_id = weblog-blogspot.com_zentelligence_20040423000200_ENG_20040423_000200-0001 3 | # text = What if Google Morphed Into GoogleOS? 4 | 1 What what PRON WP PronType=Int 0 root 0:root _ _ _ 5 | 2 if if SCONJ IN _ 4 mark 4:mark _ _ _ 6 | 3 Google Google PROPN NNP Number=Sing 4 nsubj 4:nsubj _ _ ARG1 7 | 4 Morphed morph VERB VBD Mood=Ind|Tense=Past|VerbForm=Fin 1 advcl 1:advcl:if _ morph.01 V 8 | 5 Into into ADP IN _ 6 case 6:case _ _ _ 9 | 6 GoogleOS GoogleOS PROPN NNP Number=Sing 4 obl 4:obl:into SpaceAfter=No _ ARG2 10 | 7 ? ? PUNCT . _ 4 punct 4:punct _ _ _ 11 | 12 | # sent_id = weblog-blogspot.com_zentelligence_20040423000200_ENG_20040423_000200-0002 13 | # text = What if Google expanded on its search-engine (and now e-mail) wares into a full-fledged operating system? 14 | 1 What what PRON WP PronType=Int 0 root 0:root _ _ _ 15 | 2 if if SCONJ IN _ 4 mark 4:mark _ _ _ 16 | 3 Google Google PROPN NNP Number=Sing 4 nsubj 4:nsubj _ _ ARG0 17 | 4 expanded expand VERB VBD Mood=Ind|Tense=Past|VerbForm=Fin 1 advcl 1:advcl:if _ expand.01 V 18 | 5 on on ADP IN _ 15 case 15:case _ _ _ 19 | 6 its its PRON PRP$ Gender=Neut|Number=Sing|Person=3|Poss=Yes|PronType=Prs 15 nmod:poss 15:nmod:poss _ _ _ 20 | 7 search search NOUN NN Number=Sing 9 compound 9:compound SpaceAfter=No _ _ 21 | 8 - - PUNCT HYPH _ 9 punct 9:punct SpaceAfter=No _ _ 22 | 9 engine engine NOUN NN Number=Sing 15 compound 15:compound _ _ _ 23 | 10 ( ( PUNCT -LRB- _ 9 punct 9:punct SpaceAfter=No _ _ 24 | 11 and and CCONJ CC _ 13 cc 13:cc _ _ _ 25 | 12 now now ADV RB _ 13 advmod 13:advmod _ _ _ 26 | 13 e-mail e-mail NOUN NN Number=Sing 9 conj 9:conj:and|15:compound SpaceAfter=No _ _ 27 | 14 ) ) PUNCT -RRB- _ 15 punct 15:punct _ _ _ 28 | 15 wares wares NOUN NNS Number=Plur 4 obl 4:obl:on _ _ ARG1 29 | 16 into into ADP IN _ 22 case 22:case _ _ _ 30 | 17 a a DET DT Definite=Ind|PronType=Art 22 det 22:det _ _ _ 31 | 18 full full ADV RB _ 20 advmod 20:advmod SpaceAfter=No _ _ 32 | 19 - - PUNCT HYPH _ 20 punct 20:punct SpaceAfter=No _ _ 33 | 20 fledged fledged ADJ JJ Degree=Pos 22 amod 22:amod _ _ _ 34 | 21 operating operating NOUN NN Number=Sing 22 compound 22:compound _ _ _ 35 | 22 system system NOUN NN Number=Sing 4 obl 4:obl:into SpaceAfter=No _ ARG4 36 | 23 ? ? PUNCT . _ 4 punct 4:punct _ _ _ -------------------------------------------------------------------------------- /tests/resources/visual/snippet.txt: -------------------------------------------------------------------------------- 1 | The U.S. Centers for Disease Control and Prevention initially advised school systems to close if outbreaks occurred , then reversed itself , saying the apparent mildness of the virus meant most schools and day care centers should stay open , even if they had confirmed cases of swine flu . 2 | When Ms. Winfrey invited Suzanne Somers to share her controversial views about bio-identical hormone treatment on her syndicated show in 2009 , it won Ms. Winfrey a rare dollop of unflattering press , including a Newsweek cover story titled " Crazy Talk : Oprah , Wacky Cures & You . " 3 | Elk calling -- a skill that hunters perfected long ago to lure game with the promise of a little romance -- is now its own sport . 4 | Don 't ! 
5 | Fish , ranked 98th in the world , fired 22 aces en route to a 6-3 , 6-7 ( 5 / 7 ) , 7-6 ( 7 / 4 ) win over seventh-seeded Argentinian David Nalbandian . 6 | Why does everything have to become such a big issue ? 7 | AMMAN ( Reuters ) - King Abdullah of Jordan will meet U.S. President Barack Obama in Washington on April 21 to lobby on behalf of Arab states for a stronger U.S. role in Middle East peacemaking , palace officials said on Sunday . 8 | To help keep traffic flowing the Congestion Charge will remain in operation through-out the strike and TfL will be suspending road works on major London roads wherever possible . 9 | If no candidate wins an absolute majority , there will be a runoff between the top two contenders , most likely in mid-October . 10 | Authorities previously served search warrants at Murray 's Las Vegas home and his businesses in Las Vegas and Houston . -------------------------------------------------------------------------------- /tests/test_lemmatizer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import flair 4 | from flair.data import Sentence 5 | from flair.models import Lemmatizer 6 | 7 | 8 | def test_words_to_char_indices(): 9 | sentence = Sentence("Hello look what a beautiful day!") 10 | 11 | lemmatizer = Lemmatizer() # lemmatizer uses standard char dictionary 12 | 13 | d = lemmatizer.dummy_index 14 | e = lemmatizer.end_index 15 | s = lemmatizer.start_index 16 | 17 | string_list = sentence.to_tokenized_string().split() 18 | 19 | # With end symbol, without start symbol, padding in front 20 | target = torch.tensor( 21 | [ 22 | [d, d, d, d, 55, 5, 15, 15, 12, e], 23 | [d, d, d, d, d, 15, 12, 12, 28, e], 24 | [d, d, d, d, d, 23, 13, 9, 8, e], 25 | [d, d, d, d, d, d, d, d, 9, e], 26 | [24, 5, 9, 16, 8, 7, 22, 16, 15, e], 27 | [d, d, d, d, d, d, 14, 9, 27, e], 28 | [d, d, d, d, d, d, d, d, 76, e], 29 | ], 30 | dtype=torch.long, 31 | ).to(flair.device) 32 | out = lemmatizer.words_to_char_indices(string_list, end_symbol=True, start_symbol=False, padding_in_front=True) 33 | assert torch.equal(target, out) 34 | 35 | # Without end symbol, with start symbol, padding in back 36 | target = torch.tensor( 37 | [ 38 | [s, 55, 5, 15, 15, 12, d, d, d, d], 39 | [s, 15, 12, 12, 28, d, d, d, d, d], 40 | [s, 23, 13, 9, 8, d, d, d, d, d], 41 | [s, 9, d, d, d, d, d, d, d, d], 42 | [s, 24, 5, 9, 16, 8, 7, 22, 16, 15], 43 | [s, 14, 9, 27, d, d, d, d, d, d], 44 | [s, 76, d, d, d, d, d, d, d, d], 45 | ], 46 | dtype=torch.long, 47 | ).to(flair.device) 48 | out = lemmatizer.words_to_char_indices(string_list, end_symbol=False, start_symbol=True, padding_in_front=False) 49 | assert torch.equal(target, out) 50 | 51 | # Without end symbol, without start symbol, padding in front 52 | assert lemmatizer.words_to_char_indices( 53 | string_list, end_symbol=False, start_symbol=False, padding_in_front=True 54 | ).size() == (7, 9) 55 | -------------------------------------------------------------------------------- /tests/test_multitask.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import flair 4 | from flair.data import Sentence 5 | from flair.datasets import SENTEVAL_CR, SENTEVAL_SST_GRANULAR 6 | from flair.embeddings import TransformerDocumentEmbeddings 7 | from flair.models import MultitaskModel, TextClassifier 8 | from flair.nn.multitask import make_multitask_model_and_corpus 9 | from flair.trainers import ModelTrainer 10 | 11 | 12 | @pytest.mark.integration() 13 | 
def test_train_load_use_classifier(results_base_path, tasks_base_path): 14 | # --- Embeddings that are shared by both models --- # 15 | shared_embedding = TransformerDocumentEmbeddings("distilbert-base-uncased", fine_tune=True) 16 | 17 | # --- Task 1: Sentiment Analysis (5-class) --- # 18 | flair.set_seed(123) 19 | 20 | # Define corpus and model 21 | corpus_1 = SENTEVAL_SST_GRANULAR().downsample(0.01) 22 | 23 | model_1 = TextClassifier( 24 | shared_embedding, label_dictionary=corpus_1.make_label_dictionary("class", add_unk=False), label_type="class" 25 | ) 26 | 27 | # -- Task 2: Binary Sentiment Analysis on Customer Reviews -- # 28 | flair.set_seed(123) 29 | 30 | # Define corpus and model 31 | corpus_2 = SENTEVAL_CR().downsample(0.01) 32 | 33 | model_2 = TextClassifier( 34 | shared_embedding, 35 | label_dictionary=corpus_2.make_label_dictionary("sentiment", add_unk=False), 36 | label_type="sentiment", 37 | inverse_model=True, 38 | ) 39 | 40 | # -- Define mapping (which tagger should train on which model) -- # 41 | multitask_model, multicorpus = make_multitask_model_and_corpus( 42 | [ 43 | (model_1, corpus_1), 44 | (model_2, corpus_2), 45 | ] 46 | ) 47 | 48 | # -- Create model trainer and train -- # 49 | trainer = ModelTrainer(multitask_model, multicorpus) 50 | 51 | trainer.fine_tune(results_base_path, max_epochs=1) 52 | 53 | del trainer, multitask_model, corpus_1, corpus_2 54 | loaded_model = MultitaskModel.load(results_base_path / "final-model.pt") 55 | 56 | sentence = Sentence("I love Berlin") 57 | sentence_empty = Sentence(" ") 58 | 59 | loaded_model.predict(sentence) 60 | loaded_model.predict([sentence, sentence_empty]) 61 | loaded_model.predict([sentence_empty]) 62 | 63 | for label in sentence.labels: 64 | assert label.value is not None 65 | assert 0.0 <= label.score <= 1.0 66 | assert isinstance(label.score, float) 67 | del loaded_model 68 | -------------------------------------------------------------------------------- /tests/test_tars.py: -------------------------------------------------------------------------------- 1 | from flair.data import Sentence 2 | from flair.datasets import ClassificationCorpus 3 | from flair.models import TARSClassifier 4 | from flair.trainers import ModelTrainer 5 | 6 | 7 | def test_init_tars_and_switch(tasks_base_path): 8 | # test corpus 9 | corpus = ClassificationCorpus(tasks_base_path / "imdb") 10 | 11 | # create a TARS classifier 12 | tars = TARSClassifier( 13 | task_name="2_CLASS", 14 | label_dictionary=corpus.make_label_dictionary(label_type="class"), 15 | label_type="class", 16 | ) 17 | 18 | # check if right number of classes 19 | assert len(tars.get_current_label_dictionary()) == 2 20 | 21 | # switch to task with only one label 22 | tars.add_and_switch_to_new_task("1_CLASS", "one class", "testlabel") 23 | 24 | # check if right number of classes 25 | assert len(tars.get_current_label_dictionary()) == 1 26 | 27 | # switch to task with three labels provided as list 28 | tars.add_and_switch_to_new_task("3_CLASS", ["list 1", "list 2", "list 3"], "testlabel") 29 | 30 | # check if right number of classes 31 | assert len(tars.get_current_label_dictionary()) == 3 32 | 33 | # switch to task with four labels provided as set 34 | tars.add_and_switch_to_new_task("4_CLASS", {"set 1", "set 2", "set 3", "set 4"}, "testlabel") 35 | 36 | # check if right number of classes 37 | assert len(tars.get_current_label_dictionary()) == 4 38 | 39 | # switch to task with two labels provided as Dictionary 40 | tars.add_and_switch_to_new_task("2_CLASS_AGAIN", 
corpus.make_label_dictionary(label_type="class"), "testlabel") 41 | 42 | # check if right number of classes 43 | assert len(tars.get_current_label_dictionary()) == 2 44 | 45 | 46 | def test_train_tars(tasks_base_path, results_base_path): 47 | # test corpus 48 | corpus = ClassificationCorpus(tasks_base_path / "imdb_underscore") 49 | 50 | # create a TARS classifier 51 | tars = TARSClassifier(embeddings="sshleifer/tiny-distilroberta-base") 52 | 53 | # switch to a new task (TARS can do multiple tasks so you must define one) 54 | tars.add_and_switch_to_new_task( 55 | task_name="question 2_CLASS", 56 | label_dictionary=corpus.make_label_dictionary(label_type="class"), 57 | label_type="class", 58 | ) 59 | 60 | # initialize the text classifier trainer 61 | trainer = ModelTrainer(tars, corpus) 62 | 63 | # start the training 64 | trainer.train( 65 | base_path=results_base_path, 66 | learning_rate=0.02, 67 | mini_batch_size=1, 68 | max_epochs=1, 69 | ) 70 | 71 | sentence = Sentence("This is great!") 72 | tars.predict(sentence) 73 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | from flair.data import Dictionary 2 | from flair.training_utils import convert_labels_to_one_hot 3 | 4 | 5 | def test_convert_labels_to_one_hot(): 6 | label_dict = Dictionary(add_unk=False) 7 | label_dict.add_item("class-1") 8 | label_dict.add_item("class-2") 9 | label_dict.add_item("class-3") 10 | 11 | one_hot = convert_labels_to_one_hot([["class-2"]], label_dict) 12 | 13 | assert one_hot[0][0] == 0 14 | assert one_hot[0][1] == 1 15 | assert one_hot[0][2] == 0 16 | -------------------------------------------------------------------------------- /tests/test_visual.py: -------------------------------------------------------------------------------- 1 | from flair.data import Sentence, Span, Token 2 | from flair.embeddings import FlairEmbeddings 3 | from flair.visual import Highlighter 4 | from flair.visual.ner_html import HTML_PAGE, PARAGRAPH, TAGGED_ENTITY, render_ner_html 5 | from flair.visual.training_curves import Plotter 6 | 7 | 8 | def test_highlighter(resources_path): 9 | with (resources_path / "visual/snippet.txt").open() as f: 10 | sentences = [x for x in f.read().split("\n") if x] 11 | 12 | embeddings = FlairEmbeddings("news-forward") 13 | 14 | features = embeddings.lm.get_representation(sentences[0], "", "").squeeze() 15 | 16 | Highlighter().highlight_selection( 17 | features, 18 | sentences[0], 19 | n=1000, 20 | file_=str(resources_path / "visual/highligh.html"), 21 | ) 22 | 23 | # clean up directory 24 | (resources_path / "visual/highligh.html").unlink() 25 | 26 | 27 | def test_plotting_training_curves_and_weights(resources_path): 28 | plotter = Plotter() 29 | plotter.plot_training_curves(resources_path / "visual/loss.tsv") 30 | plotter.plot_weights(resources_path / "visual/weights.txt") 31 | 32 | # clean up directory 33 | (resources_path / "visual/weights.png").unlink() 34 | (resources_path / "visual/training.png").unlink() 35 | 36 | 37 | def mock_ner_span(text, tag, start, end): 38 | span = Span([]).set_label("class", tag) 39 | span.start_pos = start 40 | span.end_pos = end 41 | span.tokens = [Token(text[start:end])] 42 | return span 43 | 44 | 45 | def test_html_rendering(): 46 | text = ( 47 | "Boris Johnson has been elected new Conservative leader in " 48 | "a ballot of party members and will become the " 49 | "next UK prime minister. 
&" 50 | ) 51 | sentence = Sentence(text) 52 | 53 | print(sentence[0:2].add_label("ner", "PER")) 54 | print(sentence[6:7].add_label("ner", "MISC")) 55 | print(sentence[19:20].add_label("ner", "LOC")) 56 | colors = { 57 | "PER": "#F7FF53", 58 | "ORG": "#E8902E", 59 | "LOC": "yellow", 60 | "MISC": "#4647EB", 61 | "O": "#ddd", 62 | } 63 | actual = render_ner_html([sentence], colors=colors) 64 | 65 | expected_res = HTML_PAGE.format( 66 | text=PARAGRAPH.format( 67 | sentence=TAGGED_ENTITY.format(color="#F7FF53", entity="Boris Johnson", label="PER") 68 | + " has been elected new " 69 | + TAGGED_ENTITY.format(color="#4647EB", entity="Conservative", label="MISC") 70 | + " leader in a ballot of party members and will become the next " 71 | + TAGGED_ENTITY.format(color="yellow", entity="UK", label="LOC") 72 | + " prime minister. &" 73 | ), 74 | title="Flair", 75 | ) 76 | 77 | assert expected_res == actual 78 | --------------------------------------------------------------------------------