27 | Privacy Policy
28 | The web server / web hosting company might collect certain log files to prevent abuse of its services.
29 | These log files can include: IP address, URL, date and time.
30 | We do not use any tracking services or cookies to track or re-identify visitors.
31 |
32 |
33 |
34 |
35 |
--------------------------------------------------------------------------------
/docs/_templates/page.html:
--------------------------------------------------------------------------------
1 | {% extends "!page.html" %}
2 | {% block body %}
3 | {% if current_version and latest_version and current_version != latest_version and current_version != release and current_version.name != latest_version.release %}
4 |
5 |
6 | {% if current_version.is_released %}
7 | {% if latest_version.release.replace('v', '').split('.') | map('int') | list > current_version.name.replace('v', '').split('.') | map('int') | list %}
8 | You're reading an old version of this documentation.
9 | If you want up-to-date information, please have a look at {{latest_version.name}}.
10 | {% endif %}
11 | {% else %}
12 | You're reading the documentation for a development version.
13 | For the latest stable version, please have a look at {{latest_version.name}}.
14 | {% endif %}
15 |
16 |
17 | {% endif %}
18 | {{ super() }}
19 | {% endblock %}
--------------------------------------------------------------------------------
/docs/_templates/version-switcher.html:
--------------------------------------------------------------------------------
1 | {# As the version switcher will only work when JavaScript is enabled, we add it through JavaScript.
2 | #}
3 |
41 |
46 |
--------------------------------------------------------------------------------
/docs/_templates/versioning.html:
--------------------------------------------------------------------------------
1 | {% if versions %}
2 |
17 | {% endif %}
--------------------------------------------------------------------------------
/docs/api/datasets/base.rst:
--------------------------------------------------------------------------------
1 | flair.datasets.base
2 | ===================
3 |
4 | .. currentmodule:: flair.datasets.base
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/datasets/biomedical.rst:
--------------------------------------------------------------------------------
1 | flair.datasets.biomedical
2 | =========================
3 |
4 | .. currentmodule:: flair.datasets.biomedical
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/datasets/document_classification.rst:
--------------------------------------------------------------------------------
1 | flair.datasets.document_classification
2 | ======================================
3 |
4 | .. currentmodule:: flair.datasets.document_classification
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/datasets/entity_linking.rst:
--------------------------------------------------------------------------------
1 | flair.datasets.entity_linking
2 | =============================
3 |
4 | .. currentmodule:: flair.datasets.entity_linking
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/datasets/ocr.rst:
--------------------------------------------------------------------------------
1 | flair.datasets.ocr
2 | ==================
3 |
4 | .. currentmodule:: flair.datasets.ocr
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/datasets/relation_extraction.rst:
--------------------------------------------------------------------------------
1 | flair.datasets.relation_extraction
2 | ==================================
3 |
4 | .. currentmodule:: flair.datasets.relation_extraction
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/datasets/sequence_labeling.rst:
--------------------------------------------------------------------------------
1 | flair.datasets.sequence_labeling
2 | ================================
3 |
4 | .. currentmodule:: flair.datasets.sequence_labeling
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/datasets/text_image.rst:
--------------------------------------------------------------------------------
1 | flair.datasets.text_image
2 | =========================
3 |
4 | .. currentmodule:: flair.datasets.text_image
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/datasets/text_text.rst:
--------------------------------------------------------------------------------
1 | flair.datasets.text_text
2 | ========================
3 |
4 | .. currentmodule:: flair.datasets.text_text
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
10 |
11 |
--------------------------------------------------------------------------------
/docs/api/datasets/treebanks.rst:
--------------------------------------------------------------------------------
1 | flair.datasets.treebanks
2 | ========================
3 |
4 | .. currentmodule:: flair.datasets.treebanks
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/embeddings/base.rst:
--------------------------------------------------------------------------------
1 | flair.embeddings.base
2 | =====================
3 |
4 | .. currentmodule:: flair.embeddings.base
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/embeddings/document.rst:
--------------------------------------------------------------------------------
1 | flair.embeddings.document
2 | =========================
3 |
4 | .. currentmodule:: flair.embeddings.document
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/embeddings/image.rst:
--------------------------------------------------------------------------------
1 | flair.embeddings.image
2 | ======================
3 |
4 | .. currentmodule:: flair.embeddings.image
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/embeddings/legacy.rst:
--------------------------------------------------------------------------------
1 | flair.embeddings.legacy
2 | =======================
3 |
4 | .. currentmodule:: flair.embeddings.legacy
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/embeddings/token.rst:
--------------------------------------------------------------------------------
1 | flair.embeddings.token
2 | ======================
3 |
4 | .. currentmodule:: flair.embeddings.token
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/embeddings/transformer.rst:
--------------------------------------------------------------------------------
1 | flair.embeddings.transformer
2 | ============================
3 |
4 | .. currentmodule:: flair.embeddings.transformer
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/flair.data.rst:
--------------------------------------------------------------------------------
1 | flair.data
2 | ==========
3 |
4 | .. currentmodule:: flair.data
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/flair.datasets.rst:
--------------------------------------------------------------------------------
1 | flair.datasets
2 | ==============
3 |
4 | .. currentmodule:: flair.datasets
5 |
6 | .. toctree::
7 | :glob:
8 | :maxdepth: 2
9 |
10 | datasets/*
11 |
--------------------------------------------------------------------------------
/docs/api/flair.embeddings.rst:
--------------------------------------------------------------------------------
1 | flair.embeddings
2 | ================
3 |
4 | .. currentmodule:: flair.embeddings
5 |
6 | .. toctree::
7 | :glob:
8 | :maxdepth: 2
9 |
10 | embeddings/*
--------------------------------------------------------------------------------
/docs/api/flair.models.rst:
--------------------------------------------------------------------------------
1 | flair.models
2 | ============
3 |
4 | .. currentmodule:: flair.models
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/flair.nn.rst:
--------------------------------------------------------------------------------
1 | flair.nn
2 | ========
3 |
4 | .. currentmodule:: flair.nn
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/flair.rst:
--------------------------------------------------------------------------------
1 | flair
2 | =====
3 |
4 | .. currentmodule:: flair
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/flair.splitter.rst:
--------------------------------------------------------------------------------
1 | flair.splitter
2 | ==============
3 |
4 | .. currentmodule:: flair.splitter
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
10 |
--------------------------------------------------------------------------------
/docs/api/flair.tokenization.rst:
--------------------------------------------------------------------------------
1 | flair.tokenization
2 | ==================
3 |
4 | .. currentmodule:: flair.tokenization
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/flair.trainers.plugins.rst:
--------------------------------------------------------------------------------
1 | flair.trainers.plugins
2 | ======================
3 |
4 | .. currentmodule:: flair.trainers.plugins
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/flair.trainers.rst:
--------------------------------------------------------------------------------
1 | flair.trainers
2 | ==============
3 |
4 | .. currentmodule:: flair.trainers
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/index.rst:
--------------------------------------------------------------------------------
1 | API Docs
2 | ========
3 |
4 | .. toctree::
5 | :glob:
6 | :maxdepth: 2
7 |
8 | flair
9 | flair.*
--------------------------------------------------------------------------------
/docs/contributing/index.rst:
--------------------------------------------------------------------------------
1 | Contributing
2 | ============
3 |
4 | .. toctree::
5 | :maxdepth: 1
6 |
7 | writing_a_good_issue
8 | local_development
9 | making_a_pull_request
10 | updating_documentation
11 |
--------------------------------------------------------------------------------
/docs/contributing/local_development.md:
--------------------------------------------------------------------------------
1 | # Local Development
2 |
3 | For contributors looking to get deeper into the API, we suggest cloning the repository and checking out the unit
4 | tests for examples of how to call methods. Most classes and methods are documented, so finding your way around
5 | the code should hopefully be easy.
6 |
7 | ## Setup
8 |
9 | Flair requires Python 3.9 or higher. To make sure our code also runs on the oldest supported
10 | Python version, it is recommended to use Python 3.9.x for Flair development.
11 |
12 | Create a python environment of your preference and run:
13 | ```bash
14 | pip install -r requirements-dev.txt
15 | pip install -e .
16 | ```
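
For example, a fresh environment with Python 3.9 can first be created with the built-in `venv` module before running the commands above (just a sketch; any environment manager works):

```bash
python3.9 -m venv .venv        # create the environment
source .venv/bin/activate      # activate it, then run the commands above
```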
17 |
18 | ## Tests
19 |
20 | To run only the type checks and code-formatting checks, execute:
21 |
22 | ```bash
23 | pytest flair
24 | ```
25 |
26 | To run all basic tests execute:
27 |
28 | ```bash
29 | pytest
30 | ```
31 |
32 | To run integration tests execute:
33 |
34 | ```bash
35 | pytest --runintegration
36 | ```
37 |
38 | The integration tests will train small models and therefore take more time.
39 | In general, it is recommended to ensure that all basic tests pass before running the integration tests.
40 |
41 | ## Code Formatting
42 |
43 | To ensure a standardized code style we use the formatter [black](https://github.com/ambv/black) and for standardizing imports we use [ruff](https://github.com/charliermarsh/ruff).
44 | If your code is not formatted properly, the tests will fail.
45 |
46 | We recommend configuring your IDE to run these formatters for you, but you can also always run them manually via
47 | `black . && ruff --fix .` in the flair root folder.
--------------------------------------------------------------------------------
/docs/contributing/making_a_pull_request.md:
--------------------------------------------------------------------------------
1 | # Making a pull request
2 |
3 | We are happy to accept your contributions to make `flair` better and more awesome! To avoid unnecessary work on either
4 | side, please stick to the following process:
5 |
6 | 1. Check if there is already [an issue](https://github.com/flairNLP/flair/issues) for your concern.
7 | 2. If there is not, open a new one to start a discussion. We hate to close finished PRs!
8 | 3. If we decide your concern needs code changes, we would be happy to accept a pull request. Please consider the
9 | commit guidelines below.
10 |
11 |
12 | ## Git Commit Guidelines
13 |
14 | If there is already a ticket, use this number at the start of your commit message.
15 | Use meaningful commit messages that describe what you did.
16 |
17 | **Example:** `GH-42: Added new type of embeddings: DocumentEmbedding.`
--------------------------------------------------------------------------------
/docs/contributing/updating_documentation.md:
--------------------------------------------------------------------------------
1 | # Updating documentation
2 |
3 |
4 | ## What is good documentation?
5 |
6 | Good Documentation
7 | * Always refers to the end user. Do not document *why* something is the way it is, but rather *how* to use it.
8 | * Doesn't lie and is always up-to-date. Whenever code is updated, consider whether the documentation needs to change accordingly to reflect reality.
9 | * Provides useful links whenever possible. Do not reference another object without linking it.
10 |
11 |
12 | ## Tutorials
13 |
14 | All tutorials are markdown files stored in [the tutorial folder](https://github.com/flairNLP/flair/tree/master/docs/tutorial).
15 | When adding a new tutorial, you must add its name to the `index.rst` file in the respective folder.
16 | We are using the [MyST parser](https://myst-parser.readthedocs.io/en/latest/syntax/typography.html) which adds
17 | some additional syntax over markdown.
18 |
19 | A tutorial should always be easy to understand, and reference the API documentation for further reading.
20 |
21 | ```{note}
22 | You can reference symbols by defining links
23 | e.g.: ``[`flair.set_seed`](#flair.set_seed)`` for a function
24 | e.g.: `[entity-linking](project:../tutorial/tutorial-basics/entity-linking.md)` for another tutorial
25 | ```
26 |
27 | ## Docstrings
28 |
29 | For docstrings we follow the [Google docstring](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) format.
30 | We do not need to specify types or default values, as those will be extracted from the function signature.
31 |
32 | Docstrings usually start with a one-liner giving a simple explanation of the object, followed by a more detailed explanation **if required**.
33 | Ensure that you always use cross-references instead of just mentioning another object,
34 | e.g. ``:class:`flair.models.SequenceTagger` `` can be used to reference the SequenceTagger.
35 |
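As a minimal sketch, a Google-style docstring looks like this (the function and its parameters are hypothetical):

```python
def embed_sentences(sentences, batch_size=32):
    """Adds embeddings to a list of sentences.

    Args:
        sentences: The sentences to embed.
        batch_size: Number of sentences to process per forward pass.

    Returns:
        The same sentences, with embeddings attached.
    """
```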
36 |
37 | ## Building the local docs
38 |
39 | For building the docs,
40 |
41 | * Ensure that you have everything committed. Local changes won't be used for building.
42 | * Install the build dependencies via `pip install -r docs/requirements.txt`.
43 | * In `docs/conf.py` temporarily add your local branch name to the `smv_branch_whitelist` pattern.
44 |   E.g. if your branch is called `doc-page`, `smv_branch_whitelist` needs to have the value `r"^master|doc-page$"`.
45 | * Run `sphinx-multiversion docs doc_build/` to generate the docs.
46 | * Open `doc_build//index.html` to view the docs.
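
Collected as shell commands, the build steps above look roughly like this (assuming your changes are committed and your branch is whitelisted):

```bash
pip install -r docs/requirements.txt
sphinx-multiversion docs doc_build/
```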
47 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. _flair_docs_mainpage:
2 |
3 | .. title:: Home
4 |
5 | .. raw:: html
6 | :file: _templates/landing_page_styles.html
7 |
8 | .. raw:: html
9 | :file: _templates/landing-page-banner.html
10 |
11 | .. raw:: html
12 | :file: _templates/landing-page-illustrations.html
13 |
14 | .. toctree::
15 | :maxdepth: 3
16 | :hidden:
17 |
18 | Tutorials
19 | API
20 | Contributing
--------------------------------------------------------------------------------
/docs/legal-notice/index.rst:
--------------------------------------------------------------------------------
1 | Legal Notice
2 | ============
3 |
4 | .. title:: Legal Notice
5 |
6 | .. raw:: html
7 | :file: ../_templates/legal-notice-content.html
8 |
9 | .. toctree::
10 | :maxdepth: 3
11 | :hidden:
12 |
13 | Tutorials <../tutorial/index>
14 | API <../api/index>
15 | Contributing <../contributing/index>
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx-github-style<=1.0.2 # 1.0.3 changes logic that breaks with sphinx-multiversion
2 | sphinx-autodoc-typehints
3 | myst-parser
4 | sphinx<8.0.0
5 | importlib-metadata
6 | sphinx-multiversion
7 | pydata-sphinx-theme<0.14
8 | sphinx_design
9 | sphinx-autosummary-autocollect
10 |
11 | # previous dependencies that are required to build docs for later versions too.
12 | semver
13 | gensim
14 | bpemb
--------------------------------------------------------------------------------
/docs/tutorial/index.rst:
--------------------------------------------------------------------------------
1 | Tutorials
2 | =========
3 |
4 |
5 | .. _flair_tutorials:
6 |
7 | .. toctree::
8 | :maxdepth: 2
9 |
10 | intro
11 | tutorial-basics/index
12 | tutorial-training/index
13 | tutorial-embeddings/index
14 | tutorial-hunflair2/index
--------------------------------------------------------------------------------
/docs/tutorial/intro.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 1
3 | ---
4 |
5 | (getting_started)=
6 |
7 | # Quick Start
8 |
9 | Let's discover **Flair in less than 5 minutes**.
10 |
11 | ## Requirements and Installation
12 |
13 | In your favorite virtual environment, simply do:
14 |
15 | ```bash
16 | pip install flair
17 | ```
18 |
19 | Flair requires Python 3.9+.
20 |
21 | ## Example 1: Tag Entities in Text
22 |
23 | Let's run **named entity recognition** (NER) over the following example sentence: "_I love Berlin and New York._"
24 |
25 | Our goal is to identify the names in this sentence and their types.
26 |
27 | To do this, all you need is to make a [`Sentence`](#flair.data.Sentence) for this text, load a pre-trained model and use it to predict tags for the sentence:
28 |
29 |
30 | ```python
31 | from flair.data import Sentence
32 | from flair.nn import Classifier
33 |
34 | # make a sentence
35 | sentence = Sentence('I love Berlin and New York.')
36 |
37 | # load the NER tagger
38 | tagger = Classifier.load('ner')
39 |
40 | # run NER over sentence
41 | tagger.predict(sentence)
42 |
43 | # print the sentence with all annotations
44 | print(sentence)
45 | ```
46 |
47 | This should print:
48 |
49 | ```console
50 | Sentence[7]: "I love Berlin and New York." → ["Berlin"/LOC, "New York"/LOC]
51 | ```
52 |
53 | The output shows that both "Berlin" and "New York" were tagged as **location entities** (LOC) in this sentence.
54 |
55 |
56 | ## Example 2: Detect Sentiment
57 |
58 | Let's run **sentiment analysis** over the same sentence to determine whether it is POSITIVE or NEGATIVE.
59 |
60 | You can do this with essentially the same code as above. Just instead of loading the 'ner' model, you now load the 'sentiment' model:
61 |
62 |
63 | ```python
64 | from flair.data import Sentence
65 | from flair.nn import Classifier
66 |
67 | # make a sentence
68 | sentence = Sentence('I love Berlin and New York.')
69 |
70 | # load the sentiment tagger
71 | tagger = Classifier.load('sentiment')
72 |
73 | # run sentiment analysis over sentence
74 | tagger.predict(sentence)
75 |
76 | # print the sentence with all annotations
77 | print(sentence)
78 |
79 | ```
80 |
81 | This should print:
82 |
83 | ```console
84 | Sentence[7]: "I love Berlin and New York." → POSITIVE (0.9982)
85 | ```
86 |
87 | The output shows that the sentence "_I love Berlin and New York._" was tagged as having **POSITIVE** sentiment.
88 |
89 |
90 | ## Summary
91 |
92 | Congrats, you now know how to use Flair to find entities and detect sentiment!
93 |
94 | ## Next steps
95 |
96 | If you want to know more about Flair, next check out [Tutorial 1](tutorial-basics/), which gives an introduction to the basics of Flair!
--------------------------------------------------------------------------------
/docs/tutorial/tutorial-basics/how-predictions-work.md:
--------------------------------------------------------------------------------
1 | # How predictions work
2 |
3 | All taggers in Flair make predictions. This tutorial helps you understand what information you can get out of each prediction.
4 |
5 | ## Running example
6 |
7 | Let's use our standard NER example to illustrate how annotations work:
8 |
9 | ```python
10 | from flair.nn import Classifier
11 | from flair.data import Sentence
12 |
13 | # load the model
14 | tagger = Classifier.load('ner')
15 |
16 | # make a sentence
17 | sentence = Sentence('George Washington went to Washington.')
18 |
19 | # predict NER tags
20 | tagger.predict(sentence)
21 |
22 | # print the sentence with the tags
23 | print(sentence)
24 | ```
25 |
26 | This should print:
27 | ```console
28 | Sentence: "George Washington went to Washington ." → ["George Washington"/PER, "Washington"/LOC]
29 | ```
30 |
31 | This shows that two entities are labeled in this sentence: "George Washington" as PER (person) and "Washington"
32 | as LOC (location).
33 |
34 | ## Getting the predictions
35 |
36 | A common question that gets asked is **how to access these predictions directly**. You can do this by using
37 | the [`get_labels()`](#flair.data.Sentence.get_labels) method to iterate over all predictions:
38 |
39 | ```python
40 | for label in sentence.get_labels():
41 | print(label)
42 | ```
43 | This should print the two NER predictions:
44 |
45 | ```console
46 | Span[0:2]: "George Washington" → PER (0.9989)
47 | Span[4:5]: "Washington" → LOC (0.9942)
48 | ```
49 |
50 | As you can see, each entity is printed, together with the predicted class.
51 | The confidence of the prediction is indicated as a score in brackets.
52 |
53 | ## Values for each prediction
54 |
55 | For each prediction, you can even **directly access** the label value, and all other attributes of the [`Label`](#flair.data.Label) class:
56 |
57 | ```python
58 | # iterate over all labels in the sentence
59 | for label in sentence.get_labels():
60 | # print label value and score
61 | print(f'label.value is: "{label.value}"')
62 | print(f'label.score is: "{label.score}"')
63 | # access the data point to which label attaches and print its text
64 | print(f'the text of label.data_point is: "{label.data_point.text}"\n')
65 | ```
66 |
67 | This should print:
68 | ```console
69 | label.value is: "PER"
70 | label.score is: "0.998886227607727"
71 | the text of label.data_point is: "George Washington"
72 |
73 | label.value is: "LOC"
74 | label.score is: "0.9942097663879395"
75 | the text of label.data_point is: "Washington"
76 | ```
77 |
78 |
79 | ### Next
80 |
81 | Congrats, you've made your first predictions with Flair and accessed value and confidence scores of each prediction.
82 |
83 | Next, let's discuss specifically how to [predict named entities with Flair](tagging-entities.md).
84 |
--------------------------------------------------------------------------------
/docs/tutorial/tutorial-basics/how-to-tag-corpus.md:
--------------------------------------------------------------------------------
1 | # How to tag a whole corpus
2 |
3 | Often, you may want to tag an entire text corpus. In this case, you need to split the corpus into sentences and pass a
4 | list of [`Sentence`](#flair.data.Sentence) objects to the [`Classifier.predict()`](#flair.nn.Classifier.predict) method.
5 |
6 | For instance, you can use a [`SentenceSplitter`](#flair.splitter.SentenceSplitter) to split your text:
7 |
8 | ```python
9 | from flair.nn import Classifier
10 | from flair.splitter import SegtokSentenceSplitter
11 |
12 | # example text with many sentences
13 | text = "This is a sentence. This is another sentence. I love Berlin."
14 |
15 | # initialize sentence splitter
16 | splitter = SegtokSentenceSplitter()
17 |
18 | # use splitter to split text into list of sentences
19 | sentences = splitter.split(text)
20 |
21 | # predict tags for sentences
22 | tagger = Classifier.load('ner')
23 | tagger.predict(sentences)
24 |
25 | # iterate through sentences and print predicted labels
26 | for sentence in sentences:
27 | print(sentence)
28 | ```
29 |
30 | Using the `mini_batch_size` parameter of the [`Classifier.predict()`](#flair.nn.Classifier.predict) method, you can set the size of mini batches passed to the
31 | tagger. Depending on your resources, you might want to play around with this parameter to optimize speed.
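For instance, a larger batch size (the value 64 here is arbitrary) may speed up prediction on a GPU:

```python
tagger.predict(sentences, mini_batch_size=64)
```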
32 |
33 | ### Next
34 |
35 | That's it - you completed tutorial 1! Congrats!
36 |
37 | You've learned how basic classes work and how to use Flair to make various predictions.
38 |
39 | Next, you can check out our tutorial on how to [train your own model](../tutorial-training/how-model-training-works.md).
40 |
--------------------------------------------------------------------------------
/docs/tutorial/tutorial-basics/index.rst:
--------------------------------------------------------------------------------
1 | Tutorial 1: Basic Tagging
2 | =========================
3 |
4 | This tutorial shows you in more detail how to tag your text and access predictions,
5 | and showcases various models we ship with Flair.
6 |
7 | .. toctree::
8 | :maxdepth: 1
9 |
10 | basic-types
11 | how-predictions-work
12 | tagging-entities
13 | tagging-sentiment
14 | entity-linking
15 | entity-mention-linking
16 | part-of-speech-tagging
17 | other-models
18 | how-to-tag-corpus
19 |
--------------------------------------------------------------------------------
/docs/tutorial/tutorial-basics/tagging-sentiment.md:
--------------------------------------------------------------------------------
1 | # Tagging sentiment
2 |
3 | This tutorial shows you how to do sentiment analysis in Flair.
4 |
5 | ## Tagging sentiment with our standard model
6 |
7 | Our standard sentiment analysis model uses distilBERT embeddings and was trained over a mix of corpora, notably
8 | the Amazon review corpus, and can thus handle a variety of domains and language styles.
9 |
10 | Let's use an example sentence:
11 |
12 | ```python
13 | from flair.nn import Classifier
14 | from flair.data import Sentence
15 |
16 | # load the model
17 | tagger = Classifier.load('sentiment')
18 |
19 | # make a sentence
20 | sentence = Sentence('This movie is not at all bad.')
21 |
22 | # predict sentiment
23 | tagger.predict(sentence)
24 |
25 | # print sentence with predicted tags
26 | print(sentence)
27 | ```
28 |
29 | This should print:
30 | ```console
31 | Sentence[8]: "This movie is not at all bad." → POSITIVE (0.9929)
32 | ```
33 |
34 | This shows that the sentence overall is tagged as having POSITIVE sentiment.
35 |
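To work with the prediction programmatically rather than printing the whole sentence, you can read the label's value and confidence directly (a small sketch using the accessors from the previous tutorial):

```python
label = sentence.get_labels()[0]
print(label.value)  # e.g. 'POSITIVE'
print(label.score)  # confidence, e.g. 0.9929
```
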
36 | ## Tagging sentiment with our fast model
37 |
38 | We also offer an RNN-based variant which is faster but less accurate. Use it like this:
39 |
40 |
41 | ```python
42 | from flair.nn import Classifier
43 | from flair.data import Sentence
44 |
45 | # load the model
46 | tagger = Classifier.load('sentiment-fast')
47 |
48 | # make a sentence
49 | sentence = Sentence('This movie is very bad.')
50 |
51 | # predict sentiment
52 | tagger.predict(sentence)
53 |
54 | # print sentence with predicted tags
55 | print(sentence)
56 | ```
57 |
58 | This should print:
59 | ```console
60 | Sentence[6]: "This movie is very bad." → NEGATIVE (0.9999)
61 | ```
62 |
63 | This indicates that the sentence is of NEGATIVE sentiment. As you can see, it's the same code as above, just loading the
64 | '**sentiment-fast**' model instead of '**sentiment**'.
65 |
66 |
67 | ### List of Sentiment Models
68 |
69 | We end this section with a list of all models we currently ship with Flair:
70 |
71 | | ID | Language | Task | Training Dataset | Accuracy |
72 | | ------------- | ---- | ------------- |------------- |------------- |
73 | | 'sentiment' | English | detecting positive and negative sentiment (transformer-based) | movie and product reviews | **98.87** |
74 | | 'sentiment-fast' | English | detecting positive and negative sentiment (RNN-based) | movie and product reviews | **96.83**|
75 | | 'de-offensive-language' | German | detecting offensive language | [GermEval 2018 Task 1](https://projects.fzai.h-da.de/iggsa/projekt/) | **75.71** (Macro F1) |
76 |
77 |
78 | ### Next
79 |
80 | Congrats, you learned how to predict sentiment with Flair!
81 |
82 | Next, let's discuss how to [link entities to Wikipedia with Flair](entity-linking.md).
83 |
84 |
--------------------------------------------------------------------------------
/docs/tutorial/tutorial-embeddings/index.rst:
--------------------------------------------------------------------------------
1 | Tutorial 3: Embeddings
2 | ======================
3 |
4 | This tutorial shows you how to use Flair to produce embeddings for words and documents.
5 | Embeddings are vector representations of text that are useful for a variety of tasks.
6 | All Flair models are trained on top of embeddings, so if you want to train your own models,
7 | you should understand how embeddings work.
8 |
9 | .. toctree::
10 | :maxdepth: 1
11 |
12 | embeddings
13 | transformer-embeddings
14 | flair-embeddings
15 | classic-word-embeddings
16 | other-embeddings
17 |
--------------------------------------------------------------------------------
/docs/tutorial/tutorial-hunflair2/index.rst:
--------------------------------------------------------------------------------
1 | Tutorial: HunFlair2
2 | ===================
3 |
4 | *HunFlair2* is a state-of-the-art named entity tagger and linker for biomedical texts. It comes with
5 | models for genes/proteins, chemicals, diseases, species and cell lines. *HunFlair2*
6 | builds on pretrained domain-specific language models and outperforms other biomedical
7 | NER tools on unseen corpora.
8 |
9 | .. toctree::
10 | :glob:
11 | :maxdepth: 1
12 |
13 | overview
14 | tagging
15 | linking
16 | training-ner-models
17 | customize-linking
18 |
--------------------------------------------------------------------------------
/docs/tutorial/tutorial-training/how-to-train-text-classifier.md:
--------------------------------------------------------------------------------
1 | # Train a Text Classifier
2 |
3 | This tutorial shows you how to train your own text classifier models with Flair. For instance, you
4 | could train your own sentiment analysis model or offensive language detection model.
5 |
6 |
7 | ## Training a text classification model with transformers
8 |
9 | For text classification, you reach state-of-the-art scores by fine-tuning a transformer.
10 |
11 | Training a model is easy: load the appropriate corpus, make a label dictionary, then fine-tune a [`TextClassifier`](#flair.models.TextClassifier)
12 | model using the [`ModelTrainer.fine_tune()`](#flair.trainers.ModelTrainer.fine_tune) method. See the example script below:
13 |
14 | ```python
15 | from flair.data import Corpus
16 | from flair.datasets import TREC_6
17 | from flair.embeddings import TransformerDocumentEmbeddings
18 | from flair.models import TextClassifier
19 | from flair.trainers import ModelTrainer
20 |
21 | # 1. get the corpus
22 | corpus: Corpus = TREC_6()
23 |
24 | # 2. what label do we want to predict?
25 | label_type = 'question_class'
26 |
27 | # 3. create the label dictionary
28 | label_dict = corpus.make_label_dictionary(label_type=label_type)
29 |
30 | # 4. initialize transformer document embeddings (many models are available)
31 | document_embeddings = TransformerDocumentEmbeddings('distilbert-base-uncased', fine_tune=True)
32 |
33 | # 5. create the text classifier
34 | classifier = TextClassifier(document_embeddings, label_dictionary=label_dict, label_type=label_type)
35 |
36 | # 6. initialize trainer
37 | trainer = ModelTrainer(classifier, corpus)
38 |
39 | # 7. run training with fine-tuning
40 | trainer.fine_tune('resources/taggers/question-classification-with-transformer',
41 | learning_rate=5.0e-5,
42 | mini_batch_size=4,
43 | max_epochs=10,
44 | )
45 | ```
46 |
47 | Once the model is trained you can load it to predict the class of new sentences. Just call the [`predict`](#flair.nn.DefaultClassifier.predict) method of the model.
48 |
49 | ```python
50 | from flair.data import Sentence
51 | classifier = TextClassifier.load('resources/taggers/question-classification-with-transformer/final-model.pt')
52 | # create example sentence
53 | sentence = Sentence('Who built the Eiffel Tower ?')
54 |
55 | # predict class and print
56 | classifier.predict(sentence)
57 |
58 | print(sentence.labels)
59 | ```
60 |
61 |
62 | ## Next
63 |
64 | Next, learn [how to train an entity linker](how-to-train-span-classifier.md).
--------------------------------------------------------------------------------
/docs/tutorial/tutorial-training/index.rst:
--------------------------------------------------------------------------------
1 | Tutorial 2: Training models
2 | ===========================
3 |
4 | This tutorial illustrates how you can train your own state-of-the-art NLP models with Flair.
5 |
6 | .. toctree::
7 | :glob:
8 | :maxdepth: 1
9 |
10 | how-model-training-works
11 | train-vs-fine-tune
12 | how-to-load-prepared-dataset
13 | how-to-load-custom-dataset
14 | how-to-train-sequence-tagger
15 | how-to-train-text-classifier
16 | how-to-train-span-classifier
17 | how-to-train-multitask-model
18 |
--------------------------------------------------------------------------------
/docs/tutorial/tutorial-training/train-vs-fine-tune.md:
--------------------------------------------------------------------------------
1 | # Training vs fine-tuning
2 |
3 | There are two broad ways to train a model: the "classic" approach and the fine-tuning approach. This section
4 | explains the differences.
5 |
6 |
7 | ## Fine-Tuning
8 |
9 | Fine-tuning is the current state-of-the-art approach. The main idea is that you take a pre-trained language model that
10 | consists of (hundreds of) millions of trained parameters. To this language model you add a simple prediction head with
11 | randomly initialized weights.
12 |
13 | Since in this case, the vast majority of parameters in the model is already trained, you only need to "fine-tune" this
14 | model. This means: Very small learning rate (LR) and just a few epochs. You are essentially just minimally modifying
15 | the model to adapt it to the task you want to solve.
16 |
17 | Use this method by calling [`ModelTrainer.fine_tune()`](#flair.trainers.ModelTrainer.fine_tune).
18 | Since most models in Flair were trained this way, this is likely the approach you'll want to use.
19 |
20 |
21 | ## Training
22 |
23 | On the other hand, you should use the classic training approach if the majority of the trainable parameters in your
24 | model is randomly initialized. This can happen for instance if you freeze the model weights of the pre-trained language
25 | model, leaving only the randomly initialized prediction head as trainable parameters. This training approach is also
26 | referred to as "feature-based" or "probing" in some papers.
27 |
28 | Since the majority of parameters is randomly initialized, you need to fully train the model. This means: high learning
29 | rate and many epochs.
30 |
31 | Use this method by calling [`ModelTrainer.train()`](#flair.trainers.ModelTrainer.train). A short sketch contrasting the two calls follows the note below.
32 |
33 | ```{note}
34 | Another application of classic training is for linear probing of pre-trained language models. In this scenario, you
35 | "freeze" the weights of the language model (meaning that they cannot be changed) and add a prediction head that is
36 | trained from scratch. So, even though a language model is involved, its parameters are not trainable. This means that
37 | all trainable parameters in this scenario are randomly initialized, therefore necessitating the use of the classic
38 | training approach.
39 | ```
40 |
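A minimal sketch of how the two approaches are invoked (assuming `model` and `corpus` have been created as in the other training tutorials; paths and hyperparameter values are illustrative):

```python
from flair.trainers import ModelTrainer

trainer = ModelTrainer(model, corpus)

# fine-tuning: very small learning rate, few epochs
trainer.fine_tune('resources/taggers/example', learning_rate=5.0e-5, max_epochs=10)

# classic training: higher learning rate, many epochs
trainer.train('resources/taggers/example', learning_rate=0.1, max_epochs=150)
```
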
41 |
42 | ## Paper
43 |
44 | If you are interested in an experimental comparison of the two above-mentioned approaches, check out [our paper](https://arxiv.org/pdf/2011.06993)
45 | that compares fine-tuning to the feature-based approach.
46 |
47 |
48 | ## Next
49 |
50 | Next, learn how to load a [training dataset](how-to-load-prepared-dataset.md).
--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | # Examples
2 |
3 | This folder contains actively maintained examples of how to use Flair, organized by NLP task.
4 |
5 | ## Table of Tasks
6 |
7 | | Task | Documentation
8 | | ------------------------------ | -------------
9 | | Named Entity Recognition (NER) | [Here](ner/)
10 | | Multi GPU | [Here](multi_gpu/)
11 |
--------------------------------------------------------------------------------
/examples/multi_gpu/README.md:
--------------------------------------------------------------------------------
1 | # Multi GPU
2 |
3 | Training can be distributed across multiple GPUs on a local machine when using
4 | [`ModelTrainer`](#flair.trainers.trainer.ModelTrainer).
5 |
6 | ## Example
7 |
8 | See the script `run_multi_gpu.py` and its comments.
9 |
10 | ## Tutorial
11 |
12 | There are 2 changes that are always required, as well as a few things to consider.
13 |
14 | Always Required:
15 | 1) Pass the argument `multi_gpu=True` to your [`.train()`](#flair.trainers.trainer.ModelTrainer.train) or `.fine_tune()`
16 | 2) Wrap your code in [`launch_distributed`](#flair.distributed_utils.launch_distributed), e.g.
17 |    `launch_distributed(main, *args)`. This spawns multiple processes, each driving a GPU (see the sketch below).
18 |
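A minimal sketch of these two changes (`build_corpus_and_model` is a hypothetical placeholder for your own setup code; see `run_multi_gpu.py` for a complete script):

```python
from flair.distributed_utils import launch_distributed
from flair.trainers import ModelTrainer


def main():
    corpus, model = build_corpus_and_model()  # must produce identical results on every process
    trainer = ModelTrainer(model, corpus)
    trainer.fine_tune("resources/taggers/example", multi_gpu=True)  # change 1


if __name__ == "__main__":
    launch_distributed(main)  # change 2: spawns one process per GPU
```
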
19 | Other considerations:
20 | - The corpus and other preprocessing must be the same on all processes. For example, if corpus initialization involves
21 | anything random, you should either
22 |     - Set the random seed before initializing the corpus (e.g. `flair.set_seed(42)`) OR
23 | - Initialize the corpus before calling `launch_distributed` and pass the corpus as an argument so it's serialized to
24 | all processes
25 | - The effective batch size will be larger by a factor of num_gpus
26 | - Each GPU will now process `mini_batch_size` examples before the optimizer steps, resulting in fewer total steps
27 | taken relative to training with a single device. To obtain comparable results between single/multi gpu,
28 |   both mathematically and in terms of wall time, consider the method in the example script.
29 | - Large batch sizes may be necessary to see faster runs, otherwise the communication overhead may dominate
30 |
31 | Only the parameter updates in the training process will be distributed across multiple GPUs. Evaluation and prediction
32 | are still done on a single device.
33 |
--------------------------------------------------------------------------------
/examples/multi_gpu/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/examples/multi_gpu/__init__.py
--------------------------------------------------------------------------------
/examples/multi_gpu/run_multi_gpu.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | import flair
4 | from flair.datasets import IMDB
5 | from flair.distributed_utils import launch_distributed
6 | from flair.embeddings import TransformerDocumentEmbeddings
7 | from flair.models import TextClassifier
8 | from flair.trainers import ModelTrainer
9 |
10 |
11 | def main(multi_gpu):
12 | # Note: Multi-GPU can affect corpus loading
13 | # This code will run multiple times -- each GPU gets its own process and each process runs this code. We need to
14 | # ensure that the corpus has the same elements and order on all processes, despite sampling. We do that by using
15 | # the same seed on all processes.
16 | flair.set_seed(42)
17 |
18 | corpus = IMDB()
19 | corpus.downsample(0.1)
20 | label_type = "sentiment"
21 | label_dictionary = corpus.make_label_dictionary(label_type)
22 |
23 | embeddings = TransformerDocumentEmbeddings(model="distilbert-base-uncased")
24 | model = TextClassifier(embeddings, label_type, label_dictionary=label_dictionary)
25 |
26 | # Note: Multi-GPU can affect choice of batch size.
27 | # In order to compare batch updates fairly between single and multi-GPU training, we should:
28 |     # 1) Step the optimizer after the same number of examples to achieve comparable updates
29 | # 2) Process the same number of examples in each forward pass
30 |     mini_batch_chunk_size = 32  # Make this as large as possible without running out of GPU memory, to fully pack each device
31 | num_devices_when_distributing = max(torch.cuda.device_count(), 1)
32 | mini_batch_size = mini_batch_chunk_size if multi_gpu else mini_batch_chunk_size * num_devices_when_distributing
33 | # e.g. Suppose your machine has 2 GPUs. If multi_gpu=False, the first gpu will process 32 examples, then the
34 | # first gpu will process another 32 examples, then the optimizer will step. If multi_gpu=True, each gpu will
35 | # process 32 examples at the same time, then the optimizer will step.
36 |
37 | trainer = ModelTrainer(model, corpus)
38 | trainer.fine_tune(
39 | "resources/taggers/multi-gpu",
40 | multi_gpu=multi_gpu, # Required for multi-gpu
41 | max_epochs=2,
42 | mini_batch_chunk_size=mini_batch_chunk_size,
43 | mini_batch_size=mini_batch_size,
44 | )
45 |
46 |
47 | if __name__ == "__main__":
48 | """Minimal example demonstrating how to train a model on multiple GPUs."""
49 | multi_gpu = True
50 |
51 | if multi_gpu:
52 | launch_distributed(main, multi_gpu) # Required for multi-gpu
53 | else:
54 | main(multi_gpu)
55 |
--------------------------------------------------------------------------------
/examples/ner/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/examples/ner/__init__.py
--------------------------------------------------------------------------------
/flair/class_utils.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import inspect
3 | from collections.abc import Iterable
4 | from types import ModuleType
5 | from typing import Any, Optional, Protocol, TypeVar, Union, overload
6 |
7 | T = TypeVar("T")
8 |
9 |
10 | class StringLike(Protocol):
11 | def __str__(self) -> str: ...
12 |
13 |
14 | def get_non_abstract_subclasses(cls: type[T]) -> Iterable[type[T]]:
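    # Depth-first over the subclass tree: recurse into each subclass first, then yield the
    # subclass itself unless it is abstract. Note that the root `cls` itself is never yielded.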
15 | for subclass in cls.__subclasses__():
16 | yield from get_non_abstract_subclasses(subclass)
17 | if inspect.isabstract(subclass):
18 | continue
19 | yield subclass
20 |
21 |
22 | def get_state_subclass_by_name(cls: type[T], cls_name: Optional[str]) -> type[T]:
23 | for sub_cls in get_non_abstract_subclasses(cls):
24 | if sub_cls.__name__ == cls_name:
25 | return sub_cls
26 | raise ValueError(f"Could not find any class with name '{cls_name}'")
27 |
28 |
29 | @overload
30 | def lazy_import(group: str, module: str, first_symbol: None) -> ModuleType: ...
31 |
32 |
33 | @overload
34 | def lazy_import(group: str, module: str, first_symbol: str, *symbols: str) -> list[Any]: ...
35 |
36 |
37 | def lazy_import(
38 | group: str, module: str, first_symbol: Optional[str] = None, *symbols: str
39 | ) -> Union[list[Any], ModuleType]:
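    """Imports a module lazily, optionally returning selected symbols from it.

    If the module is not installed, an ImportError points the user to the optional
    dependency group ('pip install flair[<group>]') that provides it.
    """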
40 | try:
41 | imported_module = importlib.import_module(module)
42 |     except ImportError as err:
43 |         raise ImportError(
44 |             f"Could not import {module}. Please install the optional '{group}' dependency via 'pip install flair[{group}]'."
45 |         ) from err
46 | if first_symbol is None:
47 | return imported_module
48 | symbols = (first_symbol, *symbols)
49 |
50 | return [getattr(imported_module, symbol) for symbol in symbols]
51 |
--------------------------------------------------------------------------------
/flair/embeddings/__init__.py:
--------------------------------------------------------------------------------
1 | # Expose base classes
2 | from flair.embeddings.transformer import (
3 | TransformerEmbeddings,
4 | TransformerJitDocumentEmbeddings,
5 | TransformerJitWordEmbeddings,
6 | TransformerOnnxDocumentEmbeddings,
7 | TransformerOnnxWordEmbeddings,
8 | )
9 |
10 | from .base import Embeddings, ScalarMix
11 |
12 | # Expose document embedding classes
13 | from .document import (
14 | DocumentCNNEmbeddings,
15 | DocumentEmbeddings,
16 | DocumentLMEmbeddings,
17 | DocumentPoolEmbeddings,
18 | DocumentRNNEmbeddings,
19 | DocumentTFIDFEmbeddings,
20 | SentenceTransformerDocumentEmbeddings,
21 | TransformerDocumentEmbeddings,
22 | )
23 |
24 | # Expose image embedding classes
25 | from .image import (
26 | ConvTransformNetworkImageEmbeddings,
27 | IdentityImageEmbeddings,
28 | ImageEmbeddings,
29 | NetworkImageEmbeddings,
30 | PrecomputedImageEmbeddings,
31 | )
32 |
33 | # Expose legacy embedding classes
34 | from .legacy import (
35 | CharLMEmbeddings,
36 | DocumentLSTMEmbeddings,
37 | DocumentMeanEmbeddings,
38 | ELMoEmbeddings,
39 | )
40 |
41 | # Expose token embedding classes
42 | from .token import (
43 | BytePairEmbeddings,
44 | CharacterEmbeddings,
45 | FastTextEmbeddings,
46 | FlairEmbeddings,
47 | HashEmbeddings,
48 | MuseCrosslingualEmbeddings,
49 | NILCEmbeddings,
50 | OneHotEmbeddings,
51 | PooledFlairEmbeddings,
52 | StackedEmbeddings,
53 | TokenEmbeddings,
54 | TransformerWordEmbeddings,
55 | WordEmbeddings,
56 | )
57 |
58 | __all__ = [
59 | "BPEmbSerializable",
60 | "BytePairEmbeddings",
61 | "CharLMEmbeddings",
62 | "CharacterEmbeddings",
63 | "ConvTransformNetworkImageEmbeddings",
64 | "DocumentCNNEmbeddings",
65 | "DocumentEmbeddings",
66 | "DocumentLMEmbeddings",
67 | "DocumentLSTMEmbeddings",
68 | "DocumentMeanEmbeddings",
69 | "DocumentPoolEmbeddings",
70 | "DocumentRNNEmbeddings",
71 | "DocumentTFIDFEmbeddings",
72 | "ELMoEmbeddings",
73 | "Embeddings",
74 | "FastTextEmbeddings",
75 | "FlairEmbeddings",
76 | "HashEmbeddings",
77 | "IdentityImageEmbeddings",
78 | "ImageEmbeddings",
79 | "MuseCrosslingualEmbeddings",
80 | "NILCEmbeddings",
81 | "NetworkImageEmbeddings",
82 | "OneHotEmbeddings",
83 | "PooledFlairEmbeddings",
84 | "PrecomputedImageEmbeddings",
85 | "ScalarMix",
86 | "SentenceTransformerDocumentEmbeddings",
87 | "StackedEmbeddings",
88 | "TokenEmbeddings",
89 | "TransformerDocumentEmbeddings",
90 | "TransformerEmbeddings",
91 | "TransformerJitDocumentEmbeddings",
92 | "TransformerJitWordEmbeddings",
93 | "TransformerOnnxDocumentEmbeddings",
94 | "TransformerOnnxWordEmbeddings",
95 | "TransformerWordEmbeddings",
96 | "WordEmbeddings",
97 | ]
98 |
--------------------------------------------------------------------------------
/flair/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .entity_linker_model import SpanClassifier
2 | from .entity_mention_linking import EntityMentionLinker
3 | from .language_model import LanguageModel
4 | from .lemmatizer_model import Lemmatizer
5 | from .multitask_model import MultitaskModel
6 | from .pairwise_classification_model import TextPairClassifier
7 | from .pairwise_regression_model import TextPairRegressor
8 | from .prefixed_tagger import PrefixedSequenceTagger # This import has to be after SequenceTagger!
9 | from .regexp_tagger import RegexpTagger
10 | from .relation_classifier_model import RelationClassifier
11 | from .relation_extractor_model import RelationExtractor
12 | from .sequence_tagger_model import SequenceTagger
13 | from .tars_model import FewshotClassifier, TARSClassifier, TARSTagger
14 | from .text_classification_model import TextClassifier
15 | from .text_regression_model import TextRegressor
16 | from .triple_classification_model import TextTripleClassifier
17 | from .word_tagger_model import TokenClassifier, WordTagger
18 |
19 | __all__ = [
20 | "EntityMentionLinker",
21 | "FewshotClassifier",
22 | "LanguageModel",
23 | "Lemmatizer",
24 | "MultitaskModel",
25 | "PrefixedSequenceTagger",
26 | "RegexpTagger",
27 | "RelationClassifier",
28 | "RelationExtractor",
29 | "SequenceTagger",
30 | "SpanClassifier",
31 | "TARSClassifier",
32 | "TARSTagger",
33 | "TextClassifier",
34 | "TextPairClassifier",
35 | "TextPairRegressor",
36 | "TextRegressor",
37 | "TextTripleClassifier",
38 | "TokenClassifier",
39 | "WordTagger",
40 | ]
41 |
--------------------------------------------------------------------------------
/flair/models/sequence_tagger_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/flair/models/sequence_tagger_utils/__init__.py
--------------------------------------------------------------------------------
/flair/models/sequence_tagger_utils/crf.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | import flair
4 |
5 | START_TAG: str = ""
6 | STOP_TAG: str = ""
7 |
8 |
9 | class CRF(torch.nn.Module):
10 | """Conditional Random Field.
11 |
12 |     Conditional Random Field implementation according to sgrvinod (https://github.com/sgrvinod).
13 |     Classifier which predicts a single tag / class / label for a given word based not just on the word,
14 |     but also on previously seen annotations.
15 | """
16 |
17 | def __init__(self, tag_dictionary, tagset_size: int, init_from_state_dict: bool) -> None:
18 | """Initialize the Conditional Random Field.
19 |
20 | Args:
21 |             tag_dictionary: tag dictionary used to find the IDs of the start and stop tags
22 |             tagset_size: number of tags in the tag dictionary
23 |             init_from_state_dict: whether a pretrained model is loaded from a state dict
24 | """
25 | super().__init__()
26 |
27 | self.tagset_size = tagset_size
28 | # Transitions are used in the following way: transitions[to, from].
29 | self.transitions = torch.nn.Parameter(torch.randn(tagset_size, tagset_size))
30 |         # If we are not using a pretrained model but training a fresh one, we need to set transitions from any tag
31 |         # to the START tag, and from the STOP tag to any other tag, to -10000.
32 | if not init_from_state_dict:
33 | self.transitions.detach()[tag_dictionary.get_idx_for_item(START_TAG), :] = -10000
34 |
35 | self.transitions.detach()[:, tag_dictionary.get_idx_for_item(STOP_TAG)] = -10000
36 | self.to(flair.device)
37 |
38 | def forward(self, features: torch.Tensor) -> torch.Tensor:
39 | """Forward propagation of Conditional Random Field.
40 |
41 | Args:
42 | features: output from RNN / Linear layer in shape (batch size, seq len, hidden size)
43 |
44 |         Returns: CRF scores (emission scores for each token + transition probabilities from the previous state) in shape (batch_size, seq_len, tagset_size, tagset_size)
45 | """
46 | batch_size, seq_len = features.size()[:2]
47 |
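        # features hold per-tag emission scores of shape (batch, seq_len, tagset_size); add a trailing
        # 'from-tag' dimension so they broadcast against the (to, from) transition matrix below.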
48 | emission_scores = features
49 | emission_scores = emission_scores.unsqueeze(-1).expand(batch_size, seq_len, self.tagset_size, self.tagset_size)
50 |
51 | crf_scores = emission_scores + self.transitions.unsqueeze(0).unsqueeze(0)
52 | return crf_scores
53 |
--------------------------------------------------------------------------------
/flair/nn/__init__.py:
--------------------------------------------------------------------------------
1 | from .decoder import DeepNCMDecoder, LabelVerbalizerDecoder, PrototypicalDecoder
2 | from .dropout import LockedDropout, WordDropout
3 | from .model import Classifier, DefaultClassifier, Model
4 |
5 | __all__ = [
6 | "Classifier",
7 | "DeepNCMDecoder",
8 | "DefaultClassifier",
9 | "LabelVerbalizerDecoder",
10 | "LockedDropout",
11 | "Model",
12 | "PrototypicalDecoder",
13 | "WordDropout",
14 | ]
15 |
--------------------------------------------------------------------------------
/flair/nn/distance/__init__.py:
--------------------------------------------------------------------------------
1 | from .cosine import CosineDistance, LogitCosineDistance, NegativeScaledDotProduct
2 | from .euclidean import EuclideanDistance, EuclideanMean
3 | from .hyperbolic import HyperbolicDistance, HyperbolicMean
4 |
5 | __all__ = [
6 | "CosineDistance",
7 | "EuclideanDistance",
8 | "EuclideanMean",
9 | "HyperbolicDistance",
10 | "HyperbolicMean",
11 | "LogitCosineDistance",
12 | "NegativeScaledDotProduct",
13 | ]
14 |
--------------------------------------------------------------------------------
/flair/nn/distance/cosine.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | # Source: https://github.com/UKPLab/sentence-transformers/blob/master/sentence_transformers/util.py#L23
4 |
5 |
6 | def dot_product(a: torch.Tensor, b: torch.Tensor, normalize=False):
7 | """Computes dot product for pairs of vectors.
8 |
9 | Args:
10 | a: the left tensor
11 | b: the right tensor
12 | normalize: Vectors are normalized (leads to cosine similarity)
13 |
14 | Returns: Matrix with res[i][j] = dot_product(a[i], b[j])
15 | """
16 | if len(a.shape) == 1:
17 | a = a.unsqueeze(0)
18 |
19 | if len(b.shape) == 1:
20 | b = b.unsqueeze(0)
21 |
22 | if normalize:
23 | a = torch.nn.functional.normalize(a, p=2, dim=1)
24 | b = torch.nn.functional.normalize(b, p=2, dim=1)
25 |
26 | return torch.mm(a, b.transpose(0, 1))
27 |
28 |
29 | class CosineDistance(torch.nn.Module):
30 | def forward(self, a, b):
31 | return -dot_product(a, b, normalize=True)
32 |
33 |
34 | class LogitCosineDistance(torch.nn.Module):
35 | def forward(self, a, b):
36 | return torch.logit(0.5 - 0.5 * dot_product(a, b, normalize=True))
37 |
38 |
39 | class NegativeScaledDotProduct(torch.nn.Module):
40 | def forward(self, a, b):
41 | sqrt_d = torch.sqrt(torch.tensor(a.size(-1)))
42 | return -dot_product(a, b, normalize=False) / sqrt_d
43 |
--------------------------------------------------------------------------------
/flair/nn/distance/euclidean.py:
--------------------------------------------------------------------------------
1 | """Euclidean distances implemented in pytorch.
2 |
3 | This module was copied from the following repository:
4 | https://github.com/asappresearch/dynamic-classification
5 |
6 | It contains the code from the paper "Metric Learning for Dynamic Text
7 | Classification".
8 |
9 | https://arxiv.org/abs/1911.01026
10 |
11 | In case this file is modified, please consider contributing to the original
12 | repository.
13 |
14 | It was published under MIT License:
15 | https://github.com/asappresearch/dynamic-classification/blob/master/LICENSE.md
16 |
17 | Source: https://github.com/asappresearch/dynamic-classification/blob/55beb5a48406c187674bea40487c011e8fa45aab/distance/euclidean.py
18 | """
19 |
20 | import torch
21 | from torch import Tensor, nn
22 |
23 |
24 | class EuclideanDistance(nn.Module):
25 | """Implement a EuclideanDistance object."""
26 |
27 | def forward(self, mat_1: Tensor, mat_2: Tensor) -> Tensor:
28 | """Returns the squared euclidean distance between each element in mat_1 and each element in mat_2.
29 |
30 | Parameters
31 | ----------
32 | mat_1: torch.Tensor
33 | matrix of shape (n_1, n_features)
34 | mat_2: torch.Tensor
35 | matrix of shape (n_2, n_features)
36 |
37 | Returns
38 | -------
39 | dist: torch.Tensor
40 | distance matrix of shape (n_1, n_2)
41 |
42 | """
43 | return torch.cdist(mat_1, mat_2).pow(2)
44 |
45 |
46 | class EuclideanMean(nn.Module):
47 | """Implement a EuclideanMean object."""
48 |
49 | def forward(self, data: Tensor) -> Tensor:
50 | """Performs a forward pass through the network.
51 |
52 | Parameters
53 | ----------
54 | data : torch.Tensor
55 | The input data, as a float tensor
56 |
57 | Returns
58 | -------
59 | torch.Tensor
60 | The encoded output, as a float tensor
61 |
62 | """
63 | return data.mean(0)
64 |
--------------------------------------------------------------------------------
/flair/nn/dropout.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class LockedDropout(torch.nn.Module):
5 | """Implementation of locked (or variational) dropout.
6 |
7 | Randomly drops out entire parameters in embedding space.
8 | """
9 |
10 | def __init__(self, dropout_rate=0.5, batch_first=True, inplace=False) -> None:
11 | super().__init__()
12 | self.dropout_rate = dropout_rate
13 | self.batch_first = batch_first
14 | self.inplace = inplace
15 |
16 | def forward(self, x):
17 | if not self.training or not self.dropout_rate:
18 | return x
19 |
20 | if not self.batch_first:
21 | m = x.data.new(1, x.size(1), x.size(2)).bernoulli_(1 - self.dropout_rate)
22 | else:
23 | m = x.data.new(x.size(0), 1, x.size(2)).bernoulli_(1 - self.dropout_rate)
24 |
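25 | # a single mask is sampled per sequence and shared across all time steps; dividing by
26 | # (1 - p) rescales the surviving units so activations keep their expected value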
27 | mask = m / (1 - self.dropout_rate)
26 | mask = mask.expand_as(x)
27 | return mask * x
28 |
29 | def extra_repr(self):
30 | inplace_str = ", inplace" if self.inplace else ""
31 | return f"p={self.dropout_rate}{inplace_str}"
32 |
33 |
34 | class WordDropout(torch.nn.Module):
35 | """Implementation of word dropout.
36 |
37 | Randomly drops out entire words (or characters) in embedding space.
38 | """
39 |
40 | def __init__(self, dropout_rate=0.05, inplace=False) -> None:
41 | super().__init__()
42 | self.dropout_rate = dropout_rate
43 | self.inplace = inplace
44 |
45 | def forward(self, x):
46 | if not self.training or not self.dropout_rate:
47 | return x
48 |
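49 | # sample one Bernoulli value per word position; the broadcast mask zeroes out whole
50 | # embedding vectors, and unlike standard dropout no rescaling is applied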
49 | m = x.data.new(x.size(0), x.size(1), 1).bernoulli_(1 - self.dropout_rate)
50 |
51 | mask = m
52 | return mask * x
53 |
54 | def extra_repr(self):
55 | inplace_str = ", inplace" if self.inplace else ""
56 | return f"p={self.dropout_rate}{inplace_str}"
57 |
--------------------------------------------------------------------------------
/flair/nn/multitask.py:
--------------------------------------------------------------------------------
1 | from collections.abc import Iterable
2 | from typing import Union
3 |
4 | from flair.data import Corpus, MultiCorpus
5 | from flair.models import MultitaskModel
6 | from flair.nn import Classifier, Model
7 |
8 |
9 | def make_multitask_model_and_corpus(
10 | mapping: Iterable[Union[tuple[Classifier, Corpus], tuple[Classifier, Corpus, float]]]
11 | ) -> tuple[Model, Corpus]:
12 | models = []
13 | corpora = []
14 | loss_factors = []
15 | ids = []
16 |
17 | for task_id, _map in enumerate(mapping):
18 | models.append(_map[0])
19 | corpora.append(_map[1])
20 | if len(_map) == 3:
21 | loss_factors.append(_map[2])
22 | else:
23 | loss_factors.append(1.0)
24 |
25 | ids.append(f"Task_{task_id}")
26 |
27 | return MultitaskModel(models=models, task_ids=ids, loss_factors=loss_factors), MultiCorpus(corpora, ids)
28 |
--------------------------------------------------------------------------------
/flair/nn/recurrent.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 |
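3 | # maps a layer type to (module, number of hidden-state tensors):
4 | # an LSTM returns a (h_n, c_n) pair, a GRU only returns h_n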
3 | rnn_layers = {"lstm": (nn.LSTM, 2), "gru": (nn.GRU, 1)}
4 |
5 |
6 | def create_recurrent_layer(layer_type, initial_size, hidden_size, nlayers, dropout=0, **kwargs):
7 | layer_type = layer_type.lower()
8 | assert layer_type in rnn_layers
9 | module, hidden_count = rnn_layers[layer_type]
10 |
11 | if nlayers == 1:
12 | dropout = 0
13 |
14 | return module(initial_size, hidden_size, nlayers, dropout=dropout, **kwargs), hidden_count
15 |
--------------------------------------------------------------------------------
/flair/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/flair/py.typed
--------------------------------------------------------------------------------
/flair/trainers/__init__.py:
--------------------------------------------------------------------------------
1 | from .language_model_trainer import LanguageModelTrainer, TextCorpus
2 | from .trainer import ModelTrainer
3 |
4 | __all__ = ["LanguageModelTrainer", "ModelTrainer", "TextCorpus"]
5 |
--------------------------------------------------------------------------------
/flair/trainers/plugins/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import BasePlugin, Pluggable, TrainerPlugin, TrainingInterrupt
2 | from .functional.anneal_on_plateau import AnnealingPlugin
3 | from .functional.checkpoints import CheckpointPlugin
4 | from .functional.deepncm_trainer_plugin import DeepNCMPlugin
5 | from .functional.linear_scheduler import LinearSchedulerPlugin
6 | from .functional.reduce_transformer_vocab import ReduceTransformerVocabPlugin
7 | from .functional.weight_extractor import WeightExtractorPlugin
8 | from .loggers.clearml_logger import ClearmlLoggerPlugin
9 | from .loggers.log_file import LogFilePlugin
10 | from .loggers.loss_file import LossFilePlugin
11 | from .loggers.metric_history import MetricHistoryPlugin
12 | from .loggers.tensorboard import TensorboardLogger
13 | from .metric_records import MetricName, MetricRecord
14 |
15 | __all__ = [
16 | "AnnealingPlugin",
17 | "BasePlugin",
18 | "CheckpointPlugin",
19 | "ClearmlLoggerPlugin",
20 | "DeepNCMPlugin",
21 | "LinearSchedulerPlugin",
22 | "LogFilePlugin",
23 | "LossFilePlugin",
24 | "MetricHistoryPlugin",
25 | "MetricName",
26 | "MetricRecord",
27 | "Pluggable",
28 | "ReduceTransformerVocabPlugin",
29 | "TensorboardLogger",
30 | "TrainerPlugin",
31 | "TrainingInterrupt",
32 | "WeightExtractorPlugin",
33 | ]
34 |
--------------------------------------------------------------------------------
/flair/trainers/plugins/functional/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/flair/trainers/plugins/functional/__init__.py
--------------------------------------------------------------------------------
/flair/trainers/plugins/functional/checkpoints.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import Any
3 |
4 | from flair.trainers.plugins.base import TrainerPlugin
5 |
6 | log = logging.getLogger("flair")
7 |
8 |
9 | class CheckpointPlugin(TrainerPlugin):
10 | def __init__(
11 | self,
12 | save_model_each_k_epochs,
13 | save_optimizer_state,
14 | base_path,
15 | ) -> None:
16 | super().__init__()
17 | self.save_optimizer_state = save_optimizer_state
18 | self.save_model_each_k_epochs = save_model_each_k_epochs
19 | self.base_path = base_path
20 |
21 | @TrainerPlugin.hook
22 | def after_training_epoch(self, epoch, **kw):
23 | """Saves the model each k epochs."""
24 | if self.save_model_each_k_epochs > 0 and epoch % self.save_model_each_k_epochs == 0:
25 | log.info(
26 | f"Saving model at current epoch since 'save_model_each_k_epochs={self.save_model_each_k_epochs}' "
27 | f"was set"
28 | )
29 | model_name = "model_epoch_" + str(epoch) + ".pt"
30 |
31 | # Use trainer's _save_model method - we have access to trainer through self.trainer
32 | self.trainer._save_model(self.base_path / model_name, save_optimizer_state=self.save_optimizer_state)
33 |
34 | @property
35 | def attach_to_all_processes(self) -> bool:
36 | return False
37 |
38 | def get_state(self) -> dict[str, Any]:
39 | return {
40 | **super().get_state(),
41 | "base_path": str(self.base_path),
42 | "save_model_each_k_epochs": self.save_model_each_k_epochs,
43 | "save_optimizer_state": self.save_optimizer_state,
44 | }
45 |
--------------------------------------------------------------------------------
/flair/trainers/plugins/functional/deepncm_trainer_plugin.py:
--------------------------------------------------------------------------------
1 | from collections.abc import Iterable
2 |
3 | import torch
4 |
5 | from flair.models import MultitaskModel
6 | from flair.nn import DeepNCMDecoder
7 | from flair.trainers.plugins.base import TrainerPlugin
8 |
9 |
10 | class DeepNCMPlugin(TrainerPlugin):
11 | """Plugin for training DeepNCMClassifier.
12 |
13 | Handles both multitask and single-task scenarios.
14 | """
15 |
16 | @property
17 | def decoders(self) -> Iterable[DeepNCMDecoder]:
18 | """Iterator over all DeepNCMDecoder decoders in the trainer."""
19 | model = self.trainer.model
20 |
21 | models = model.tasks.values() if isinstance(model, MultitaskModel) else [model]
22 |
23 | for sub_model in models:
24 | if hasattr(sub_model, "decoder") and isinstance(sub_model.decoder, DeepNCMDecoder):
25 | yield sub_model.decoder
26 |
27 | @TrainerPlugin.hook
28 | def after_training_epoch(self, **kwargs):
29 | """Reset class counts after each training epoch."""
30 | for decoder in self.decoders:
31 | if decoder.mean_update_method == "condensation":
32 | decoder.class_counts.data = torch.ones_like(decoder.class_counts)
33 |
34 | @TrainerPlugin.hook
35 | def after_training_batch(self, **kwargs):
36 | """Update prototypes after each training batch."""
37 | for decoder in self.decoders:
38 | decoder.update_prototypes()
39 |
40 | def __str__(self) -> str:
41 | return "DeepNCMPlugin"
42 |
--------------------------------------------------------------------------------
/flair/trainers/plugins/functional/weight_extractor.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | from flair.trainers.plugins.base import TrainerPlugin
4 | from flair.training_utils import WeightExtractor
5 |
6 |
7 | class WeightExtractorPlugin(TrainerPlugin):
8 | """Simple Plugin for weight extraction."""
9 |
10 | def __init__(self, base_path) -> None:
11 | super().__init__()
12 | self.base_path = base_path
13 | self.weight_extractor = WeightExtractor(base_path)
14 |
15 | @TrainerPlugin.hook
16 | def after_training_batch(self, batch_no, epoch, total_number_of_batches, **kw):
17 | """Extracts weights."""
18 | modulo = max(1, int(total_number_of_batches / 10))
19 | iteration = epoch * total_number_of_batches + batch_no
20 |
21 | if (iteration + 1) % modulo == 0:
22 | self.weight_extractor.extract_weights(self.model.state_dict(), iteration)
23 |
24 | @property
25 | def attach_to_all_processes(self) -> bool:
26 | return False
27 |
28 | def get_state(self) -> dict[str, Any]:
29 | return {
30 | **super().get_state(),
31 | "base_path": str(self.base_path),
32 | }
33 |
--------------------------------------------------------------------------------
/flair/trainers/plugins/loggers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/flair/trainers/plugins/loggers/__init__.py
--------------------------------------------------------------------------------
/flair/trainers/plugins/loggers/clearml_logger.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | from flair.trainers.plugins.base import TrainerPlugin
4 | from flair.trainers.plugins.metric_records import MetricRecord
5 |
6 |
7 | class ClearmlLoggerPlugin(TrainerPlugin):
8 | def __init__(self, task_id_or_task: Any):
9 | if isinstance(task_id_or_task, str):
10 | self.task_id = task_id_or_task
11 | self.task = None
12 | else:
13 | self.task = task_id_or_task
14 | self.task_id = self.task.task_id
15 | super().__init__()
16 |
17 | @property
18 | def logger(self):
19 | try:
20 | import clearml
21 | except ImportError:
22 | raise ImportError(
23 | "Please install clearml 1.11.0 or higher before using the clearml plugin"
24 | "otherwise you can remove the clearml plugin from the training or model card."
25 | )
26 | if self.task is None:
27 | self.task = clearml.Task.get_task(task_id=self.task_id)
28 | return self.task.get_logger()
29 |
30 | @TrainerPlugin.hook
31 | def metric_recorded(self, record: MetricRecord) -> None:
32 | record_name = ".".join(record.name)
33 |
34 | if record.is_scalar:
35 | self.logger.report_scalar(record_name, record_name, record.value, record.global_step)
36 | elif record.is_scalar_list:
37 | for i, v in enumerate(record.value):
38 | self.logger.report_scalar(record_name, f"{record_name}_{i}", v, record.global_step)
39 | elif record.is_string:
40 | self.logger.report_text(record.value, print_console=False)
41 | elif record.is_histogram:
42 | self.logger.report_histogram(record_name, record_name, record.value, record.global_step)
43 |
44 | @property
45 | def attach_to_all_processes(self) -> bool:
46 | return False
47 |
--------------------------------------------------------------------------------
/flair/trainers/plugins/loggers/log_file.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from pathlib import Path
3 | from typing import Any
4 |
5 | from flair.trainers.plugins.base import TrainerPlugin
6 | from flair.training_utils import add_file_handler
7 |
8 | log = logging.getLogger("flair")
9 |
10 |
11 | class LogFilePlugin(TrainerPlugin):
12 | """Plugin for the training.log file."""
13 |
14 | def __init__(self, base_path) -> None:
15 | super().__init__()
16 | self.base_path = base_path
17 | self.log_handler = add_file_handler(log, Path(base_path) / "training.log")
18 |
19 | @TrainerPlugin.hook("_training_exception", "after_training")
20 | def close_file_handler(self, **kw):
21 | self.log_handler.close()
22 | log.removeHandler(self.log_handler)
23 |
24 | @property
25 | def attach_to_all_processes(self) -> bool:
26 | return False
27 |
28 | def get_state(self) -> dict[str, Any]:
29 | return {**super().get_state(), "base_path": str(self.base_path)}
30 |
--------------------------------------------------------------------------------
/flair/trainers/plugins/loggers/metric_history.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from collections.abc import Mapping
3 | from typing import Any
4 |
5 | from flair.trainers.plugins.base import TrainerPlugin
6 |
7 | log = logging.getLogger("flair")
8 |
9 |
10 | default_metrics_to_collect = {
11 | ("train", "loss"): "train_loss_history",
12 | ("dev", "score"): "dev_score_history",
13 | ("dev", "loss"): "dev_loss_history",
14 | }
15 |
16 |
17 | class MetricHistoryPlugin(TrainerPlugin):
18 | def __init__(self, metrics_to_collect: Mapping = default_metrics_to_collect) -> None:
19 | super().__init__()
20 |
21 | self.metric_history: dict[str, list] = {}
22 | self.metrics_to_collect: Mapping = metrics_to_collect
23 | for target in self.metrics_to_collect.values():
24 | self.metric_history[target] = []
25 |
26 | @TrainerPlugin.hook
27 | def metric_recorded(self, record):
28 | if tuple(record.name) in self.metrics_to_collect:
29 | target = self.metrics_to_collect[tuple(record.name)]
30 | self.metric_history[target].append(record.value)
31 |
32 | @TrainerPlugin.hook
33 | def after_training(self, **kw):
34 | """Returns metric history."""
35 | self.trainer.return_values.update(self.metric_history)
36 |
37 | @property
38 | def attach_to_all_processes(self) -> bool:
39 | return False
40 |
41 | def get_state(self) -> dict[str, Any]:
42 | return {
43 | **super().get_state(),
44 | "metrics_to_collect": dict(self.metrics_to_collect),
45 | }
46 |
--------------------------------------------------------------------------------
/flair/visual/__init__.py:
--------------------------------------------------------------------------------
1 | from .activations import Highlighter
2 | from .manifold import Visualizer
3 |
4 | __all__ = ["Highlighter", "Visualizer"]
5 |
--------------------------------------------------------------------------------
/flair/visual/activations.py:
--------------------------------------------------------------------------------
1 | import numpy
2 |
3 |
4 | class Highlighter:
5 | def __init__(self) -> None:
6 | self.color_map = [
7 | "#ff0000",
8 | "#ff4000",
9 | "#ff8000",
10 | "#ffbf00",
11 | "#ffff00",
12 | "#bfff00",
13 | "#80ff00",
14 | "#40ff00",
15 | "#00ff00",
16 | "#00ff40",
17 | "#00ff80",
18 | "#00ffbf",
19 | "#00ffff",
20 | "#00bfff",
21 | "#0080ff",
22 | "#0040ff",
23 | "#0000ff",
24 | "#4000ff",
25 | "#8000ff",
26 | "#bf00ff",
27 | "#ff00ff",
28 | "#ff00bf",
29 | "#ff0080",
30 | "#ff0040",
31 | "#ff0000",
32 | ]
33 |
34 | def highlight(self, activation, text):
35 | activation = activation.detach().cpu().numpy()
36 |
37 | step_size = (max(activation) - min(activation)) / len(self.color_map)
38 |
39 | lookup = numpy.array(list(numpy.arange(min(activation), max(activation), step_size)))
40 |
41 | colors = []
42 |
43 | for _i, act in enumerate(activation):
44 | try:
45 | colors.append(self.color_map[numpy.where(act > lookup)[0][-1]])
46 | except IndexError:
47 | colors.append(self.color_map[-1])  # fall back to the last color, not its index
48 |
49 | str_ = "
"
50 |
51 | for i, (char, color) in enumerate(zip(list(text), colors)):
52 | str_ += self._render(char, color)
53 |
54 | if i % 100 == 0 and i > 0:
55 | str_ += "<br>"
56 |
57 | return str_
58 |
59 | def highlight_selection(self, activations, text, file_="resources/data/highlight.html", n=10):
60 | ix = numpy.random.default_rng().choice(activations.shape[1], size=n)
61 |
62 | rendered = ""
63 |
64 | for i in ix:
65 | rendered += self.highlight(activations[:, i], text)
66 |
67 | with open(file_, "w") as f:
68 | f.write(rendered)
69 |
70 | @staticmethod
71 | def _render(char, color):
72 | return f'<span style="background-color: {color}">{char}</span>'
73 |
--------------------------------------------------------------------------------
/flair/visual/tree_printer.py:
--------------------------------------------------------------------------------
1 | from pptree import print_tree
2 |
3 | from flair.data import Sentence, Token
4 |
5 |
6 | class NodeToken:
7 | def __init__(self, token: Token, tag_type: str) -> None:
8 | self.token: Token = token
9 | self.tag_type: str = tag_type
10 | self.children: list[NodeToken] = []
11 |
12 | def set_head(self, parent):
13 | parent.children.append(self)
14 |
15 | def __str__(self) -> str:
16 | return f" {self.token.text}({self.token.get_labels(self.tag_type)[0].value}) "
17 |
18 |
19 | def tree_printer(sentence: Sentence, tag_type: str):
20 | tree: list[NodeToken] = [NodeToken(token, tag_type) for token in sentence]
21 | for x in tree:
22 | if x.token.head_id != 0:
23 | head_token = x.token.get_head()
24 |
25 | for y in tree:
26 | if y.token == head_token:
27 | x.set_head(y)
28 | else:
29 | root_node = x
30 | print_tree(root_node, "children")
31 |
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | black[jupyter]==24.2.*
2 | konoha[janome]<6.0.0
3 | mypy>=1.2.0
4 | pytest>=7.3.1
5 | pytest-black-ng==0.4.*
6 | pytest-github-actions-annotate-failures>=0.1.8
7 | pytest-mypy>=0.10.3
8 | pytest-ruff==0.3.*
9 | ruff==0.7.*
10 | types-dataclasses>=0.6.6
11 | types-Deprecated>=1.2.9.2
12 | types-requests>=2.28.11.17
13 | types-tabulate>=0.9.0.2
14 | pyab3p
15 | transformers!=4.40.1,!=4.40.0
16 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | boto3>=1.20.27
2 | conllu>=4.0,<5.0.0
3 | deprecated>=1.2.13
4 | ftfy>=6.1.0
5 | gdown>=4.4.0
6 | huggingface-hub>=0.10.0
7 | langdetect>=1.0.9
8 | lxml>=4.8.0
9 | matplotlib>=2.2.3
10 | more-itertools>=8.13.0
11 | mpld3>=0.3
12 | pptree>=3.1
13 | python-dateutil>=2.8.2
14 | pytorch_revgrad>=0.2.0
15 | regex>=2022.1.18
16 | scikit-learn>=1.0.2
17 | segtok>=1.5.11
18 | sqlitedict>=2.0.0
19 | tabulate>=0.8.10
20 | torch>=1.13.1
21 | tqdm>=4.63.0
22 | transformer-smaller-training-vocab>=0.2.3
23 | transformers[sentencepiece]>=4.25.0,<5.0.0
24 | wikipedia-api>=0.5.7
25 | bioc<3.0.0,>=2.0.0
26 |
--------------------------------------------------------------------------------
/resources/docs/HUNFLAIR_TUTORIAL_3_ENTITY_LINKING.md:
--------------------------------------------------------------------------------
1 | # HunFlair Tutorial 3: Entity Linking
2 |
3 | After adding named entity recognition tags to your sentence, you can run named entity linking on these annotations.
4 |
5 | ```python
6 | from flair.models import EntityMentionLinker
7 | from flair.nn import Classifier
8 | from flair.tokenization import SciSpacyTokenizer
9 | from flair.data import Sentence
10 |
11 | sentence = Sentence(
12 | "The mutation in the ABCD1 gene causes X-linked adrenoleukodystrophy, "
13 | "a neurodegenerative disease, which is exacerbated by exposure to high "
14 | "levels of mercury in dolphin populations.",
15 | use_tokenizer=SciSpacyTokenizer()
16 | )
17 |
18 | ner_tagger = Classifier.load("hunflair")
19 | ner_tagger.predict(sentence)
20 |
21 | nen_tagger = EntityMentionLinker.load("disease-linker")
22 | nen_tagger.predict(sentence)
23 |
24 | nen_tagger = EntityMentionLinker.load("gene-linker")
25 | nen_tagger.predict(sentence)
26 |
27 | nen_tagger = EntityMentionLinker.load("chemical-linker")
28 | nen_tagger.predict(sentence)
29 |
30 | nen_tagger = EntityMentionLinker.load("species-linker")
31 | nen_tagger.predict(sentence)
32 |
33 | for tag in sentence.get_labels():
34 | print(tag)
35 | ```
36 |
37 | This should print:
38 |
39 | ```
40 | Span[4:5]: "ABCD1" → Gene (0.9575)
41 | Span[4:5]: "ABCD1" → abcd1 - NCBI-GENE-HUMAN:215 (14.5503)
42 | Span[7:11]: "X-linked adrenoleukodystrophy" → Disease (0.9867)
43 | Span[7:11]: "X-linked adrenoleukodystrophy" → x linked adrenoleukodystrophy - CTD-DISEASES:MESH:D000326 (13.9717)
44 | Span[13:15]: "neurodegenerative disease" → Disease (0.8865)
45 | Span[13:15]: "neurodegenerative disease" → neurodegenerative disease - CTD-DISEASES:MESH:D019636 (14.2779)
46 | Span[25:26]: "mercury" → Chemical (0.9456)
47 | Span[25:26]: "mercury" → mercury - CTD-CHEMICALS:MESH:D008628 (14.9185)
48 | Span[27:28]: "dolphin" → Species (0.8082)
49 | Span[27:28]: "dolphin" → marine dolphins - NCBI-TAXONOMY:9726 (14.473)
50 | ```
51 |
52 | The output contains both the NER annotations and their entity / concept identifiers according to
53 | a knowledge base or ontology. We have pre-configured combinations of models and dictionaries for
54 | "disease", "chemical", "gene" and "species".
55 |
56 | You can also provide your own model and dictionary:
57 |
58 | ```python
59 | from flair.models import EntityMentionLinker
60 |
61 | nen_tagger = EntityMentionLinker.build("name_or_path_to_your_model",
62 | dictionary_names_or_path="name_or_path_to_your_dictionary")
63 | nen_tagger = EntityMentionLinker.build("path_to_custom_disease_model", dictionary_names_or_path="disease")
64 | ```
65 |
66 | You can use any combination of provided models, provided dictionaries and your own.
67 |
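68 | All taggers and linkers also accept a list of sentences, which is more efficient than predicting
69 | one sentence at a time. A minimal sketch, reusing `ner_tagger` and the most recently loaded
70 | `nen_tagger` from the example above:
71 |
72 | ```python
73 | sentences = [
74 |     Sentence("Mutations in the BRCA1 gene are associated with breast cancer."),
75 |     Sentence("Mercury exposure is toxic to many marine species."),
76 | ]
77 |
78 | ner_tagger.predict(sentences)
79 | nen_tagger.predict(sentences)
80 |
81 | for s in sentences:
82 |     for label in s.get_labels():
83 |         print(label)
84 | ```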
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_8_MODEL_OPTIMIZATION.md:
--------------------------------------------------------------------------------
1 | # Tutorial 8: Model Tuning
2 |
3 | **Important**: This tutorial has been removed.
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_CORPUS_CUSTOM.md:
--------------------------------------------------------------------------------
1 | # Tutorial 4.3: Loading a Custom Corpus
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-training/how-to-load-custom-dataset
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
6 |
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_CORPUS_PREPARED.md:
--------------------------------------------------------------------------------
1 | # Tutorial 4.1: Loading a Prepared Corpus
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-training/how-to-load-prepared-dataset
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
6 |
7 |
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_EMBEDDINGS_OVERVIEW.md:
--------------------------------------------------------------------------------
1 | # Tutorial 3: Embeddings
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/category/tutorial-3-embeddings
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_FLAIR_BASICS.md:
--------------------------------------------------------------------------------
1 | # Tutorial 1: NLP Base Types
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/basic-types
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TAGGING_CIRCUS.md:
--------------------------------------------------------------------------------
1 | # Tutorial 2.6: Other Crazy Models in Flair
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/other-models
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TAGGING_LINKING.md:
--------------------------------------------------------------------------------
1 | # Tutorial 2.3: Entity Linking on Your Text
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/entity-linking
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TAGGING_NER.md:
--------------------------------------------------------------------------------
1 | # Tutorial 2.1: Tagging Entities in your Text
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/tagging-entities
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TAGGING_OVERVIEW.md:
--------------------------------------------------------------------------------
1 | # Tutorial 2: Tagging your Text
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/category/tutorial-1-basic-tagging
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TAGGING_POS.md:
--------------------------------------------------------------------------------
1 | # Tutorial 2.4: Tagging Parts of Speech in your Text
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/part-of-speech-tagging
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TAGGING_RELATIONS.md:
--------------------------------------------------------------------------------
1 | # Tutorial 2.5: Relation Extraction on Your Text
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/other-models
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TAGGING_SENTIMENT.md:
--------------------------------------------------------------------------------
1 | # Tutorial 2.2: Sentiment Analysis on Your Text
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/tagging-sentiment
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TRAINING_MODELS.md:
--------------------------------------------------------------------------------
1 | # Tutorial 4.1: How Model Training works in Flair
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-training/how-model-training-works
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
6 |
7 |
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TRAINING_OVERVIEW.md:
--------------------------------------------------------------------------------
1 | # Tutorial 4: Training your own Models
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/category/tutorial-2-training-models
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
6 |
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TRAINING_SEQUENCE_LABELER.md:
--------------------------------------------------------------------------------
1 | # Tutorial 4.4: Training Sequence Labeling Models
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-training/how-to-train-sequence-tagger
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
6 |
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TRAINING_TEXT_CLASSIFIER.md:
--------------------------------------------------------------------------------
1 | # Tutorial 4.5: Training Text Classification Models
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-training/how-to-train-text-classifier
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/embeddings/BYTE_PAIR_EMBEDDINGS.md:
--------------------------------------------------------------------------------
1 | # Byte Pair Embeddings
2 |
3 | `BytePairEmbeddings` are word embeddings that are precomputed on the subword-level. This means that they are able to
4 | embed any word by splitting words into subwords and looking up their embeddings. `BytePairEmbeddings` were proposed
5 | and computed by [Heinzerling and Strube (2018)](https://www.aclweb.org/anthology/L18-1473) who found that they offer nearly the same accuracy as word embeddings, but at a fraction
6 | of the model size. So they are a great choice if you want to train small models.
7 |
8 | You initialize with a language code (275 languages supported), a subword vocabulary size (the number of
9 | 'syllables') and a number of dimensions (one of 50, 100, 200 or 300); an example that sets these explicitly
10 | appears at the end of this page. The following initializes and uses byte pair embeddings for English:
11 |
12 | ```python
13 | from flair.data import Sentence
14 | from flair.embeddings import BytePairEmbeddings
14 |
15 | # init embedding
16 | embedding = BytePairEmbeddings('en')
17 |
18 | # create a sentence
19 | sentence = Sentence('The grass is green .')
20 |
21 | # embed words in sentence
22 | embedding.embed(sentence)
23 | ```
24 |
25 | More information can be found
26 | on the [byte pair embeddings](https://nlp.h-its.org/bpemb/) web page.
27 |
28 | `BytePairEmbeddings` also have a multilingual model capable of embedding any word in any language.
29 | You can instantiate it with:
30 |
31 | ```python
32 | # init embedding
33 | embedding = BytePairEmbeddings('multi')
34 | ```
35 |
36 | You can also load custom `BytePairEmbeddings` by passing paths via the `model_file_path` and `embedding_file_path` arguments. They correspond, respectively, to a SentencePiece model file and an embedding file (Word2Vec plain text or Gensim binary). For example:
37 |
38 | ```python
39 | # init custom embedding
40 | embedding = BytePairEmbeddings(model_file_path='your/path/m.model', embedding_file_path='your/path/w2v.txt')
41 | ```
42 |
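43 | The subword vocabulary size and the embedding dimensionality can also be set explicitly. A minimal
44 | sketch (the `syllables` and `dim` argument names are assumptions here, so check the API reference
45 | for your Flair version):
46 |
47 | ```python
48 | from flair.embeddings import BytePairEmbeddings
49 |
50 | # English embeddings with a 100k subword vocabulary and 300 dimensions (assumed argument names)
51 | embedding = BytePairEmbeddings('en', dim=300, syllables=100000)
52 | ```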
--------------------------------------------------------------------------------
/resources/docs/embeddings/CHARACTER_EMBEDDINGS.md:
--------------------------------------------------------------------------------
1 | # Character Embeddings
2 |
3 | `CharacterEmbeddings` allow you to add character-level word embeddings during model training. Note that these embeddings
4 | are randomly initialized when you initialize the class, so they are not meaningful unless you train them on a specific
5 | downstream task.
6 |
7 | For instance, the standard sequence labeling architecture used by [Lample et al. (2016)](https://www.aclweb.org/anthology/N16-1030) is a combination of classic word embeddings with task-trained character features. Normally this would require you to implement a [hierarchical embedding architecture](http://neuroner.com/NeuroNERengine_with_caption_no_figure.png) in which character-level embeddings for each word are computed using an RNN and then concatenated with word embeddings.
8 |
9 | In Flair, we simplify this by treating `CharacterEmbeddings` just like any other embedding class. To reproduce the
10 | Lample architecture, you need only combine them with standard `WordEmbeddings` in an embedding stack:
11 |
12 |
13 | ```python
14 | from flair.embeddings import CharacterEmbeddings, StackedEmbeddings, WordEmbeddings
15 |
16 | # init embedding stack
15 | embedding = StackedEmbeddings(
16 | [
17 | # standard word embeddings
18 | WordEmbeddings('glove'),
19 |
20 | # character-level features
21 | CharacterEmbeddings(),
22 | ]
23 | )
24 | ```
25 |
26 | If you pass this stacked embedding to a train method, the character-level features will now automatically be trained
27 | for your downstream task.
28 |
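29 | Before training, you can sanity-check the stack by embedding a single sentence:
30 |
31 | ```python
32 | from flair.data import Sentence
33 |
34 | # create a sentence and embed it with the stack defined above
35 | sentence = Sentence('The grass is green .')
36 | embedding.embed(sentence)
37 |
38 | # each token now carries the concatenated word + character embedding
39 | print(sentence[0].get_embedding().shape)
40 | ```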
--------------------------------------------------------------------------------
/resources/docs/embeddings/ELMO_EMBEDDINGS.md:
--------------------------------------------------------------------------------
1 | # ELMo Embeddings
2 |
3 | [ELMo embeddings](http://www.aclweb.org/anthology/N18-1202) were presented by Peters et al. in 2018. They use
4 | a bidirectional recurrent language model trained to predict the next (and previous) word in a text.
5 | We use the implementation of [AllenNLP](https://allennlp.org/elmo). Since this implementation comes with many
6 | sub-dependencies, which we don't want to include in Flair, you first need to install the library via
7 | `pip install allennlp==0.9.0` before you can use it in Flair.
8 | Using the embeddings is as simple as using any other embedding type:
9 |
10 | ```python
11 | from flair.data import Sentence
12 | from flair.embeddings import ELMoEmbeddings
12 |
13 | # init embedding
14 | embedding = ELMoEmbeddings()
15 |
16 | # create a sentence
17 | sentence = Sentence('The grass is green .')
18 |
19 | # embed words in sentence
20 | embedding.embed(sentence)
21 | ```
22 |
23 | ELMo word embeddings can be constructed by combining ELMo layers in different ways. The available combination strategies are:
24 | - `"all"`: Use the concatenation of the three ELMo layers.
25 | - `"top"`: Use the top ELMo layer.
26 | - `"average"`: Use the average of the three ELMo layers.
27 |
28 | By default (`"all"`), all three ELMo layers are concatenated to form the word embedding; an example of selecting a strategy follows the table below.
29 |
30 | AllenNLP provides the following pre-trained models. To use any of them inside Flair,
31 | simply specify the embedding ID when initializing the `ELMoEmbeddings`.
32 |
33 | | ID | Language | Embedding |
34 | | ------------- | ------------- | ------------- |
35 | | 'small' | English | 1024-hidden, 1 layer, 14.6M parameters |
36 | | 'medium' | English | 2048-hidden, 1 layer, 28.0M parameters |
37 | | 'original' | English | 4096-hidden, 2 layers, 93.6M parameters |
38 | | 'large' | English | |
39 | | 'pt' | Portuguese | |
40 | | 'pubmed' | English biomedical data | [more information](https://allennlp.org/elmo) |
41 |
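42 | For example, the following sketch loads the 'small' model and uses only the top layer. The
43 | layer-combination argument name (`embedding_mode`) is an assumption here; check the API reference
44 | for your Flair version:
45 |
46 | ```python
47 | from flair.embeddings import ELMoEmbeddings
48 |
49 | # 'small' English model, top ELMo layer only (assumed argument name)
50 | embedding = ELMoEmbeddings(model='small', embedding_mode='top')
51 | ```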
--------------------------------------------------------------------------------
/resources/docs/embeddings/FASTTEXT_EMBEDDINGS.md:
--------------------------------------------------------------------------------
1 | # FastText Embeddings
2 |
3 | FastText embeddings can give you vectors for out-of-vocabulary (OOV) words by using subword information.
4 | To use this functionality with Flair, use the `FastTextEmbeddings` class as shown:
5 |
6 | ```python
7 | from flair.data import Sentence
8 | from flair.embeddings import FastTextEmbeddings
8 |
9 | # init embedding
10 | embedding = FastTextEmbeddings('/path/to/local/custom_fasttext_embeddings.bin')
11 |
12 | # create a sentence
13 | sentence = Sentence('The grass is green .')
14 |
15 | # embed words in sentence
16 | embedding.embed(sentence)
17 | ```
18 |
19 | You can also initialize the class with a URL from which the embeddings will be downloaded:
20 |
21 | ```python
22 | embedding = FastTextEmbeddings('/path/to/remote/downloadable/custom_fasttext_embeddings.bin', use_local=False)
23 | ```
24 |
25 | Note that FastText embedding files are typically huge, resulting in equally huge models for downstream tasks.
26 |
27 | Alternatively, you can use FastText embeddings without the OOV functionality by combining normal
28 | `WordEmbeddings`, which are smaller, with `BytePairEmbeddings`, which are tiny and provide the OOV
29 | functionality. So, instead of using English `FastTextEmbeddings` with OOV handling, you could use this stack:
31 |
32 | ```python
33 | from flair.data import Sentence
34 | from flair.embeddings import BytePairEmbeddings, StackedEmbeddings, WordEmbeddings
34 |
35 | # init embedding
36 | embedding = StackedEmbeddings(
37 | [
38 | # standard FastText word embeddings for English
39 | WordEmbeddings('en'),
40 | # Byte pair embeddings for English
41 | BytePairEmbeddings('en'),
42 | ]
43 | )
44 |
45 | # create a sentence
46 | sentence = Sentence('The grass is green .')
47 |
48 | # embed words in sentence
49 | embedding.embed(sentence)
50 | ```
51 |
--------------------------------------------------------------------------------
/resources/docs/flair_logo_2020.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/resources/docs/flair_logo_2020.png
--------------------------------------------------------------------------------
/resources/docs/flair_logo_2020_FINAL_day_dpi72.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/resources/docs/flair_logo_2020_FINAL_day_dpi72.png
--------------------------------------------------------------------------------
/resources/docs/flair_logo_2020_FINAL_night_dpi72.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/resources/docs/flair_logo_2020_FINAL_night_dpi72.png
--------------------------------------------------------------------------------
/resources/docs/flair_logo_2020_FINAL_night_light_dpi72.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/resources/docs/flair_logo_2020_FINAL_night_light_dpi72.png
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from setuptools import find_packages, setup
4 |
5 | required = Path("requirements.txt").read_text(encoding="utf-8").split("\n")
6 |
7 | setup(
8 | name="flair",
9 | version="0.15.1",
10 | description="A very simple framework for state-of-the-art NLP",
11 | long_description=Path("README.md").read_text(encoding="utf-8"),
12 | long_description_content_type="text/markdown",
13 | author="Alan Akbik",
14 | author_email="alan.akbik@gmail.com",
15 | url="https://github.com/flairNLP/flair",
16 | packages=find_packages(exclude=["tests", "tests.*"]), # same as name
17 | license="MIT",
18 | install_requires=required,
19 | extras_require={
20 | "word-embeddings": ["gensim>=4.2.0", "bpemb>=0.3.5"],
21 | },
22 | include_package_data=True,
23 | python_requires=">=3.9",
24 | )
25 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/tests/__init__.py
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import pytest
4 | import torch
5 |
6 | import flair
7 |
8 |
9 | @pytest.fixture(scope="module")
10 | def resources_path():
11 | return Path(__file__).parent / "resources"
12 |
13 |
14 | @pytest.fixture(scope="module")
15 | def tasks_base_path(resources_path):
16 | return resources_path / "tasks"
17 |
18 |
19 | @pytest.fixture()
20 | def results_base_path(resources_path):
21 | path = resources_path / "results"
22 | try:
23 | yield path
24 | finally:
25 | for p in reversed(list(path.rglob("*"))):
26 | if p.is_file():
27 | p.unlink()
28 | else:
29 | p.rmdir()
30 | if path.is_dir():
31 | path.rmdir()
32 |
33 |
34 | @pytest.fixture(autouse=True)
35 | def set_cpu(force_cpu):
36 | if force_cpu:
37 | flair.device = torch.device("cpu")
38 |
39 |
40 | def pytest_addoption(parser):
41 | parser.addoption(
42 | "--runintegration",
43 | action="store_true",
44 | default=False,
45 | help="run integration tests",
46 | )
47 | parser.addoption(
48 | "--force-cpu",
49 | action="store_true",
50 | default=False,
51 | help="use cpu for tests even when gpu is available",
52 | )
53 |
54 |
55 | def pytest_collection_modifyitems(config, items):
56 | if not config.getoption("--runintegration"):
57 | skip_integration = pytest.mark.skip(reason="need --runintegration option to run")
58 | for item in items:
59 | if "integration" in item.keywords:
60 | item.add_marker(skip_integration)
61 |
62 |
63 | def pytest_generate_tests(metafunc):
64 | option_value = metafunc.config.getoption("--force-cpu")
65 | if "force_cpu" in metafunc.fixturenames and option_value is not None:
66 | metafunc.parametrize("force_cpu", [option_value])
67 |
--------------------------------------------------------------------------------
/tests/embeddings/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/tests/embeddings/__init__.py
--------------------------------------------------------------------------------
/tests/embeddings/test_byte_pair_embeddings.py:
--------------------------------------------------------------------------------
1 | from flair.embeddings import BytePairEmbeddings
2 | from tests.embedding_test_utils import BaseEmbeddingsTest
3 |
4 |
5 | class TestBytePairEmbeddings(BaseEmbeddingsTest):
6 | embedding_cls = BytePairEmbeddings
7 | is_token_embedding = True
8 | is_document_embedding = False
9 | default_args = {"language": "en"}
10 |
--------------------------------------------------------------------------------
/tests/embeddings/test_document_transform_word_embeddings.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | from flair.embeddings import (
4 | DocumentCNNEmbeddings,
5 | DocumentLMEmbeddings,
6 | DocumentPoolEmbeddings,
7 | DocumentRNNEmbeddings,
8 | FlairEmbeddings,
9 | TokenEmbeddings,
10 | WordEmbeddings,
11 | )
12 | from tests.embedding_test_utils import BaseEmbeddingsTest
13 |
14 | word: TokenEmbeddings = WordEmbeddings("turian")
15 | flair_embedding: TokenEmbeddings = FlairEmbeddings("news-forward-fast")
16 | flair_embedding_back: TokenEmbeddings = FlairEmbeddings("news-backward-fast")
17 |
18 |
19 | class BaseDocumentsViaWordEmbeddingsTest(BaseEmbeddingsTest):
20 | is_document_embedding = True
21 | is_token_embedding = False
22 | base_embeddings: list[TokenEmbeddings] = [word, flair_embedding]
23 |
24 | def create_embedding_from_name(self, name: str):
25 | """Overwrite this method if it is more complex to load an embedding by name."""
26 | assert self.name_field is not None
27 | kwargs = dict(self.default_args)
28 | kwargs.pop(self.name_field)
29 | return self.embedding_cls(name, **kwargs) # type: ignore[call-arg]
30 |
31 | def create_embedding_with_args(self, args: dict[str, Any]):
32 | kwargs = dict(self.default_args)
33 | for k, v in args.items():
34 | kwargs[k] = v
35 | return self.embedding_cls(self.base_embeddings, **kwargs) # type: ignore[call-arg]
36 |
37 |
38 | class TestDocumentLstmEmbeddings(BaseDocumentsViaWordEmbeddingsTest):
39 | embedding_cls = DocumentRNNEmbeddings
40 | default_args = {
41 | "hidden_size": 128,
42 | "bidirectional": False,
43 | }
44 | valid_args = [{"bidirectional": False}, {"bidirectional": True}]
45 |
46 |
47 | class TestDocumentPoolEmbeddings(BaseDocumentsViaWordEmbeddingsTest):
48 | embedding_cls = DocumentPoolEmbeddings
49 | default_args = {
50 | "fine_tune_mode": "nonlinear",
51 | }
52 | valid_args = [{"pooling": "mean"}, {"pooling": "max"}, {"pooling": "min"}]
53 |
54 |
55 | class TestDocumentCNNEmbeddings(BaseDocumentsViaWordEmbeddingsTest):
56 | embedding_cls = DocumentCNNEmbeddings
57 | default_args = {
58 | "kernels": ((50, 2), (50, 3)),
59 | }
60 | valid_args = [{"reproject_words_dimension": None}, {"reproject_words_dimension": 100}]
61 |
62 |
63 | class TestDocumentLMEmbeddings(BaseDocumentsViaWordEmbeddingsTest):
64 | embedding_cls = DocumentLMEmbeddings
65 | base_embeddings = [flair_embedding, flair_embedding_back]
66 | default_args: dict[str, Any] = {}
67 |
--------------------------------------------------------------------------------
/tests/embeddings/test_flair_embeddings.py:
--------------------------------------------------------------------------------
1 | from flair.data import Dictionary, Sentence
2 | from flair.embeddings import (
3 | DocumentLMEmbeddings,
4 | DocumentRNNEmbeddings,
5 | FlairEmbeddings,
6 | )
7 | from flair.models import LanguageModel
8 | from tests.embedding_test_utils import BaseEmbeddingsTest
9 |
10 |
11 | class TestFlairEmbeddings(BaseEmbeddingsTest):
12 | embedding_cls = FlairEmbeddings
13 | is_token_embedding = True
14 | is_document_embedding = False
15 | default_args = {"model": "news-forward-fast"}
16 |
17 | name_field = "model"
18 | invalid_names = ["other", "not/existing/path/to/embeddings"]
19 |
20 | def test_fine_tunable_flair_embedding(self):
21 | language_model_forward = LanguageModel(Dictionary.load("chars"), is_forward_lm=True, hidden_size=32, nlayers=1)
22 |
23 | embeddings: DocumentRNNEmbeddings = DocumentRNNEmbeddings(
24 | [FlairEmbeddings(language_model_forward, fine_tune=True)],
25 | hidden_size=128,
26 | bidirectional=False,
27 | )
28 |
29 | sentence: Sentence = Sentence("I love Berlin.")
30 |
31 | embeddings.embed(sentence)
32 |
33 | assert len(sentence.get_embedding()) == 128
34 | assert len(sentence.get_embedding()) == embeddings.embedding_length
35 |
36 | sentence.clear_embeddings()
37 |
38 | assert len(sentence.get_embedding()) == 0
39 |
40 | embeddings: DocumentLMEmbeddings = DocumentLMEmbeddings(
41 | [FlairEmbeddings(language_model_forward, fine_tune=True)]
42 | )
43 |
44 | sentence: Sentence = Sentence("I love Berlin.")
45 |
46 | embeddings.embed(sentence)
47 |
48 | assert len(sentence.get_embedding()) == 32
49 | assert len(sentence.get_embedding()) == embeddings.embedding_length
50 |
51 | sentence.clear_embeddings()
52 |
53 | assert len(sentence.get_embedding()) == 0
54 | del embeddings
55 |
--------------------------------------------------------------------------------
/tests/embeddings/test_simple_token_embeddings.py:
--------------------------------------------------------------------------------
1 | from flair.data import Dictionary
2 | from flair.embeddings import CharacterEmbeddings, HashEmbeddings, OneHotEmbeddings
3 | from tests.embedding_test_utils import BaseEmbeddingsTest
4 |
5 | vocab_dictionary = Dictionary(add_unk=True)
6 | vocab_dictionary.add_item("I")
7 | vocab_dictionary.add_item("love")
8 | vocab_dictionary.add_item("berlin")
9 |
10 |
11 | class TestCharacterEmbeddings(BaseEmbeddingsTest):
12 | embedding_cls = CharacterEmbeddings
13 | is_token_embedding = True
14 | is_document_embedding = False
15 | default_args = {"path_to_char_dict": None}
16 |
17 |
18 | class TestOneHotEmbeddings(BaseEmbeddingsTest):
19 | embedding_cls = OneHotEmbeddings
20 | is_token_embedding = True
21 | is_document_embedding = False
22 | default_args = {"vocab_dictionary": vocab_dictionary}
23 |
24 |
25 | class TestHashEmbeddings(BaseEmbeddingsTest):
26 | embedding_cls = HashEmbeddings
27 | is_token_embedding = True
28 | is_document_embedding = False
29 | default_args = {"num_embeddings": 10}
30 |
--------------------------------------------------------------------------------
/tests/embeddings/test_stacked_embeddings.py:
--------------------------------------------------------------------------------
1 | from flair.data import Sentence
2 | from flair.embeddings import (
3 | FlairEmbeddings,
4 | StackedEmbeddings,
5 | TokenEmbeddings,
6 | WordEmbeddings,
7 | )
8 | from flair.embeddings.base import load_embeddings
9 |
10 |
11 | def test_stacked_embeddings():
12 | glove: TokenEmbeddings = WordEmbeddings("turian")
13 | flair_embedding: TokenEmbeddings = FlairEmbeddings("news-forward-fast")
14 | embeddings: StackedEmbeddings = StackedEmbeddings([glove, flair_embedding])
15 |
16 | sentence: Sentence = Sentence("I love Berlin. Berlin is a great place to live.")
17 | embeddings.embed(sentence)
18 |
19 | for token in sentence.tokens:
20 | assert len(token.get_embedding()) == 1074
21 |
22 | token.clear_embeddings()
23 |
24 | assert len(token.get_embedding()) == 0
25 | del embeddings
26 |
27 |
28 | def test_stacked_embeddings_stay_the_same_after_saving_and_loading():
29 | glove: TokenEmbeddings = WordEmbeddings("turian")
30 | flair_embedding: TokenEmbeddings = FlairEmbeddings("news-forward-fast")
31 | embeddings: StackedEmbeddings = StackedEmbeddings([glove, flair_embedding])
32 |
33 | assert not embeddings.training
34 |
35 | sentence_old: Sentence = Sentence("I love Berlin")
36 | embeddings.embed(sentence_old)
37 | names_old = embeddings.get_names()
38 | embedding_length_old = embeddings.embedding_length
39 |
40 | save_data = embeddings.save_embeddings(use_state_dict=True)
41 | new_embeddings = load_embeddings(save_data)
42 |
43 | sentence_new: Sentence = Sentence("I love Berlin")
44 | new_embeddings.embed(sentence_new)
45 | names_new = new_embeddings.get_names()
46 | embedding_length_new = new_embeddings.embedding_length
47 |
48 | assert not new_embeddings.training
49 | assert names_old == names_new
50 | assert embedding_length_old == embedding_length_new
51 |
52 | for token_old, token_new in zip(sentence_old, sentence_new):
53 | assert (token_old.get_embedding(names_old) == token_new.get_embedding(names_new)).all()
54 |
--------------------------------------------------------------------------------
/tests/embeddings/test_tfidf_embeddings.py:
--------------------------------------------------------------------------------
1 | from flair.data import Sentence
2 | from flair.embeddings import DocumentTFIDFEmbeddings
3 | from tests.embedding_test_utils import BaseEmbeddingsTest
4 |
5 |
6 | class TFIDFEmbeddingsTest(BaseEmbeddingsTest):
7 | embedding_cls = DocumentTFIDFEmbeddings
8 | is_document_embedding = True
9 | is_token_embedding = False
10 |
11 | default_args = {
12 | "train_dataset": [
13 | Sentence("This is a sentence"),
14 | Sentence("This is another sentence"),
15 | Sentence("another a This I Berlin"),
16 | ]
17 | }
18 |
--------------------------------------------------------------------------------
/tests/embeddings/test_transformer_document_embeddings.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from flair.data import Dictionary, Sentence
4 | from flair.embeddings import TransformerDocumentEmbeddings
5 | from flair.models import TextClassifier
6 | from flair.nn import Classifier
7 | from tests.embedding_test_utils import BaseEmbeddingsTest
8 |
9 |
10 | class TestTransformerDocumentEmbeddings(BaseEmbeddingsTest):
11 | embedding_cls = TransformerDocumentEmbeddings
12 | is_document_embedding = True
13 | is_token_embedding = False
14 | default_args = {"model": "distilbert-base-uncased", "allow_long_sentences": False}
15 | valid_args = [
16 | {"layers": "-1,-2,-3,-4", "layer_mean": False},
17 | {"layers": "all", "layer_mean": True},
18 | {"layers": "all", "layer_mean": False},
19 | ]
20 |
21 | name_field = "embeddings"
22 | invalid_names = ["other", "not/existing/path/to/embeddings"]
23 |
24 |
25 | def test_if_loaded_embeddings_have_all_attributes(tasks_base_path):
26 | # dummy model with embeddings
27 | embeddings = TransformerDocumentEmbeddings(
28 | "distilbert-base-uncased",
29 | use_context=True,
30 | use_context_separator=False,
31 | )
32 |
33 | model = TextClassifier(label_type="ner", label_dictionary=Dictionary(), embeddings=embeddings)
34 |
35 | # save the dummy and load it again
36 | model.save(tasks_base_path / "single.pt")
37 | loaded_single_task = Classifier.load(tasks_base_path / "single.pt")
38 |
39 | # check that context_length and use_context_separator is the same for both
40 | assert model.embeddings.context_length == loaded_single_task.embeddings.context_length
41 | assert model.embeddings.use_context_separator == loaded_single_task.embeddings.use_context_separator
42 |
43 |
44 | @pytest.mark.parametrize("cls_pooling", ["cls", "mean", "max"])
45 | def test_cls_pooling(cls_pooling):
46 | embeddings = TransformerDocumentEmbeddings(
47 | model="distilbert-base-uncased",
48 | layers="-1",
49 | cls_pooling=cls_pooling,
50 | allow_long_sentences=True,
51 | )
52 | sentence = Sentence("Today is a good day.")
53 | embeddings.embed(sentence)
54 | assert sentence.embedding is not None
55 |
--------------------------------------------------------------------------------
/tests/embeddings/test_word_embeddings.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | from flair.embeddings import MuseCrosslingualEmbeddings, NILCEmbeddings, WordEmbeddings
4 | from tests.embedding_test_utils import BaseEmbeddingsTest
5 |
6 |
7 | class TestWordEmbeddings(BaseEmbeddingsTest):
8 | embedding_cls = WordEmbeddings
9 | is_token_embedding = True
10 | is_document_embedding = False
11 | default_args = {"embeddings": "turian"}
12 |
13 | name_field = "embeddings"
14 | invalid_names = ["other", "not/existing/path/to/embeddings"]
15 |
16 |
17 | class TestMuseCrosslingualEmbeddings(BaseEmbeddingsTest):
18 | embedding_cls = MuseCrosslingualEmbeddings
19 | is_token_embedding = True
20 | is_document_embedding = False
21 | default_args: dict[str, Any] = {}
22 |
23 |
24 | class TestNILCEmbeddings(BaseEmbeddingsTest):
25 | embedding_cls = NILCEmbeddings
26 | is_token_embedding = True
27 | is_document_embedding = False
28 | default_args = {"embeddings": "fasttext", "model": "cbow", "size": 50}
29 | valid_args = [{"embeddings": "glove"}]
30 |
31 | name_field = "embeddings"
32 | invalid_names = ["other", "not/existing/path/to/embeddings"]
33 |
--------------------------------------------------------------------------------
/tests/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/tests/models/__init__.py
--------------------------------------------------------------------------------
/tests/models/test_entity_linker.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from flair.data import Sentence
4 | from flair.datasets import NEL_ENGLISH_AIDA
5 | from flair.embeddings import TransformerWordEmbeddings
6 | from flair.models import SpanClassifier
7 | from tests.model_test_utils import BaseModelTest
8 |
9 |
10 | class TestEntityLinker(BaseModelTest):
11 | model_cls = SpanClassifier
12 | train_label_type = "nel"
13 | training_args = {"max_epochs": 2}
14 |
15 | @pytest.fixture()
16 | def embeddings(self):
17 | return TransformerWordEmbeddings(model="distilbert-base-uncased", layers="-1", fine_tune=True)
18 |
19 | @pytest.fixture()
20 | def corpus(self, tasks_base_path):
21 | return NEL_ENGLISH_AIDA().downsample(0.01)
22 |
23 | @pytest.fixture()
24 | def train_test_sentence(self):
25 | sentence = Sentence("I love NYC and hate OYC")
26 |
27 | sentence[2:3].add_label("nel", "New York City")
28 | sentence[5:6].add_label("nel", "Old York City")
29 | return sentence
30 |
31 | @pytest.fixture()
32 | def labeled_sentence(self):
33 | sentence = Sentence("I love NYC and hate OYC")
34 |
35 | sentence[2:3].add_label("nel", "New York City")
36 | sentence[5:6].add_label("nel", "Old York City")
37 | return sentence
38 |
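`sentence[2:3]` and `sentence[5:6]` in the fixtures above are token-index slices (0-indexed, end-exclusive), each selecting a one-token span to label. A quick sanity check of the indexing, assuming flair's `Span.text` attribute as used elsewhere in this test suite:

```python
from flair.data import Sentence

sentence = Sentence("I love NYC and hate OYC")
assert sentence[2:3].text == "NYC"  # tokens: I(0) love(1) NYC(2) and(3) hate(4) OYC(5)
assert sentence[5:6].text == "OYC"
```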
--------------------------------------------------------------------------------
/tests/models/test_model_license.py:
--------------------------------------------------------------------------------
1 | from flair.nn import Model
2 |
3 |
4 | def test_model_license_persistence(tmp_path):
5 | """Test setting and persisting license information for a model."""
6 | # Create temporary file path using pytest's tmp_path fixture
7 | model_path = tmp_path / "test_model_license.pt"
8 |
9 | # Load a base model
10 | model = Model.load("ner-fast")
11 |
12 | # Check initial license (should be none/default)
13 | assert model.license_info == "No license information available"
14 |
15 | # Set a new license
16 | test_license = "MIT License - Copyright (c) 2024"
17 | model.license_info = test_license
18 | assert model.license_info == test_license
19 |
20 | # Save the model with the new license
21 | model.save(str(model_path))
22 |
23 | # Load the saved model and check license persists
24 | loaded_model = Model.load(model_path)
25 | assert loaded_model.license_info == test_license
26 |
--------------------------------------------------------------------------------
/tests/models/test_regexp_tagger.py:
--------------------------------------------------------------------------------
1 | from flair.data import Sentence
2 | from flair.models import RegexpTagger
3 |
4 |
5 | def test_regexp_tagger():
6 |
7 | sentence = Sentence('Der sagte: "das ist durchaus interessant"')
8 |
9 | tagger = RegexpTagger(
10 | mapping=[(r'["„»]((?:(?=(\\?))\2.)*?)[”"“«]', "quote_part", 1), (r'["„»]((?:(?=(\\?))\2.)*?)[”"“«]', "quote")]
11 | )
12 |
13 | tagger.predict(sentence)
14 |
15 | assert sentence.get_label("quote_part").data_point.text == "das ist durchaus interessant"
16 | assert sentence.get_label("quote").data_point.text == '"das ist durchaus interessant"'
17 |
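Judging from the asserts above, the mapping entries are `(pattern, label_type)` or `(pattern, label_type, group)` tuples: when a capture-group index is given, only that group's text is labeled; otherwise the whole match is. The regex itself matches a quotation while tolerating backslash-escaped characters inside it (the `(?=(\\?))\2.` lookahead captures an optional escaping backslash, then consumes it together with the following character). A standalone check of what the two entries extract:

```python
import re

pattern = r'["„»]((?:(?=(\\?))\2.)*?)[”"“«]'
match = re.search(pattern, 'Der sagte: "das ist durchaus interessant"')
print(match.group(0))  # '"das ist durchaus interessant"' -> labeled "quote"
print(match.group(1))  # 'das ist durchaus interessant'   -> labeled "quote_part" (group 1)
```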
--------------------------------------------------------------------------------
/tests/models/test_relation_extractor.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from flair.data import Sentence
4 | from flair.datasets import ColumnCorpus
5 | from flair.embeddings import TransformerWordEmbeddings
6 | from flair.models import RelationExtractor
7 | from tests.model_test_utils import BaseModelTest
8 |
9 |
10 | class TestRelationExtractor(BaseModelTest):
11 | model_cls = RelationExtractor
12 | train_label_type = "relation"
13 | pretrained_model = "relations"
14 | model_args = {
15 | "entity_label_type": "ner",
16 | "train_on_gold_pairs_only": True,
17 | "entity_pair_filters": { # Define valid entity pair combinations, used as relation candidates
18 | ("ORG", "PER"), # founded_by
19 | ("LOC", "PER"), # place_of_birth
20 | },
21 | }
22 | training_args = {
23 | "max_epochs": 4,
24 | "mini_batch_size": 4,
25 | "learning_rate": 0.1,
26 | }
27 |
28 | @pytest.fixture()
29 | def corpus(self, tasks_base_path):
30 | return ColumnCorpus(
31 | data_folder=tasks_base_path / "conllu",
32 | train_file="train.conllup",
33 | dev_file="train.conllup",
34 | test_file="train.conllup",
35 | column_format={1: "text", 2: "pos", 3: "ner"},
36 | )
37 |
38 | @pytest.fixture()
39 | def example_sentence(self):
40 |         sentence = Sentence(["Microsoft", "was", "founded", "by", "Bill", "Gates"])
41 | sentence[:1].add_label(typename="ner", value="ORG", score=1.0)
42 | sentence[4:].add_label(typename="ner", value="PER", score=1.0)
43 | return sentence
44 |
45 | @pytest.fixture()
46 | def train_test_sentence(self):
47 | sentence = Sentence(["Apple", "was", "founded", "by", "Steve", "Jobs", "."])
48 | sentence[0:1].add_label("ner", "ORG")
49 | sentence[4:6].add_label("ner", "PER")
50 | return sentence
51 |
52 | @pytest.fixture()
53 | def embeddings(self):
54 | return TransformerWordEmbeddings(model="distilbert-base-uncased", fine_tune=True)
55 |
56 | def assert_training_example(self, predicted_training_example):
57 | relations = predicted_training_example.get_relations("relation")
58 | assert len(relations) == 1
59 | assert relations[0].tag == "founded_by"
60 |
61 | def has_embedding(self, sentence):
62 | return all(token.get_embedding().cpu().numpy().size != 0 for token in sentence)
63 |
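The `entity_pair_filters` set in the class configuration above whitelists which (head-label, tail-label) combinations may form relation candidates at all. A conceptual sketch of that filtering (not flair's actual candidate-generation code):

```python
from itertools import permutations

filters = {("ORG", "PER"), ("LOC", "PER")}
entities = [("Microsoft", "ORG"), ("Bill Gates", "PER")]

candidates = [
    (head, tail)
    for head, tail in permutations(entities, 2)
    if (head[1], tail[1]) in filters  # keep only allowed label pairs
]
print(candidates)  # [(('Microsoft', 'ORG'), ('Bill Gates', 'PER'))]
```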
--------------------------------------------------------------------------------
/tests/models/test_text_regressor.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import flair
4 | from flair.embeddings import DocumentRNNEmbeddings, WordEmbeddings
5 | from flair.models.text_regression_model import TextRegressor
6 | from tests.model_test_utils import BaseModelTest
7 |
8 |
9 | class TestTextRegressor(BaseModelTest):
10 | model_cls = TextRegressor
11 | train_label_type = "regression"
12 | training_args = {
13 | "max_epochs": 3,
14 | "mini_batch_size": 2,
15 | "learning_rate": 0.1,
16 | "main_evaluation_metric": ("correlation", "pearson"),
17 | }
18 |
19 | def build_model(self, embeddings, label_dict, **kwargs):
20 | # no need for label_dict
21 | return self.model_cls(embeddings, self.train_label_type)
22 |
23 | @pytest.fixture()
24 | def embeddings(self):
25 |         word_embedding = WordEmbeddings("turian")
26 |         return DocumentRNNEmbeddings([word_embedding], 128, 1, False, 64, False, False)
27 |
28 | @pytest.fixture()
29 | def corpus(self, tasks_base_path):
30 | return flair.datasets.ClassificationCorpus(tasks_base_path / "regression", label_type=self.train_label_type)
31 |
--------------------------------------------------------------------------------
/tests/models/test_word_tagger.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import flair
4 | from flair.embeddings import TransformerWordEmbeddings
5 | from flair.models import TokenClassifier
6 | from tests.model_test_utils import BaseModelTest
7 |
8 |
9 | class TestWordTagger(BaseModelTest):
10 | model_cls = TokenClassifier
11 | train_label_type = "pos"
12 | training_args = {
13 | "max_epochs": 2,
14 | "learning_rate": 0.1,
15 | "mini_batch_size": 2,
16 | }
17 |
18 | def has_embedding(self, sentence):
19 | for token in sentence:
20 | if token.get_embedding().cpu().numpy().size == 0:
21 | return False
22 |         return True
23 |
24 | def build_model(self, embeddings, label_dict, **kwargs):
25 | model_args = dict(self.model_args)
26 | for k in kwargs:
27 | if k in model_args:
28 |                 del model_args[k]  # avoid passing the same argument twice (via model_args and kwargs)
29 | return self.model_cls(
30 | embeddings=embeddings,
31 | label_dictionary=label_dict,
32 | label_type=self.train_label_type,
33 | **model_args,
34 | **kwargs,
35 | )
36 |
37 | @pytest.fixture()
38 | def corpus(self, tasks_base_path):
39 | return flair.datasets.UD_ENGLISH(tasks_base_path)
40 |
41 | @pytest.fixture()
42 | def embeddings(self):
43 | return TransformerWordEmbeddings("distilbert-base-uncased")
44 |
--------------------------------------------------------------------------------
/tests/resources/corpora/lorem_ipsum/test.txt:
--------------------------------------------------------------------------------
1 | Adipiscing commodo elit at imperdiet. Consequat interdum varius sit amet mattis vulputate enim nulla. Nulla aliquet porttitor lacus luctus accumsan tortor. Curabitur gravida arcu ac tortor. Adipiscing elit pellentesque habitant morbi. Sed viverra tellus in hac habitasse platea dictumst. Turpis cursus in hac habitasse. Pharetra vel turpis nunc eget. Enim facilisis gravida neque convallis a cras semper auctor neque. Interdum posuere lorem ipsum dolor sit amet consectetur adipiscing elit.
2 |
3 | Mauris sit amet massa vitae tortor condimentum lacinia. Neque gravida in fermentum et sollicitudin. Blandit volutpat maecenas volutpat blandit aliquam. Gravida neque convallis a cras semper auctor neque vitae. Viverra aliquet eget sit amet tellus cras adipiscing enim eu. Risus sed vulputate odio ut enim blandit volutpat maecenas. Amet tellus cras adipiscing enim eu. Viverra tellus in hac habitasse platea dictumst vestibulum rhoncus est. Magna etiam tempor orci eu lobortis elementum. Leo vel fringilla est ullamcorper eget. Nisl nisi scelerisque eu ultrices. Eros donec ac odio tempor orci dapibus ultrices in. Nisl nisi scelerisque eu ultrices vitae auctor eu augue. Hac habitasse platea dictumst vestibulum rhoncus est pellentesque elit. Habitasse platea dictumst vestibulum rhoncus est pellentesque elit. In ornare quam viverra orci sagittis. Morbi quis commodo odio aenean. Nam at lectus urna duis convallis convallis tellus id interdum.
--------------------------------------------------------------------------------
/tests/resources/corpora/lorem_ipsum/valid.txt:
--------------------------------------------------------------------------------
1 | Nulla at volutpat diam ut venenatis tellus in metus vulputate. Porttitor leo a diam sollicitudin tempor. Tincidunt vitae semper quis lectus nulla at volutpat diam. Ornare aenean euismod elementum nisi quis eleifend quam adipiscing. Tortor pretium viverra suspendisse potenti. Arcu risus quis varius quam quisque id. Non sodales neque sodales ut etiam sit amet nisl. Porttitor lacus luctus accumsan tortor posuere ac ut consequat. Diam sit amet nisl suscipit. Ut sem nulla pharetra diam sit amet nisl suscipit adipiscing. Varius quam quisque id diam. Elementum tempus egestas sed sed risus pretium quam vulputate. Eu ultrices vitae auctor eu augue ut lectus. Tincidunt id aliquet risus feugiat in ante metus dictum at. Mauris cursus mattis molestie a iaculis at erat pellentesque. Leo urna molestie at elementum eu.
2 |
3 | Posuere morbi leo urna molestie. Tincidunt nunc pulvinar sapien et. Mattis molestie a iaculis at erat pellentesque. Arcu cursus euismod quis viverra nibh cras pulvinar mattis nunc. Phasellus vestibulum lorem sed risus ultricies tristique nulla aliquet enim. Aenean et tortor at risus viverra. Ut placerat orci nulla pellentesque dignissim. Est lorem ipsum dolor sit amet. Eros donec ac odio tempor. Elementum integer enim neque volutpat ac tincidunt vitae.
--------------------------------------------------------------------------------
/tests/resources/tasks/ag_news/README.md:
--------------------------------------------------------------------------------
1 | ## AG_NEWS
2 |
3 | Data is taken from [here](https://www.di.unipi.it/~gulli/AG_corpus_of_news_articles.html).
4 |
5 | The dataset contains a collection of news articles grouped into different categories.
6 | We took a small random sample and converted it to the expected format of our data fetcher:
7 | ```
8 | __label__<class_name> <text>
9 | ```
10 |
11 | #### Publications Using the Dataset
12 |
13 | * G. M. Del Corso, A. Gulli, and F. Romani. Ranking a stream of news. In Proceedings of 14th International World Wide Web Conference, pages 97–106, Chiba, Japan, 2005.
14 | * A. Gulli. The anatomy of a news search engine. In Proceedings of 14th International World Wide Web Conference, pages 880–881, Chiba, Japan, 2005.
15 |
--------------------------------------------------------------------------------
/tests/resources/tasks/ag_news/test.txt:
--------------------------------------------------------------------------------
1 | __label__World Libya Seems Honest About Nuke Program -- UN Report VIENNA, Austria (Reuters) - The U.N. nuclear watchdog said in a confidential report circulated Monday that Libya appears to have been telling the truth in its declarations on the covert atomic weapons program that it agreed to abandon last year.
2 | __label__Business Judge Orders Parmalat Auditors to Trial MILAN (Reuters) - An Italian judge opened preliminary hearings on Tuesday into the 14-billion-euro collapse of Parmalat and immediately ruled that two former auditors of the food group should stand trial in one of Europe's biggest fraud cases.
3 | __label__Business Market Turmoil Saps Confidence \N
4 | __label__Sci/Tech Repairing airplane wings with nanotubes in flight An electrical pulse through nanotubes and wires helps find the crack.
5 | __label__Sports US lacks golden touch LAKE PLACID, N.Y. -- After Team USA racked up six goals in a 6-3 victory over the Swedes Saturday, coach Ben Smith said he hoped his players saved a few because goals were going to be hard to get yesterday against Canada.
6 | __label__World Experts Doubt Drop In Violence in Iraq The U.S. military's claim that violence has decreased sharply in Iraq in recent months has come under scrutiny from many experts within and outside the government, who contend that some of the underlying statistics are questionable and selectively ignore negative trends.
7 | __label__Sports Passing the torch Calgary Sun. Dan Marino wouldn't swap his collection of NFL records and Hall-of-Fame nomination for the world. Not even for that elusive Super Bowl victory.
8 | __label__Sci/Tech Saved, and Enslaved, by the Cell A growing number of experts say cellphone use may be making us less autonomous and less capable of solving problems on our own.
9 | __label__Sports Kolzig helps Capitals slide by Avalanche WASHINGTON (Reuters) - The Washington Capitals stayed in the hunt for a playoff spot after Olaf Kolzig made 18 saves in a 2-1 win over the struggling Colorado Avalanche on Wednesday.
10 | __label__World Pension Fund of New York Files Suit Against Merck The main pension fund of New York State filed a lawsuit against Merck & Company, accusing it of misleading shareholders about the safety of its pain drug Vioxx.
11 |
--------------------------------------------------------------------------------
/tests/resources/tasks/ag_news/train.txt:
--------------------------------------------------------------------------------
1 | __label__World Light relay 'should be dropped' A leading astronomy group in the US voices opposition to a global 'light relay' planned for next year.
2 | __label__World LA City Council Ordered to Pay Attention (AP) AP - During public hearings, members of the City Council talk on cell phones, chat among themselves, read mail or wander around the room. A state appeals court says they should be doing something else: paying attention.
3 | __label__Business Tribune Profit Declines on Lower Newspaper Ad Revenue Tribune said its third-quarter earnings dropped 7 percent, beating expectations, adding that the housing slump and lower consumer spending worsened advertising revenue.
4 | __label__World Blair is warned about an attack on Iran LONDON -- Foreign policy specialists warned Prime Minister Tony Blair yesterday that military action against Iran could worsen violence across the Middle East and urged him to persuade the United States to hold talks with Tehran.
5 | __label__World Bangladesh Awakes in Shock as Blast Toll Hits 16 Extra armed police patrolled the streets of the Bangladeshi capital and traffic was light on Sunday, a working day, as shocked Bangladeshis woke up to the aftermath of grenade blasts that killed at least 16 people.
6 | __label__Business Wall St. Turns to the Time Out as Punishment Regulators are wielding a new weapon against Wall Street firms instead of multimillion-dollar fines: temporarily shutting down certain business lines.
7 | __label__World Legal move over halted BAE probe Campaigners threaten the government with legal action after a probe into arms deals with Saudi Arabia is dropped.
8 | __label__Sports Gamecocks, Tigers Say "No" To Bowl Bids COLUMBIA, SC -- Clemson and South Carolina will not accept bowl bids, punishment for a brawl between players toward the end of Saturday's game, the schools announced Monday.
9 | __label__Sports Capitals Have Budget to Attract Free Agents The NHL free agent signing period begins Sunday at noon, and Capitals General Manager George McPhee is shopping with a bigger-than-usual budget.
10 | __label__Sci/Tech Hobbit-sized Humans Called Homo floresiensis Discovered by ... Long live the real Bilbo Baggins, the first Little People of the World, Homo floresiensis and Homo sapien archeologists Michael Morwood, Peter Brown and Professor Soejono!
11 |
--------------------------------------------------------------------------------
/tests/resources/tasks/column_corpus_options/eng.testa:
--------------------------------------------------------------------------------
1 | WORD TAG
2 | This O
3 | is O
4 | Coca Cola O
5 |
--------------------------------------------------------------------------------
/tests/resources/tasks/column_corpus_options/eng.testb:
--------------------------------------------------------------------------------
1 | WORD TAG
2 | This O
3 | is O
4 | New York O
5 |
--------------------------------------------------------------------------------
/tests/resources/tasks/column_corpus_options/eng.train:
--------------------------------------------------------------------------------
1 | WORD TAG
2 | This O
3 | is O
4 | New Berlin LOC
5 |
--------------------------------------------------------------------------------
/tests/resources/tasks/column_with_whitespaces/eng.testa:
--------------------------------------------------------------------------------
1 | It O +
2 | is O +
3 | a O +
4 | French B-LOC -
5 | - O -
6 | speaking O +
7 | town O -
8 | . O +
--------------------------------------------------------------------------------
/tests/resources/tasks/column_with_whitespaces/eng.testb:
--------------------------------------------------------------------------------
1 | It O +
2 | is O +
3 | a O +
4 | US B-LOC -
5 | - O -
6 | based O +
7 | company O -
8 | . O +
--------------------------------------------------------------------------------
/tests/resources/tasks/column_with_whitespaces/eng.train:
--------------------------------------------------------------------------------
1 | It O +
2 | is O +
3 | a O +
4 | German B-LOC -
5 | - O -
6 | owned O +
7 | firm O -
8 | . O +
--------------------------------------------------------------------------------
/tests/resources/tasks/conllu/train.conllu:
--------------------------------------------------------------------------------
1 | # text = Larry Page and Sergey Brin founded Google.
2 | # relations = 7;7;1;2;founded_by|7;7;4;5;founded_by
3 | 1 Larry PROPN B-PER _
4 | 2 Page PROPN I-PER _
5 | 3 and CCONJ O _
6 | 4 Sergey PROPN B-PER _
7 | 5 Brin PROPN I-PER _
8 | 6 founded VERB O _
9 | 7 Google PROPN B-ORG SpaceAfter=No
10 | 8 . PUNCT O _
11 |
12 | # text = Microsoft was founded by Bill Gates.
13 | # relations = 1;1;5;6;founded_by
14 | 1 Microsoft PROPN B-ORG _
15 | 2 was AUX O _
16 | 3 founded VERB O _
17 | 4 by ADP O _
18 | 5 Bill PROPN B-PER _
19 | 6 Gates PROPN I-PER SpaceAfter=No
20 | 7 . PUNCT O _
21 |
22 | # text = Konrad Zuse was born in Berlin on 22 June 1910.
23 | # relations = 6;6;1;2;place_of_birth
24 | 1 Konrad PROPN B-PER _
25 | 2 Zuse PROPN I-PER _
26 | 3 was AUX O _
27 | 4 born VERB O _
28 | 5 in ADP O _
29 | 6 Berlin PROPN B-LOC _
30 | 7 on ADP O _
31 | 8 22 NUM B-DATE _
32 | 9 June PROPN I-DATE _
33 | 10 1910 NUM I-DATE SpaceAfter=No
34 | 11 . PUNCT O _
35 |
36 | # text = Joseph Weizenbaum was born in Berlin, Germany.
37 | # relations = 6;6;1;2;place_of_birth
38 | 1 Joseph PROPN B-PER _
39 | 2 Weizenbaum PROPN I-PER _
40 | 3 was AUX O _
41 | 4 born VERB O _
42 | 5 in ADP O _
43 | 6 Berlin PROPN B-LOC _
44 | 7 , PUNCT O _
45 | 8 Germany PROPN B-LOC SpaceAfter=No
46 | 9 . PUNCT O _
47 |
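Each `# relations` comment above encodes one or more gold relations as `head_start;head_end;tail_start;tail_end;label`, with 1-based token indices and `|` separating multiple relations; e.g. `7;7;1;2;founded_by` links token 7 (Google) as head to tokens 1-2 (Larry Page) as tail. A minimal decoder, assuming exactly this layout:

```python
raw = "7;7;1;2;founded_by|7;7;4;5;founded_by"
for entry in raw.split("|"):
    head_start, head_end, tail_start, tail_end, label = entry.split(";")
    print(f"tokens {head_start}-{head_end} --{label}--> tokens {tail_start}-{tail_end}")
# tokens 7-7 --founded_by--> tokens 1-2   (Google founded_by Larry Page)
# tokens 7-7 --founded_by--> tokens 4-5   (Google founded_by Sergey Brin)
```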
--------------------------------------------------------------------------------
/tests/resources/tasks/conllu/train.conllup:
--------------------------------------------------------------------------------
1 | # global.columns = id form upos ner misc
2 | # text = Larry Page and Sergey Brin founded Google.
3 | # relations = 7;7;1;2;founded_by|7;7;4;5;founded_by
4 | 1 Larry PROPN B-PER _
5 | 2 Page PROPN I-PER _
6 | 3 and CCONJ O _
7 | 4 Sergey PROPN B-PER _
8 | 5 Brin PROPN I-PER _
9 | 6 founded VERB O _
10 | 7 Google PROPN B-ORG SpaceAfter=No
11 | 8 . PUNCT O _
12 |
13 | # text = Microsoft was founded by Bill Gates.
14 | # relations = 1;1;5;6;founded_by
15 | 1 Microsoft PROPN B-ORG _
16 | 2 was AUX O _
17 | 3 founded VERB O _
18 | 4 by ADP O _
19 | 5 Bill PROPN B-PER _
20 | 6 Gates PROPN I-PER SpaceAfter=No
21 | 7 . PUNCT O _
22 |
23 | # text = Konrad Zuse was born in Berlin on 22 June 1910.
24 | # relations = 6;6;1;2;place_of_birth
25 | 1 Konrad PROPN B-PER _
26 | 2 Zuse PROPN I-PER _
27 | 3 was AUX O _
28 | 4 born VERB O _
29 | 5 in ADP O _
30 | 6 Berlin PROPN B-LOC _
31 | 7 on ADP O _
32 | 8 22 NUM B-DATE _
33 | 9 June PROPN I-DATE _
34 | 10 1910 NUM I-DATE SpaceAfter=No
35 | 11 . PUNCT O _
36 |
37 | # text = Joseph Weizenbaum, a professor at MIT, was born in Berlin, Germany.
38 | # relations = 12;12;1;2;place_of_birth|14;14;1;2;place_of_birth
39 | 1 Joseph PROPN B-PER _
40 | 2 Weizenbaum PROPN I-PER SpaceAfter=No
41 | 3 , PUNCT O _
42 | 4 a DET O _
43 | 5 professor NOUN O _
44 | 6 at ADP O _
45 | 7 MIT PROPN B-ORG SpaceAfter=No
46 | 8 , PUNCT O _
47 | 9 was AUX O _
48 | 10 born VERB O _
49 | 11 in ADP O _
50 | 12 Berlin PROPN B-LOC SpaceAfter=No
51 | 13 , PUNCT O _
52 | 14 Germany PROPN B-LOC SpaceAfter=No
53 | 15 . PUNCT O _
54 |
55 | # text = The German-American computer scientist Joseph Weizenbaum (8 January 1923 - 5 March 2008) was born in Berlin.
56 | # relations = 21;21;7;8;place_of_birth
57 | 1 The DET O _
58 | 2 German PROPN O SpaceAfter=No
59 | 3 - PUNCT O SpaceAfter=No
60 | 4 American PROPN O _
61 | 5 computer PROPN O _
62 | 6 scientist NOUN O _
63 | 7 Joseph PROPN B-PER _
64 | 8 Weizenbaum PROPN I-PER _
65 | 9 ( PUNCT O SpaceAfter=No
66 | 10 8 NUM O _
67 | 11 January PROPN O _
68 | 12 1923 NUM O _
69 | 13 - SYM O _
70 | 14 5 NUM O _
71 | 15 March PROPN O _
72 | 16 2008 NUM O SpaceAfter=No
73 | 17 ) PUNCT O _
74 | 18 was PRON O _
75 | 19 born ADV O _
76 | 20 in ADP O _
77 | 21 Berlin PROPN B-LOC SpaceAfter=No
78 | 22 . PUNCT O _
79 |
--------------------------------------------------------------------------------
/tests/resources/tasks/conllu/universal_dependencies.conllu:
--------------------------------------------------------------------------------
1 | # text = They buy and sell books.
2 | 1 They they PRON PRP Case=Nom|Number=Plur 2 nsubj 2:nsubj|4:nsubj _
3 | 2 buy buy VERB VBP Number=Plur|Person=3|Tense=Pres 0 root 0:root _
4 | 3 and and CONJ CC _ 4 cc 4:cc _
5 | 4 sell sell VERB VBP Number=Plur|Person=3|Tense=Pres 2 conj 0:root|2:conj _
6 | 5 books book NOUN NNS Number=Plur 2 obj 2:obj|4:obj SpaceAfter=No
7 | 6 . . PUNCT . _ 2 punct 2:punct _
8 |
--------------------------------------------------------------------------------
/tests/resources/tasks/example_images/i_love_berlin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/tests/resources/tasks/example_images/i_love_berlin.png
--------------------------------------------------------------------------------
/tests/resources/tasks/fashion/eng.testa:
--------------------------------------------------------------------------------
1 | Most _ _ O
2 | wedding _ _ B-Occasion
3 | dresses _ _ B-NominalProduct
4 | , _ _ O
5 | for _ _ O
6 | example _ _ O
7 | , _ _ O
8 | are _ _ O
9 | simply _ _ O
10 | too _ _ O
11 | enormous _ _ O
12 | and _ _ O
13 | terrifyingly _ _ O
14 | loaded _ _ O
15 | with _ _ O
16 | sentimental _ _ O
17 | value _ _ O
18 | for _ _ O
19 | DIY _ _ B-ProductDesign
20 | dyeing _ _ I-ProductDesign
21 | . _ _ O
--------------------------------------------------------------------------------
/tests/resources/tasks/fashion/eng.testb:
--------------------------------------------------------------------------------
1 | For _ _ O
2 | my _ _ O
3 | Nurse _ _ B-NamedOrganizationBrand
4 | Ratched _ _ I-NamedOrganizationBrand
5 | dress _ _ B-NominalProduct
6 | , _ _ O
7 | I _ _ O
8 | had _ _ O
9 | brought _ _ O
10 | two _ _ O
11 | dyeing _ _ O
12 | options _ _ O
13 | — _ _ O
14 | one _ _ O
15 | more _ _ O
16 | ambitious _ _ O
17 | than _ _ O
18 | the _ _ O
19 | other _ _ O
20 | . _ _ O
--------------------------------------------------------------------------------
/tests/resources/tasks/fashion/eng.train:
--------------------------------------------------------------------------------
1 | From _ _ O
2 | the _ _ O
3 | charming _ _ O
4 | Arlésienne _ _ B-NamedPerson
5 | to _ _ O
6 | the _ _ O
7 | shepherdess _ _ B-NominalProduct
8 | in _ _ O
9 | a _ _ O
10 | fairy _ _ O
11 | tale _ _ O
12 | , _ _ O
13 | with _ _ O
14 | faille _ _ B-ProductPart
15 | , _ _ O
16 | piqué _ _ B-ProductPart
17 | , _ _ O
18 | taffeta _ _ B-ProductPart
19 | , _ _ O
20 | tulle _ _ B-ProductPart
21 | , _ _ O
22 | embroidery _ _ B-ProductPart
23 | , _ _ O
24 | lace _ _ B-ProductPart
25 | , _ _ O
26 | the _ _ O
27 | repertoire _ _ B-ProductDesign
28 | is _ _ O
29 | inexhaustible _ _ O
30 | . _ _ O
31 |
32 |
33 |
34 |
35 | Subscribe _ _ O
36 | to _ _ O
37 | Highsnobiety _ _ B-NamedOrganizationPublisher
38 | on _ _ O
39 | YouTube _ _ B-NamedOrganizationOther
40 | Eric _ _ B-NamedPerson
41 | Schoenborn _ _ I-NamedPerson
42 | and _ _ O
43 | Ed _ _ B-NamedPerson
44 | Selego _ _ I-NamedPerson
45 | have _ _ O
46 | joined _ _ O
47 | forces _ _ O
48 | with _ _ O
49 | Nocturnal _ _ B-NamedOrganizationBrand
50 | skate _ _ B-Activity
51 | shop _ _ O
52 | to _ _ O
53 | turn _ _ O
54 | Drexel _ _ B-NamedLocation
55 | University _ _ I-NamedLocation
56 | ’ _ _ O
57 | s _ _ O
58 | Leonard _ _ B-NamedLocation
59 | Pearlstein _ _ I-NamedLocation
60 | Gallery _ _ I-NamedLocation
61 | into _ _ O
62 | an _ _ O
63 | interactive _ _ O
64 | skate _ _ B-Activity
65 | pop _ _ O
66 | - _ _ O
67 | up _ _ O
68 | park _ _ O
69 | . _ _ O
70 |
71 | Philly _ _ B-NamedPerson
72 | Radness _ _ I-NamedPerson
73 | accounts _ _ O
74 | for _ _ O
75 | the _ _ O
76 | second _ _ O
77 | installment _ _ O
78 | in _ _ O
79 | the _ _ O
80 | Phenomenal _ _ O
81 | Radness _ _ O
82 | project _ _ O
83 | , _ _ O
84 | after _ _ O
85 | its _ _ O
86 | debut _ _ S-Occasion
87 | in _ _ O
88 | Miami _ _ B-NamedLocation
89 | a _ _ O
90 | few _ _ O
91 | years _ _ O
92 | ago _ _ O
93 | . _ _ O
94 |
95 | Milan _ _ B-NamedLocation
96 | was _ _ O
97 | all _ _ O
98 | the _ _ O
99 | really _ _ O
100 | big _ _ O
101 | girls _ _ O
102 | . _ _ O
103 |
104 | It _ _ O
105 | was _ _ O
106 | the _ _ O
107 | best _ _ O
108 | ! _ _ O
109 |
110 | We _ _ O
111 | go _ _ O
112 | to _ _ O
113 | flea _ _ O
114 | markets _ _ O
115 | together _ _ O
116 | when _ _ O
117 | we _ _ O
118 | ' _ _ O
119 | re _ _ O
120 | in _ _ O
121 | LA _ _ B-NamedLocation
122 | . _ _ O
--------------------------------------------------------------------------------
/tests/resources/tasks/fashion_disjunct/eng.testa:
--------------------------------------------------------------------------------
1 | Most _ _ O
2 | wedding _ _ B-Occasion
3 | dresses _ _ B-NominalProduct
4 | , _ _ O
5 | for _ _ O
6 | example _ _ O
7 | , _ _ O
8 | are _ _ O
9 | simply _ _ O
10 | too _ _ O
11 | enormous _ _ O
12 | and _ _ O
13 | terrifyingly _ _ B-CreativeWord
14 | loaded _ _ O
15 | with _ _ O
16 | sentimental _ _ O
17 | value _ _ O
18 | for _ _ O
19 | DIY _ _ B-ProductDesign
20 | dyeing _ _ I-ProductDesign
21 | . _ _ O
--------------------------------------------------------------------------------
/tests/resources/tasks/fashion_disjunct/eng.testb:
--------------------------------------------------------------------------------
1 | For _ _ O
2 | my _ _ O
3 | Nurse _ _ B-NamedOrganizationBrand
4 | Ratched _ _ I-NamedOrganizationBrand
5 | dress _ _ B-NominalProduct
6 | , _ _ O
7 | I _ _ O
8 | had _ _ O
9 | brought _ _ O
10 | two _ _ O
11 | dyeing _ _ O
12 | options _ _ O
13 | — _ _ O
14 | one _ _ O
15 | more _ _ O
16 | ambitious _ _ B-Ambitiousness
17 | than _ _ O
18 | the _ _ O
19 | other _ _ O
20 | . _ _ O
--------------------------------------------------------------------------------
/tests/resources/tasks/fashion_disjunct/eng.train:
--------------------------------------------------------------------------------
1 | From _ _ O
2 | the _ _ O
3 | charming _ _ O
4 | Arlésienne _ _ B-NamedPerson
5 | to _ _ O
6 | the _ _ O
7 | shepherdess _ _ O
8 | in _ _ O
9 | a _ _ O
10 | fairy _ _ O
11 | tale _ _ O
12 | , _ _ O
13 | with _ _ O
14 | faille _ _ B-ProductPart
15 | , _ _ O
16 | piqué _ _ B-ProductPart
17 | , _ _ O
18 | taffeta _ _ B-ProductPart
19 | , _ _ O
20 | tulle _ _ B-ProductPart
21 | , _ _ O
22 | embroidery _ _ B-ProductPart
23 | , _ _ O
24 | lace _ _ B-ProductPart
25 | , _ _ O
26 | the _ _ O
27 | repertoire _ _ O
28 | is _ _ O
29 | inexhaustible _ _ O
30 | . _ _ O
31 |
32 |
33 |
34 |
35 | Subscribe _ _ O
36 | to _ _ O
37 | Highsnobiety _ _ B-NamedOrganizationPublisher
38 | on _ _ O
39 | YouTube _ _ B-NamedOrganizationOther
40 | Eric _ _ B-NamedPerson
41 | Schoenborn _ _ I-NamedPerson
42 | and _ _ O
43 | Ed _ _ B-NamedPerson
44 | Selego _ _ I-NamedPerson
45 | have _ _ O
46 | joined _ _ O
47 | forces _ _ O
48 | with _ _ O
49 | Nocturnal _ _ B-NamedOrganizationBrand
50 | skate _ _ B-Activity
51 | shop _ _ O
52 | to _ _ O
53 | turn _ _ O
54 | Drexel _ _ B-NamedLocation
55 | University _ _ I-NamedLocation
56 | ’ _ _ O
57 | s _ _ O
58 | Leonard _ _ B-NamedLocation
59 | Pearlstein _ _ I-NamedLocation
60 | Gallery _ _ I-NamedLocation
61 | into _ _ O
62 | an _ _ O
63 | interactive _ _ O
64 | skate _ _ B-Activity
65 | pop _ _ O
66 | - _ _ O
67 | up _ _ O
68 | park _ _ O
69 | . _ _ O
70 |
71 | Philly _ _ B-NamedPerson
72 | Radness _ _ I-NamedPerson
73 | accounts _ _ O
74 | for _ _ O
75 | the _ _ O
76 | second _ _ O
77 | installment _ _ O
78 | in _ _ O
79 | the _ _ O
80 | Phenomenal _ _ O
81 | Radness _ _ O
82 | project _ _ O
83 | , _ _ O
84 | after _ _ O
85 | its _ _ O
86 | debut _ _ O
87 | in _ _ O
88 | Miami _ _ B-NamedLocation
89 | a _ _ O
90 | few _ _ O
91 | years _ _ O
92 | ago _ _ O
93 | . _ _ O
94 |
95 | Milan _ _ B-NamedLocation
96 | was _ _ O
97 | all _ _ O
98 | the _ _ O
99 | really _ _ O
100 | big _ _ O
101 | girls _ _ O
102 | . _ _ O
103 |
104 | It _ _ O
105 | was _ _ O
106 | the _ _ O
107 | best _ _ O
108 | ! _ _ O
109 |
110 | We _ _ O
111 | go _ _ O
112 | to _ _ O
113 | flea _ _ O
114 | markets _ _ O
115 | together _ _ O
116 | when _ _ O
117 | we _ _ O
118 | ' _ _ O
119 | re _ _ O
120 | in _ _ O
121 | LA _ _ B-NamedLocation
122 | . _ _ O
--------------------------------------------------------------------------------
/tests/resources/tasks/fashion_nodev/test.tsv:
--------------------------------------------------------------------------------
1 | Most _ _ O
2 | wedding _ _ B-Occasion
3 | dresses _ _ B-NominalProduct
4 | , _ _ O
5 | for _ _ O
6 | example _ _ O
7 | , _ _ O
8 | are _ _ O
9 | simply _ _ O
10 | too _ _ O
11 | enormous _ _ O
12 | and _ _ O
13 | terrifyingly _ _ O
14 | loaded _ _ O
15 | with _ _ O
16 | sentimental _ _ O
17 | value _ _ O
18 | for _ _ O
19 | DIY _ _ B-ProductDesign
20 | dyeing _ _ I-ProductDesign
21 | . _ _ O
--------------------------------------------------------------------------------
/tests/resources/tasks/fashion_nodev/train.tsv:
--------------------------------------------------------------------------------
1 | From _ _ O
2 | the _ _ O
3 | charming _ _ O
4 | Arlésienne _ _ B-NamedPerson
5 | to _ _ O
6 | the _ _ O
7 | shepherdess _ _ O
8 | in _ _ O
9 | a _ _ O
10 | fairy _ _ O
11 | tale _ _ O
12 | , _ _ O
13 | with _ _ O
14 | faille _ _ B-ProductPart
15 | , _ _ O
16 | piqué _ _ B-ProductPart
17 | , _ _ O
18 | taffeta _ _ B-ProductPart
19 | , _ _ O
20 | tulle _ _ B-ProductPart
21 | , _ _ O
22 | embroidery _ _ B-ProductPart
23 | , _ _ O
24 | lace _ _ B-ProductPart
25 | , _ _ O
26 | the _ _ O
27 | repertoire _ _ O
28 | is _ _ O
29 | inexhaustible _ _ O
30 | . _ _ O
31 |
32 |
33 |
34 |
35 | Subscribe _ _ O
36 | to _ _ O
37 | Highsnobiety _ _ B-NamedOrganizationPublisher
38 | on _ _ O
39 | YouTube _ _ B-NamedOrganizationOther
40 | Eric _ _ B-NamedPerson
41 | Schoenborn _ _ I-NamedPerson
42 | and _ _ O
43 | Ed _ _ B-NamedPerson
44 | Selego _ _ I-NamedPerson
45 | have _ _ O
46 | joined _ _ O
47 | forces _ _ O
48 | with _ _ O
49 | Nocturnal _ _ B-NamedOrganizationBrand
50 | skate _ _ B-Activity
51 | shop _ _ O
52 | to _ _ O
53 | turn _ _ O
54 | Drexel _ _ B-NamedLocation
55 | University _ _ I-NamedLocation
56 | ’ _ _ O
57 | s _ _ O
58 | Leonard _ _ B-NamedLocation
59 | Pearlstein _ _ I-NamedLocation
60 | Gallery _ _ I-NamedLocation
61 | into _ _ O
62 | an _ _ O
63 | interactive _ _ O
64 | skate _ _ B-Activity
65 | pop _ _ O
66 | - _ _ O
67 | up _ _ O
68 | park _ _ O
69 | . _ _ O
70 |
71 | Philly _ _ B-NamedPerson
72 | Radness _ _ I-NamedPerson
73 | accounts _ _ O
74 | for _ _ O
75 | the _ _ O
76 | second _ _ O
77 | installment _ _ O
78 | in _ _ O
79 | the _ _ O
80 | Phenomenal _ _ O
81 | Radness _ _ O
82 | project _ _ O
83 | , _ _ O
84 | after _ _ O
85 | its _ _ O
86 | debut _ _ O
87 | in _ _ O
88 | Miami _ _ B-NamedLocation
89 | a _ _ O
90 | few _ _ O
91 | years _ _ O
92 | ago _ _ O
93 | . _ _ O
94 |
95 | Milan _ _ B-NamedLocation
96 | was _ _ O
97 | all _ _ O
98 | the _ _ O
99 | really _ _ O
100 | big _ _ O
101 | girls _ _ O
102 | . _ _ O
103 |
104 | It _ _ O
105 | was _ _ O
106 | the _ _ O
107 | best _ _ O
108 | ! _ _ O
109 |
110 | We _ _ O
111 | go _ _ O
112 | to _ _ O
113 | flea _ _ O
114 | markets _ _ O
115 | together _ _ O
116 | when _ _ O
117 | we _ _ O
118 | ' _ _ O
119 | re _ _ O
120 | in _ _ O
121 | LA _ _ B-NamedLocation
122 | . _ _ O
--------------------------------------------------------------------------------
/tests/resources/tasks/fewshot_conll/1shot.txt:
--------------------------------------------------------------------------------
1 | Three O
2 | Russian B-MISC
3 | servicemen O
4 | were O
5 | killed O
6 | on O
7 | Saturday O
8 | when O
9 | unidentified O
10 | gunmen O
11 | attacked O
12 | guards O
13 | at O
14 | an O
15 | anti-aircraft O
16 | installation O
17 | outside O
18 | Moscow B-LOC
19 | , O
20 | Interfax B-ORG
21 | news O
22 | agency O
23 | said O
24 | . O
25 |
26 | " O
27 | I O
28 | think O
29 | that O
30 | , O
31 | on O
32 | balance O
33 | , O
34 | it O
35 | is O
36 | looking O
37 | a O
38 | little O
39 | bit O
40 | on O
41 | the O
42 | strong O
43 | side O
44 | , O
45 | " O
46 | Lindsey B-PER
47 | said O
48 | . O
49 |
--------------------------------------------------------------------------------
/tests/resources/tasks/imdb/README.md:
--------------------------------------------------------------------------------
1 | ## IMDB
2 |
3 | Data is taken from [here](http://ai.stanford.edu/~amaas/data/sentiment/).
4 |
5 | The dataset contains data for a binary sentiment classification.
6 | We took a small random sample and converted it to the expected format of our data fetcher:
7 | ```
8 | __label__<class_name> <text>
9 | ```
10 |
11 | #### Publications Using the Dataset
12 |
13 | * Andrew L. Maas, Raymond E. Daly, Peter T. Pham, Dan Huang, Andrew Y. Ng, and Christopher Potts. (2011). Learning Word Vectors for Sentiment Analysis. The 49th Annual Meeting of the Association for Computational Linguistics (ACL 2011).
14 |
--------------------------------------------------------------------------------
/tests/resources/tasks/imdb_underscore/README.md:
--------------------------------------------------------------------------------
1 | ## IMDB
2 |
3 | Data is taken from [here](http://ai.stanford.edu/~amaas/data/sentiment/).
4 |
5 | The dataset contains data for a binary sentiment classification.
6 | We took a small random sample and converted it to the expected format of our data fetcher:
7 | ```
8 | __label__<class_name> <text>
9 | ```
10 |
11 | #### Publications Using the Dataset
12 |
13 | * Andrew L. Maas, Raymond E. Daly, Peter T. Pham, Dan Huang, Andrew Y. Ng, and Christopher Potts. (2011). Learning Word Vectors for Sentiment Analysis. The 49th Annual Meeting of the Association for Computational Linguistics (ACL 2011).
14 |
--------------------------------------------------------------------------------
/tests/resources/tasks/jsonl/testa.jsonl:
--------------------------------------------------------------------------------
1 | {"id": 101319, "data": "This is New Berlin", "label": [[8, 18, "LOC"]], "metadata": [["from", 123]]}
2 | {"id": 101320, "data": "EU rejects German call to boycott British lamb .", "label": [[0, 2, "ORG"], [11, 17, "MISC"], [34, 46, "MISC"]], "metadata": [["from", 124]]}
3 | {"id": 101321, "data": "Peter Blackburn", "label": [[0, 15, "PER"]], "metadata": [["from", 125]]}
4 |
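The `label` entries in these JSONL files are character-offset spans over the `data` string, with end-exclusive offsets, as the records themselves confirm:

```python
record = {"id": 101319, "data": "This is New Berlin", "label": [[8, 18, "LOC"]]}
start, end, tag = record["label"][0]
assert record["data"][start:end] == "New Berlin"  # offsets index characters, end-exclusive
```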
--------------------------------------------------------------------------------
/tests/resources/tasks/jsonl/testb.jsonl:
--------------------------------------------------------------------------------
1 | {"id": 101319, "data": "This is New Berlin", "label": [[8, 18, "LOC"]]}
2 | {"id": 101320, "data": "EU rejects German call to boycott British lamb .", "label": [[0, 2, "ORG"], [11, 17, "MISC"], [34, 46, "MISC"]]}
3 | {"id": 101321, "data": "Peter Blackburn", "label": [[0, 15, "PER"]]}
4 |
--------------------------------------------------------------------------------
/tests/resources/tasks/jsonl/train.jsonl:
--------------------------------------------------------------------------------
1 | {"id": 101319, "data": "This is New Berlin", "label": [[8, 18, "LOC"]]}
2 | {"id": 101319, "data": "This is New Berlin.", "label": [[8, 18, "LOC"]]}
3 | {"id": 101319, "data": "This is New Berlin.", "label": [[8, 19, "LOC"]]}
4 | {"id": 101320, "data": "EU rejects German call to boycott British lamb .", "label": [[0, 2, "ORG"], [11, 17, "MISC"], [34, 46, "MISC"]]}
5 | {"id": 101321, "data": "Peter Blackburn", "label": [[0, 15, "PER"]]}
6 |
--------------------------------------------------------------------------------
/tests/resources/tasks/multi_class/dev.txt:
--------------------------------------------------------------------------------
1 | __label__apple apple
2 | __label__tv tv
3 | __label__guitar guitar
4 | __label__apple __label__tv apple tv
--------------------------------------------------------------------------------
/tests/resources/tasks/multi_class/test.txt:
--------------------------------------------------------------------------------
1 | __label__guitar guitar
2 | __label__apple apple
3 | __label__tv tv
4 | __label__apple __label__tv apple tv
5 | __label__apple __label__guitar apple tv
--------------------------------------------------------------------------------
/tests/resources/tasks/multi_class/train.txt:
--------------------------------------------------------------------------------
1 | __label__tv tv
2 | __label__apple __label__tv apple tv
3 | __label__apple apple
4 | __label__tv tv
5 | __label__apple __label__tv apple tv
6 | __label__guitar guitar
7 | __label__guitar guitar
--------------------------------------------------------------------------------
/tests/resources/tasks/multi_class_negative_examples/dev.txt:
--------------------------------------------------------------------------------
1 | __label__apple apple
2 | __label__tv tv
3 | __label__guitar guitar
4 | __label__apple __label__tv apple tv
5 | dev example without labels
6 |
--------------------------------------------------------------------------------
/tests/resources/tasks/multi_class_negative_examples/test.txt:
--------------------------------------------------------------------------------
1 | __label__guitar guitar
2 | __label__apple apple
3 | __label__tv tv
4 | __label__apple __label__tv apple tv
5 | __label__apple __label__guitar apple tv
6 | test example without labels
7 |
--------------------------------------------------------------------------------
/tests/resources/tasks/multi_class_negative_examples/train.txt:
--------------------------------------------------------------------------------
1 | __label__tv tv
2 | __label__apple __label__tv apple tv
3 | __label__apple apple
4 | __label__tv tv
5 | __label__apple __label__tv apple tv
6 | __label__guitar guitar
7 | __label__guitar guitar
8 | train example without labels
9 |
--------------------------------------------------------------------------------
/tests/resources/tasks/ner_german_germeval/NER-de-dev.tsv:
--------------------------------------------------------------------------------
1 | # http://de.wikipedia.org/wiki/Toyota_Crown [2009-08-13]
2 | 1 1980 O O
3 | 2 kam O O
4 | 3 der O O
5 | 4 Crown B-OTH O
6 | 5 als O O
7 | 6 Versuch O O
8 | 7 von O O
9 | 8 Toyota B-ORG O
10 | 9 , O O
11 | 10 sich O O
12 | 11 in O O
13 | 12 der O O
14 | 13 Oberen O O
15 | 14 Mittelklasse O O
16 | 15 zu O O
17 | 16 etablieren O O
18 | 17 , O O
19 | 18 auch O O
20 | 19 nach O O
21 | 20 Deutschland B-LOC O
22 | 21 . O O
23 |
--------------------------------------------------------------------------------
/tests/resources/tasks/ner_german_germeval/NER-de-test.tsv:
--------------------------------------------------------------------------------
1 | # http://de.wikipedia.org/wiki/Schönburg_(Rhein) [2009-10-23]
2 | 1 1951 O O
3 | 2 bis O O
4 | 3 1953 O O
5 | 4 wurde O O
6 | 5 der O O
7 | 6 nördliche O O
8 | 7 Teil O O
9 | 8 als O O
10 | 9 Jugendburg O O
11 | 10 des O O
12 | 11 Kolpingwerkes B-OTH O
13 | 12 gebaut O O
14 | 13 . O O
--------------------------------------------------------------------------------
/tests/resources/tasks/ner_german_germeval/NER-de-train.tsv:
--------------------------------------------------------------------------------
1 | # n-tv.de vom 26.02.2005 [2005-02-26]
2 | 1 Schartau B-PER O
3 | 2 sagte O O
4 | 3 dem O O
5 | 4 " O O
6 | 5 Tagesspiegel B-ORG O
7 | 6 " O O
8 | 7 vom O O
9 | 8 Freitag O O
10 | 9 , O O
11 | 10 Fischer B-PER O
12 | 11 sei O O
13 | 12 " O O
14 | 13 in O O
15 | 14 einer O O
16 | 15 Weise O O
17 | 16 aufgetreten O O
18 | 17 , O O
19 | 18 die O O
20 | 19 alles O O
21 | 20 andere O O
22 | 21 als O O
23 | 22 überzeugend O O
24 | 23 war O O
25 | 24 " O O
26 | 25 . O O
27 |
28 | # welt.de vom 29.10.2005 [2005-10-29]
29 | 1 Firmengründer O O
30 | 2 Wolf B-PER O
31 | 3 Peter I-PER O
32 | 4 Bree I-PER O
33 | 5 arbeitete O O
34 | 6 Anfang O O
35 | 7 der O O
36 | 8 siebziger O O
37 | 9 Jahre O O
38 | 10 als O O
39 | 11 Möbelvertreter O O
40 | 12 , O O
41 | 13 als O O
42 | 14 er O O
43 | 15 einen O O
44 | 16 fliegenden O O
45 | 17 Händler O O
46 | 18 aus O O
47 | 19 dem O O
48 | 20 Libanon B-LOC O
49 | 21 traf O O
50 | 22 . O O
--------------------------------------------------------------------------------
/tests/resources/tasks/ontonotes/tiny-conll-2012.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/tests/resources/tasks/ontonotes/tiny-conll-2012.zip
--------------------------------------------------------------------------------
/tests/resources/tasks/regression/README.md:
--------------------------------------------------------------------------------
1 | ## REGRESSION
2 |
3 | Data is taken from [here](http://saifmohammad.com/WebPages/EmotionIntensity-SharedTask.html).
4 |
5 | The dataset contains a collection of tweets with joy intensity value.
6 | We took the joy dataset and converted it to the expected format of our data fetcher:
7 | ```
8 | __label__<value> <text>
9 | ```
10 |
11 | #### Publication About the Dataset
12 |
13 | * Emotion Intensities in Tweets. Saif M. Mohammad and Felipe Bravo-Marquez. In Proceedings of the sixth joint conference on lexical and computational semantics (*Sem), August 2017, Vancouver, Canada.
14 | * WASSA-2017 Shared Task on Emotion Intensity. Saif M. Mohammad and Felipe Bravo-Marquez. In Proceedings of the EMNLP 2017 Workshop on Computational Approaches to Subjectivity, Sentiment, and Social Media (WASSA), September 2017, Copenhagen, Denmark.
15 |
--------------------------------------------------------------------------------
/tests/resources/tasks/span_labels/span_first.txt:
--------------------------------------------------------------------------------
1 | Vgl. O
2 | Rundschreiben O
3 | RAB PARTA
4 | 1/2010 YEAR
5 | Rz MISC
6 | 8. MISC
--------------------------------------------------------------------------------
/tests/resources/tasks/span_labels/span_second.txt:
--------------------------------------------------------------------------------
1 | -DOCSTART-
2 |
3 | Vgl. O
4 | Rundschreiben O
5 | RAB PARTA
6 | 1/2010 YEAR
7 | Rz MISC
8 | 8. MISC
--------------------------------------------------------------------------------
/tests/resources/tasks/span_labels/span_third.txt:
--------------------------------------------------------------------------------
1 | -DOCSTART-
2 |
3 | Rundschreiben O
4 |
5 | Vgl. O
6 | Rundschreiben O
7 | RAB PARTA
8 | 1/2010 YEAR
9 | Rz MISC
10 | 8. MISC
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_bioes/dev.txt:
--------------------------------------------------------------------------------
1 | this O
2 | is O
3 | New B-LOC
4 | York I-LOC
5 |
6 | this O
7 | is O
8 | Berlin B-LOC
9 |
10 | here O
11 | is O
12 | New B-LOC
13 | York I-LOC
14 |
15 | here O
16 | is O
17 | Berlin B-LOC
18 |
19 | I O
20 | like O
21 | New B-LOC
22 | York I-LOC
23 |
24 | I O
25 | like O
26 | Berlin B-LOC
27 |
28 | we O
29 | like O
30 | New B-LOC
31 | York I-LOC
32 |
33 | we O
34 | like O
35 | Berlin B-LOC
36 |
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_bioes/test.txt:
--------------------------------------------------------------------------------
1 | this O
2 | is O
3 | New B-LOC
4 | York I-LOC
5 |
6 | this O
7 | is O
8 | Berlin B-LOC
9 |
10 | here O
11 | is O
12 | New B-LOC
13 | York I-LOC
14 |
15 | here O
16 | is O
17 | Berlin B-LOC
18 |
19 | I O
20 | like O
21 | New B-LOC
22 | York I-LOC
23 |
24 | I O
25 | like O
26 | Berlin B-LOC
27 |
28 | we O
29 | like O
30 | New B-LOC
31 | York I-LOC
32 |
33 | we O
34 | like O
35 | Berlin B-LOC
36 |
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_bioes/train.txt:
--------------------------------------------------------------------------------
1 | this O
2 | is O
3 | New B-LOC
4 | York I-LOC
5 |
6 | this O
7 | is O
8 | Berlin B-LOC
9 |
10 | here O
11 | is O
12 | New B-LOC
13 | York I-LOC
14 |
15 | here O
16 | is O
17 | Berlin B-LOC
18 |
19 | I O
20 | like O
21 | New B-LOC
22 | York I-LOC
23 |
24 | I O
25 | like O
26 | Berlin B-LOC
27 |
28 | we O
29 | like O
30 | New B-LOC
31 | York I-LOC
32 |
33 | we O
34 | like O
35 | Berlin B-LOC
36 |
37 | this O
38 | is O
39 | New B-LOC
40 | York I-LOC
41 |
42 | this O
43 | is O
44 | Berlin B-LOC
45 |
46 | here O
47 | is O
48 | New B-LOC
49 | York I-LOC
50 |
51 | here O
52 | is O
53 | Berlin B-LOC
54 |
55 | I O
56 | like O
57 | New B-LOC
58 | York I-LOC
59 |
60 | I O
61 | like O
62 | Berlin B-LOC
63 |
64 | we O
65 | like O
66 | New B-LOC
67 | York I-LOC
68 |
69 | we O
70 | like O
71 | Berlin B-LOC
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_bioes_with_boundaries/dev.txt:
--------------------------------------------------------------------------------
1 | this O
2 | is O
3 | New B-LOC
4 | York I-LOC
5 |
6 | here O
7 | is O
8 | New B-LOC
9 | York I-LOC
10 |
11 | I O
12 | like O
13 | New B-LOC
14 | York I-LOC
15 |
16 | we O
17 | like O
18 | New B-LOC
19 | York I-LOC
20 |
21 | -DOCSTART-
22 |
23 | this O
24 | is O
25 | Berlin B-LOC
26 |
27 | here O
28 | is O
29 | Berlin B-LOC
30 |
31 | I O
32 | like O
33 | Berlin B-LOC
34 |
35 | we O
36 | like O
37 | Berlin B-LOC
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_bioes_with_boundaries/test.txt:
--------------------------------------------------------------------------------
1 | this O
2 | is O
3 | New B-LOC
4 | York I-LOC
5 |
6 | here O
7 | is O
8 | New B-LOC
9 | York I-LOC
10 |
11 | I O
12 | like O
13 | New B-LOC
14 | York I-LOC
15 |
16 | we O
17 | like O
18 | New B-LOC
19 | York I-LOC
20 |
21 | -DOCSTART-
22 |
23 | this O
24 | is O
25 | Berlin B-LOC
26 |
27 | here O
28 | is O
29 | Berlin B-LOC
30 |
31 | I O
32 | like O
33 | Berlin B-LOC
34 |
35 | we O
36 | like O
37 | Berlin B-LOC
38 |
39 | -DOCSTART-
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_bioes_with_boundaries/train.txt:
--------------------------------------------------------------------------------
1 | this O
2 | is O
3 | New B-LOC
4 | York I-LOC
5 |
6 | here O
7 | is O
8 | New B-LOC
9 | York I-LOC
10 |
11 | I O
12 | like O
13 | New B-LOC
14 | York I-LOC
15 |
16 | we O
17 | like O
18 | New B-LOC
19 | York I-LOC
20 |
21 | -DOCSTART-
22 |
23 | this O
24 | is O
25 | Berlin B-LOC
26 |
27 | here O
28 | is O
29 | Berlin B-LOC
30 |
31 | I O
32 | like O
33 | Berlin B-LOC
34 |
35 | we O
36 | like O
37 | Berlin B-LOC
38 |
39 | -DOCSTART-
40 |
41 | this O
42 | is O
43 | New B-LOC
44 | York I-LOC
45 |
46 | here O
47 | is O
48 | New B-LOC
49 | York I-LOC
50 |
51 | I O
52 | like O
53 | New B-LOC
54 | York I-LOC
55 |
56 | we O
57 | like O
58 | New B-LOC
59 | York I-LOC
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_text_classification_multi/dev.txt:
--------------------------------------------------------------------------------
1 | __label__pizza this is pizza
2 | __label__Berlin this is Berlin
3 | __label__Berlin __label__pizza this is Berlin and pizza
4 | __label__pizza here is pizza
5 | __label__Berlin here is Berlin
6 | __label__Berlin __label__pizza here is Berlin and pizza
7 | __label__pizza I like pizza
8 | __label__Berlin I like Berlin
9 | __label__Berlin __label__pizza I like Berlin and pizza
10 | __label__pizza we like pizza
11 | __label__Berlin we like Berlin
12 | __label__Berlin __label__pizza we like Berlin and pizza
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_text_classification_multi/test.txt:
--------------------------------------------------------------------------------
1 | __label__pizza this is pizza
2 | __label__Berlin this is Berlin
3 | __label__Berlin __label__pizza this is Berlin and pizza
4 | __label__pizza here is pizza
5 | __label__Berlin here is Berlin
6 | __label__Berlin __label__pizza here is Berlin and pizza
7 | __label__pizza I like pizza
8 | __label__Berlin I like Berlin
9 | __label__Berlin __label__pizza I like Berlin and pizza
10 | __label__pizza we like pizza
11 | __label__Berlin we like Berlin
12 | __label__Berlin __label__pizza we like Berlin and pizza
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_text_classification_multi/train.txt:
--------------------------------------------------------------------------------
1 | __label__pizza this is pizza
2 | __label__Berlin this is Berlin
3 | __label__Berlin __label__pizza this is Berlin and pizza
4 | __label__pizza here is pizza
5 | __label__Berlin here is Berlin
6 | __label__Berlin __label__pizza here is Berlin and pizza
7 | __label__pizza I like pizza
8 | __label__Berlin I like Berlin
9 | __label__Berlin __label__pizza I like Berlin and pizza
10 | __label__pizza we like pizza
11 | __label__Berlin we like Berlin
12 | __label__Berlin __label__pizza we like Berlin and pizza
13 | __label__pizza this is pizza
14 | __label__Berlin this is Berlin
15 | __label__Berlin __label__pizza this is Berlin and pizza
16 | __label__pizza here is pizza
17 | __label__Berlin here is Berlin
18 | __label__Berlin __label__pizza here is Berlin and pizza
19 | __label__pizza I like pizza
20 | __label__Berlin I like Berlin
21 | __label__Berlin __label__pizza I like Berlin and pizza
22 | __label__pizza we like pizza
23 | __label__Berlin we like Berlin
24 | __label__Berlin __label__pizza we like Berlin and pizza
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_text_classification_single/dev.txt:
--------------------------------------------------------------------------------
1 | __label__New_York this is New York
2 | __label__Berlin this is Berlin
3 | __label__New_York here is New York
4 | __label__Berlin here is Berlin
5 | __label__New_York I like New York
6 | __label__Berlin I like Berlin
7 | __label__New_York we like New York
8 | __label__Berlin we like Berlin
9 |
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_text_classification_single/test.txt:
--------------------------------------------------------------------------------
1 | __label__New_York this is New York
2 | __label__Berlin this is Berlin
3 | __label__New_York here is New York
4 | __label__Berlin here is Berlin
5 | __label__New_York I like New York
6 | __label__Berlin I like Berlin
7 | __label__New_York we like New York
8 | __label__Berlin we like Berlin
9 |
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_text_classification_single/train.txt:
--------------------------------------------------------------------------------
1 | __label__New_York this is New York
2 | __label__Berlin this is Berlin
3 | __label__New_York here is New York
4 | __label__Berlin here is Berlin
5 | __label__New_York I like New York
6 | __label__Berlin I like Berlin
7 | __label__New_York we like New York
8 | __label__Berlin we like Berlin
9 | __label__New_York this is New York
10 | __label__Berlin this is Berlin
11 | __label__New_York here is New York
12 | __label__Berlin here is Berlin
13 | __label__New_York I like New York
14 | __label__Berlin I like Berlin
15 | __label__New_York we like New York
16 | __label__Berlin we like Berlin
--------------------------------------------------------------------------------
/tests/resources/tasks/ud_english/en_ewt-ud-dev.conllu:
--------------------------------------------------------------------------------
1 | # newdoc id = weblog-blogspot.com_nominations_20041117172713_ENG_20041117_172713
2 | # sent_id = weblog-blogspot.com_nominations_20041117172713_ENG_20041117_172713-0001
3 | # text = From the AP comes this story :
4 | 1 From from ADP IN _ 3 case 3:case _
5 | 2 the the DET DT Definite=Def|PronType=Art 3 det 3:det _
6 | 3 AP AP PROPN NNP Number=Sing 4 obl 4:obl:from _
7 | 4 comes come VERB VBZ Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin 0 root 0:root _
8 | 5 this this DET DT Number=Sing|PronType=Dem 6 det 6:det _
9 | 6 story story NOUN NN Number=Sing 4 nsubj 4:nsubj _
10 | 7 : : PUNCT : _ 4 punct 4:punct _
11 |
12 | # sent_id = weblog-blogspot.com_nominations_20041117172713_ENG_20041117_172713-0002
13 | # text = President Bush on Tuesday nominated two individuals to replace retiring jurists on federal courts in the Washington area.
14 | 1 President President PROPN NNP Number=Sing 5 nsubj 5:nsubj _
15 | 2 Bush Bush PROPN NNP Number=Sing 1 flat 1:flat _
16 | 3 on on ADP IN _ 4 case 4:case _
17 | 4 Tuesday Tuesday PROPN NNP Number=Sing 5 obl 5:obl:on _
18 | 5 nominated nominate VERB VBD Mood=Ind|Tense=Past|VerbForm=Fin 0 root 0:root _
19 | 6 two two NUM CD NumType=Card 7 nummod 7:nummod _
20 | 7 individuals individual NOUN NNS Number=Plur 5 obj 5:obj _
21 | 8 to to PART TO _ 9 mark 9:mark _
22 | 9 replace replace VERB VB VerbForm=Inf 5 advcl 5:advcl:to _
23 | 10 retiring retire VERB VBG VerbForm=Ger 11 amod 11:amod _
24 | 11 jurists jurist NOUN NNS Number=Plur 9 obj 9:obj _
25 | 12 on on ADP IN _ 14 case 14:case _
26 | 13 federal federal ADJ JJ Degree=Pos 14 amod 14:amod _
27 | 14 courts court NOUN NNS Number=Plur 11 nmod 11:nmod:on _
28 | 15 in in ADP IN _ 18 case 18:case _
29 | 16 the the DET DT Definite=Def|PronType=Art 18 det 18:det _
30 | 17 Washington Washington PROPN NNP Number=Sing 18 compound 18:compound _
31 | 18 area area NOUN NN Number=Sing 14 nmod 14:nmod:in SpaceAfter=No
32 | 19 . . PUNCT . _ 5 punct 5:punct _
--------------------------------------------------------------------------------
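
Note: the fixture above is standard CoNLL-U: "#" lines carry sentence metadata (newdoc id, sent_id, text), each token line has ten tab-separated fields, and a blank line closes a sentence. A self-contained parsing sketch, independent of flair, with field names taken from the CoNLL-U specification:

    from pathlib import Path

    # The ten standard CoNLL-U columns, in order.
    FIELDS = ["id", "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps", "misc"]

    def read_conllu(path):
        sentence = []
        for line in Path(path).read_text(encoding="utf-8").splitlines():
            if line.startswith("#"):
                continue  # sentence-level metadata
            if not line.strip():
                if sentence:  # a blank line terminates the current sentence
                    yield sentence
                    sentence = []
                continue
            sentence.append(dict(zip(FIELDS, line.split("\t"))))
        if sentence:
            yield sentence

    for sent in read_conllu("tests/resources/tasks/ud_english/en_ewt-ud-dev.conllu"):
        print([(tok["form"], tok["upos"]) for tok in sent])
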
/tests/resources/tasks/up_english/en_ewt-up-dev.conllu:
--------------------------------------------------------------------------------
1 | # newdoc id = weblog-blogspot.com_nominations_20041117172713_ENG_20041117_172713
2 | # sent_id = weblog-blogspot.com_nominations_20041117172713_ENG_20041117_172713-0001
3 | # text = From the AP comes this story :
4 | 1 From from ADP IN _ 3 case 3:case _ _ _
5 | 2 the the DET DT Definite=Def|PronType=Art 3 det 3:det _ _ _
6 | 3 AP AP PROPN NNP Number=Sing 4 obl 4:obl:from _ _ ARG2
7 | 4 comes come VERB VBZ Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin 0 root 0:root _ come.03 V
8 | 5 this this DET DT Number=Sing|PronType=Dem 6 det 6:det _ _ _
9 | 6 story story NOUN NN Number=Sing 4 nsubj 4:nsubj _ _ ARG1
10 | 7 : : PUNCT : _ 4 punct 4:punct _ _ _
11 |
12 | # sent_id = weblog-blogspot.com_nominations_20041117172713_ENG_20041117_172713-0002
13 | # text = President Bush on Tuesday nominated two individuals to replace retiring jurists on federal courts in the Washington area.
14 | 1 President President PROPN NNP Number=Sing 5 nsubj 5:nsubj _ _ ARG0 _
15 | 2 Bush Bush PROPN NNP Number=Sing 1 flat 1:flat _ _ _ _
16 | 3 on on ADP IN _ 4 case 4:case _ _ _ _
17 | 4 Tuesday Tuesday PROPN NNP Number=Sing 5 obl 5:obl:on _ _ ARGM-TMP _
18 | 5 nominated nominate VERB VBD Mood=Ind|Tense=Past|VerbForm=Fin 0 root 0:root _ nominate.01 V _
19 | 6 two two NUM CD NumType=Card 7 nummod 7:nummod _ _ _ _
20 | 7 individuals individual NOUN NNS Number=Plur 5 obj 5:obj _ _ ARG1 ARG0
21 | 8 to to PART TO _ 9 mark 9:mark _ _ _ _
22 | 9 replace replace VERB VB VerbForm=Inf 5 advcl 5:advcl:to _ replace.01 ARG2 V
23 | 10 retiring retire VERB VBG VerbForm=Ger 11 amod 11:amod _ _ _ _
24 | 11 jurists jurist NOUN NNS Number=Plur 9 obj 9:obj _ _ _ ARG1
25 | 12 on on ADP IN _ 14 case 14:case _ _ _ _
26 | 13 federal federal ADJ JJ Degree=Pos 14 amod 14:amod _ _ _ _
27 | 14 courts court NOUN NNS Number=Plur 11 nmod 11:nmod:on _ _ _ _
28 | 15 in in ADP IN _ 18 case 18:case _ _ _ _
29 | 16 the the DET DT Definite=Def|PronType=Art 18 det 18:det _ _ _ _
30 | 17 Washington Washington PROPN NNP Number=Sing 18 compound 18:compound _ _ _ _
31 | 18 area area NOUN NN Number=Sing 14 nmod 14:nmod:in SpaceAfter=No _ _ _
32 | 19 . . PUNCT . _ 5 punct 5:punct _ _ _ _
--------------------------------------------------------------------------------
/tests/resources/tasks/up_english/en_ewt-up-test.conllu:
--------------------------------------------------------------------------------
1 | # newdoc id = weblog-blogspot.com_zentelligence_20040423000200_ENG_20040423_000200
2 | # sent_id = weblog-blogspot.com_zentelligence_20040423000200_ENG_20040423_000200-0001
3 | # text = What if Google Morphed Into GoogleOS?
4 | 1 What what PRON WP PronType=Int 0 root 0:root _ _ _
5 | 2 if if SCONJ IN _ 4 mark 4:mark _ _ _
6 | 3 Google Google PROPN NNP Number=Sing 4 nsubj 4:nsubj _ _ ARG1
7 | 4 Morphed morph VERB VBD Mood=Ind|Tense=Past|VerbForm=Fin 1 advcl 1:advcl:if _ morph.01 V
8 | 5 Into into ADP IN _ 6 case 6:case _ _ _
9 | 6 GoogleOS GoogleOS PROPN NNP Number=Sing 4 obl 4:obl:into SpaceAfter=No _ ARG2
10 | 7 ? ? PUNCT . _ 4 punct 4:punct _ _ _
11 |
12 | # sent_id = weblog-blogspot.com_zentelligence_20040423000200_ENG_20040423_000200-0002
13 | # text = What if Google expanded on its search-engine (and now e-mail) wares into a full-fledged operating system?
14 | 1 What what PRON WP PronType=Int 0 root 0:root _ _ _
15 | 2 if if SCONJ IN _ 4 mark 4:mark _ _ _
16 | 3 Google Google PROPN NNP Number=Sing 4 nsubj 4:nsubj _ _ ARG0
17 | 4 expanded expand VERB VBD Mood=Ind|Tense=Past|VerbForm=Fin 1 advcl 1:advcl:if _ expand.01 V
18 | 5 on on ADP IN _ 15 case 15:case _ _ _
19 | 6 its its PRON PRP$ Gender=Neut|Number=Sing|Person=3|Poss=Yes|PronType=Prs 15 nmod:poss 15:nmod:poss _ _ _
20 | 7 search search NOUN NN Number=Sing 9 compound 9:compound SpaceAfter=No _ _
21 | 8 - - PUNCT HYPH _ 9 punct 9:punct SpaceAfter=No _ _
22 | 9 engine engine NOUN NN Number=Sing 15 compound 15:compound _ _ _
23 | 10 ( ( PUNCT -LRB- _ 9 punct 9:punct SpaceAfter=No _ _
24 | 11 and and CCONJ CC _ 13 cc 13:cc _ _ _
25 | 12 now now ADV RB _ 13 advmod 13:advmod _ _ _
26 | 13 e-mail e-mail NOUN NN Number=Sing 9 conj 9:conj:and|15:compound SpaceAfter=No _ _
27 | 14 ) ) PUNCT -RRB- _ 15 punct 15:punct _ _ _
28 | 15 wares wares NOUN NNS Number=Plur 4 obl 4:obl:on _ _ ARG1
29 | 16 into into ADP IN _ 22 case 22:case _ _ _
30 | 17 a a DET DT Definite=Ind|PronType=Art 22 det 22:det _ _ _
31 | 18 full full ADV RB _ 20 advmod 20:advmod SpaceAfter=No _ _
32 | 19 - - PUNCT HYPH _ 20 punct 20:punct SpaceAfter=No _ _
33 | 20 fledged fledged ADJ JJ Degree=Pos 22 amod 22:amod _ _ _
34 | 21 operating operating NOUN NN Number=Sing 22 compound 22:compound _ _ _
35 | 22 system system NOUN NN Number=Sing 4 obl 4:obl:into SpaceAfter=No _ ARG4
36 | 23 ? ? PUNCT . _ 4 punct 4:punct _ _ _
--------------------------------------------------------------------------------
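
Note: the up_english fixtures extend the ten CoNLL-U columns with Universal Propbank semantic-role annotation: column 11 holds the predicate frame ("nominate.01", or "_" on non-predicate tokens) and every further column holds the argument labels for one predicate of the sentence ("V" on the predicate token itself, labels such as ARG0, ARG1 or ARGM-TMP on its arguments). A small sketch of splitting a token line accordingly; the column layout is inferred from the fixtures above:

    def split_up_line(line: str):
        # The first ten tab-separated fields are standard CoNLL-U; the remainder
        # is the Propbank extension: one frame column, then one argument column
        # per predicate in the sentence.
        fields = line.rstrip("\n").split("\t")
        return fields[:10], fields[10], fields[11:]

    # For token 5 ("nominated") of the second dev sentence this yields the
    # frame "nominate.01" and the argument columns ["V", "_"]: it is the first
    # of the sentence's two predicates and no argument of the second.
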
/tests/resources/visual/snippet.txt:
--------------------------------------------------------------------------------
1 | The U.S. Centers for Disease Control and Prevention initially advised school systems to close if outbreaks occurred , then reversed itself , saying the apparent mildness of the virus meant most schools and day care centers should stay open , even if they had confirmed cases of swine flu .
2 | When Ms. Winfrey invited Suzanne Somers to share her controversial views about bio-identical hormone treatment on her syndicated show in 2009 , it won Ms. Winfrey a rare dollop of unflattering press , including a Newsweek cover story titled " Crazy Talk : Oprah , Wacky Cures & You . "
3 | Elk calling -- a skill that hunters perfected long ago to lure game with the promise of a little romance -- is now its own sport .
4 | Don 't !
5 | Fish , ranked 98th in the world , fired 22 aces en route to a 6-3 , 6-7 ( 5 / 7 ) , 7-6 ( 7 / 4 ) win over seventh-seeded Argentinian David Nalbandian .
6 | Why does everything have to become such a big issue ?
7 | AMMAN ( Reuters ) - King Abdullah of Jordan will meet U.S. President Barack Obama in Washington on April 21 to lobby on behalf of Arab states for a stronger U.S. role in Middle East peacemaking , palace officials said on Sunday .
8 | To help keep traffic flowing the Congestion Charge will remain in operation through-out the strike and TfL will be suspending road works on major London roads wherever possible .
9 | If no candidate wins an absolute majority , there will be a runoff between the top two contenders , most likely in mid-October .
10 | Authorities previously served search warrants at Murray 's Las Vegas home and his businesses in Las Vegas and Houston .
--------------------------------------------------------------------------------
/tests/test_lemmatizer.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | import flair
4 | from flair.data import Sentence
5 | from flair.models import Lemmatizer
6 |
7 |
8 | def test_words_to_char_indices():
9 | sentence = Sentence("Hello look what a beautiful day!")
10 |
11 | lemmatizer = Lemmatizer() # lemmatizer uses standard char dictionary
12 |
13 | d = lemmatizer.dummy_index
14 | e = lemmatizer.end_index
15 | s = lemmatizer.start_index
16 |
17 | string_list = sentence.to_tokenized_string().split()
18 |
19 | # With end symbol, without start symbol, padding in front
20 | target = torch.tensor(
21 | [
22 | [d, d, d, d, 55, 5, 15, 15, 12, e],
23 | [d, d, d, d, d, 15, 12, 12, 28, e],
24 | [d, d, d, d, d, 23, 13, 9, 8, e],
25 | [d, d, d, d, d, d, d, d, 9, e],
26 | [24, 5, 9, 16, 8, 7, 22, 16, 15, e],
27 | [d, d, d, d, d, d, 14, 9, 27, e],
28 | [d, d, d, d, d, d, d, d, 76, e],
29 | ],
30 | dtype=torch.long,
31 | ).to(flair.device)
32 | out = lemmatizer.words_to_char_indices(string_list, end_symbol=True, start_symbol=False, padding_in_front=True)
33 | assert torch.equal(target, out)
34 |
35 | # Without end symbol, with start symbol, padding in back
36 | target = torch.tensor(
37 | [
38 | [s, 55, 5, 15, 15, 12, d, d, d, d],
39 | [s, 15, 12, 12, 28, d, d, d, d, d],
40 | [s, 23, 13, 9, 8, d, d, d, d, d],
41 | [s, 9, d, d, d, d, d, d, d, d],
42 | [s, 24, 5, 9, 16, 8, 7, 22, 16, 15],
43 | [s, 14, 9, 27, d, d, d, d, d, d],
44 | [s, 76, d, d, d, d, d, d, d, d],
45 | ],
46 | dtype=torch.long,
47 | ).to(flair.device)
48 | out = lemmatizer.words_to_char_indices(string_list, end_symbol=False, start_symbol=True, padding_in_front=False)
49 | assert torch.equal(target, out)
50 |
51 | # Without end symbol, without start symbol, padding in front
52 | assert lemmatizer.words_to_char_indices(
53 | string_list, end_symbol=False, start_symbol=False, padding_in_front=True
54 | ).size() == (7, 9)
55 |
--------------------------------------------------------------------------------
/tests/test_multitask.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import flair
4 | from flair.data import Sentence
5 | from flair.datasets import SENTEVAL_CR, SENTEVAL_SST_GRANULAR
6 | from flair.embeddings import TransformerDocumentEmbeddings
7 | from flair.models import MultitaskModel, TextClassifier
8 | from flair.nn.multitask import make_multitask_model_and_corpus
9 | from flair.trainers import ModelTrainer
10 |
11 |
12 | @pytest.mark.integration()
13 | def test_train_load_use_classifier(results_base_path, tasks_base_path):
14 | # --- Embeddings that are shared by both models --- #
15 | shared_embedding = TransformerDocumentEmbeddings("distilbert-base-uncased", fine_tune=True)
16 |
17 | # --- Task 1: Sentiment Analysis (5-class) --- #
18 | flair.set_seed(123)
19 |
20 | # Define corpus and model
21 | corpus_1 = SENTEVAL_SST_GRANULAR().downsample(0.01)
22 |
23 | model_1 = TextClassifier(
24 | shared_embedding, label_dictionary=corpus_1.make_label_dictionary("class", add_unk=False), label_type="class"
25 | )
26 |
27 | # -- Task 2: Binary Sentiment Analysis on Customer Reviews -- #
28 | flair.set_seed(123)
29 |
30 | # Define corpus and model
31 | corpus_2 = SENTEVAL_CR().downsample(0.01)
32 |
33 | model_2 = TextClassifier(
34 | shared_embedding,
35 | label_dictionary=corpus_2.make_label_dictionary("sentiment", add_unk=False),
36 | label_type="sentiment",
37 | inverse_model=True,
38 | )
39 |
40 | # -- Define mapping (which tagger should train on which model) -- #
41 | multitask_model, multicorpus = make_multitask_model_and_corpus(
42 | [
43 | (model_1, corpus_1),
44 | (model_2, corpus_2),
45 | ]
46 | )
47 |
48 | # -- Create model trainer and train -- #
49 | trainer = ModelTrainer(multitask_model, multicorpus)
50 |
51 | trainer.fine_tune(results_base_path, max_epochs=1)
52 |
53 | del trainer, multitask_model, corpus_1, corpus_2
54 | loaded_model = MultitaskModel.load(results_base_path / "final-model.pt")
55 |
56 | sentence = Sentence("I love Berlin")
57 | sentence_empty = Sentence(" ")
58 |
59 | loaded_model.predict(sentence)
60 | loaded_model.predict([sentence, sentence_empty])
61 | loaded_model.predict([sentence_empty])
62 |
63 | for label in sentence.labels:
64 | assert label.value is not None
65 | assert 0.0 <= label.score <= 1.0
66 | assert isinstance(label.score, float)
67 | del loaded_model
68 |
--------------------------------------------------------------------------------
/tests/test_tars.py:
--------------------------------------------------------------------------------
1 | from flair.data import Sentence
2 | from flair.datasets import ClassificationCorpus
3 | from flair.models import TARSClassifier
4 | from flair.trainers import ModelTrainer
5 |
6 |
7 | def test_init_tars_and_switch(tasks_base_path):
8 | # test corpus
9 | corpus = ClassificationCorpus(tasks_base_path / "imdb")
10 |
11 | # create a TARS classifier
12 | tars = TARSClassifier(
13 | task_name="2_CLASS",
14 | label_dictionary=corpus.make_label_dictionary(label_type="class"),
15 | label_type="class",
16 | )
17 |
18 | # check if right number of classes
19 | assert len(tars.get_current_label_dictionary()) == 2
20 |
21 | # switch to task with only one label
22 | tars.add_and_switch_to_new_task("1_CLASS", "one class", "testlabel")
23 |
24 | # check if right number of classes
25 | assert len(tars.get_current_label_dictionary()) == 1
26 |
27 | # switch to task with three labels provided as list
28 | tars.add_and_switch_to_new_task("3_CLASS", ["list 1", "list 2", "list 3"], "testlabel")
29 |
30 | # check if right number of classes
31 | assert len(tars.get_current_label_dictionary()) == 3
32 |
33 | # switch to task with four labels provided as set
34 | tars.add_and_switch_to_new_task("4_CLASS", {"set 1", "set 2", "set 3", "set 4"}, "testlabel")
35 |
36 | # check if right number of classes
37 | assert len(tars.get_current_label_dictionary()) == 4
38 |
39 | # switch to task with two labels provided as Dictionary
40 | tars.add_and_switch_to_new_task("2_CLASS_AGAIN", corpus.make_label_dictionary(label_type="class"), "testlabel")
41 |
42 | # check if right number of classes
43 | assert len(tars.get_current_label_dictionary()) == 2
44 |
45 |
46 | def test_train_tars(tasks_base_path, results_base_path):
47 | # test corpus
48 | corpus = ClassificationCorpus(tasks_base_path / "imdb_underscore")
49 |
50 | # create a TARS classifier
51 | tars = TARSClassifier(embeddings="sshleifer/tiny-distilroberta-base")
52 |
53 | # switch to a new task (TARS can do multiple tasks so you must define one)
54 | tars.add_and_switch_to_new_task(
55 | task_name="question 2_CLASS",
56 | label_dictionary=corpus.make_label_dictionary(label_type="class"),
57 | label_type="class",
58 | )
59 |
60 | # initialize the text classifier trainer
61 | trainer = ModelTrainer(tars, corpus)
62 |
63 | # start the training
64 | trainer.train(
65 | base_path=results_base_path,
66 | learning_rate=0.02,
67 | mini_batch_size=1,
68 | max_epochs=1,
69 | )
70 |
71 | sentence = Sentence("This is great!")
72 | tars.predict(sentence)
73 |
--------------------------------------------------------------------------------
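
Note: test_train_tars above trains the classifier on a fixed task; TARS models are also commonly used zero-shot. A hedged sketch of that usage — the "tars-base" model identifier and the predict_zero_shot call are assumptions about the flair TARS API and do not appear in the tests above:

    from flair.data import Sentence
    from flair.models import TARSClassifier

    # Load a pretrained TARS model (assumed published model name) and classify
    # against ad-hoc candidate labels, without any task-specific training step.
    tars = TARSClassifier.load("tars-base")
    sentence = Sentence("This is great!")
    tars.predict_zero_shot(sentence, ["positive", "negative"])
    print(sentence.labels)
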
/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | from flair.data import Dictionary
2 | from flair.training_utils import convert_labels_to_one_hot
3 |
4 |
5 | def test_convert_labels_to_one_hot():
6 | label_dict = Dictionary(add_unk=False)
7 | label_dict.add_item("class-1")
8 | label_dict.add_item("class-2")
9 | label_dict.add_item("class-3")
10 |
11 | one_hot = convert_labels_to_one_hot([["class-2"]], label_dict)
12 |
13 | assert one_hot[0][0] == 0
14 | assert one_hot[0][1] == 1
15 | assert one_hot[0][2] == 0
16 |
--------------------------------------------------------------------------------
/tests/test_visual.py:
--------------------------------------------------------------------------------
1 | from flair.data import Sentence, Span, Token
2 | from flair.embeddings import FlairEmbeddings
3 | from flair.visual import Highlighter
4 | from flair.visual.ner_html import HTML_PAGE, PARAGRAPH, TAGGED_ENTITY, render_ner_html
5 | from flair.visual.training_curves import Plotter
6 |
7 |
8 | def test_highlighter(resources_path):
9 | with (resources_path / "visual/snippet.txt").open() as f:
10 | sentences = [x for x in f.read().split("\n") if x]
11 |
12 | embeddings = FlairEmbeddings("news-forward")
13 |
14 | features = embeddings.lm.get_representation(sentences[0], "", "").squeeze()
15 |
16 | Highlighter().highlight_selection(
17 | features,
18 | sentences[0],
19 | n=1000,
20 | file_=str(resources_path / "visual/highlight.html"),
21 | )
22 |
23 | # clean up directory
24 | (resources_path / "visual/highlight.html").unlink()
25 |
26 |
27 | def test_plotting_training_curves_and_weights(resources_path):
28 | plotter = Plotter()
29 | plotter.plot_training_curves(resources_path / "visual/loss.tsv")
30 | plotter.plot_weights(resources_path / "visual/weights.txt")
31 |
32 | # clean up directory
33 | (resources_path / "visual/weights.png").unlink()
34 | (resources_path / "visual/training.png").unlink()
35 |
36 |
37 | def mock_ner_span(text, tag, start, end):
38 | span = Span([]).set_label("class", tag)
39 | span.start_pos = start
40 | span.end_pos = end
41 | span.tokens = [Token(text[start:end])]
42 | return span
43 |
44 |
45 | def test_html_rendering():
46 | text = (
47 | "Boris Johnson has been elected new Conservative leader in "
48 | "a ballot of party members and will become the "
49 | "next UK prime minister. &"
50 | )
51 | sentence = Sentence(text)
52 |
53 | print(sentence[0:2].add_label("ner", "PER"))
54 | print(sentence[6:7].add_label("ner", "MISC"))
55 | print(sentence[19:20].add_label("ner", "LOC"))
56 | colors = {
57 | "PER": "#F7FF53",
58 | "ORG": "#E8902E",
59 | "LOC": "yellow",
60 | "MISC": "#4647EB",
61 | "O": "#ddd",
62 | }
63 | actual = render_ner_html([sentence], colors=colors)
64 |
65 | expected_res = HTML_PAGE.format(
66 | text=PARAGRAPH.format(
67 | sentence=TAGGED_ENTITY.format(color="#F7FF53", entity="Boris Johnson", label="PER")
68 | + " has been elected new "
69 | + TAGGED_ENTITY.format(color="#4647EB", entity="Conservative", label="MISC")
70 | + " leader in a ballot of party members and will become the next "
71 | + TAGGED_ENTITY.format(color="yellow", entity="UK", label="LOC")
72 | + " prime minister. &"
73 | ),
74 | title="Flair",
75 | )
76 |
77 | assert expected_res == actual
78 |
--------------------------------------------------------------------------------