27 | Privacy Policy
28 | The web server / web hosting company might collect certain log files to prevent abuse of its services.
29 | These log files can include: IP address, URL, date and time.
30 | We do not use any tracking services or cookies to track or re-identify visitors.
31 |
32 |
33 |
34 |
35 |
--------------------------------------------------------------------------------
/docs/_templates/page.html:
--------------------------------------------------------------------------------
1 | {% extends "!page.html" %}
2 | {% block body %}
3 | {% if current_version and latest_version and current_version != latest_version and current_version != release and current_version.name != latest_version.release %}
4 |
5 |
6 | {% if current_version.is_released %}
7 | {% if latest_version.release.replace('v', '').split('.') | map('int') | list > current_version.name.replace('v', '').split('.') | map('int') | list %}
8 | You're reading an old version of this documentation.
9 | If you want up-to-date information, please have a look at {{latest_version.name}}.
10 | {% endif %}
11 | {% else %}
12 | You're reading the documentation for a development version.
13 | For the latest stable version, please have a look at {{latest_version.name}}.
14 | {% endif %}
15 |
16 |
17 | {% endif %}
18 | {{ super() }}
19 | {% endblock %}
--------------------------------------------------------------------------------
/docs/_templates/version-switcher.html:
--------------------------------------------------------------------------------
1 | {# As the version switcher will only work when JavaScript is enabled, we add it through JavaScript.
2 | #}
3 |
41 |
46 |
--------------------------------------------------------------------------------
/docs/_templates/versioning.html:
--------------------------------------------------------------------------------
1 | {% if versions %}
2 |
17 | {% endif %}
--------------------------------------------------------------------------------
/docs/api/datasets/base.rst:
--------------------------------------------------------------------------------
1 | flair.datasets.base
2 | ===================
3 |
4 | .. currentmodule:: flair.datasets.base
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/datasets/biomedical.rst:
--------------------------------------------------------------------------------
1 | flair.datasets.biomedical
2 | =========================
3 |
4 | .. currentmodule:: flair.datasets.biomedical
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/datasets/document_classification.rst:
--------------------------------------------------------------------------------
1 | flair.datasets.document_classification
2 | ======================================
3 |
4 | .. currentmodule:: flair.datasets.document_classification
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/datasets/entity_linking.rst:
--------------------------------------------------------------------------------
1 | flair.datasets.entity_linking
2 | =============================
3 |
4 | .. currentmodule:: flair.datasets.entity_linking
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/datasets/ocr.rst:
--------------------------------------------------------------------------------
1 | flair.datasets.ocr
2 | ==================
3 |
4 | .. currentmodule:: flair.datasets.ocr
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/datasets/relation_extraction.rst:
--------------------------------------------------------------------------------
1 | flair.datasets.relation_extraction
2 | ==================================
3 |
4 | .. currentmodule:: flair.datasets.relation_extraction
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/datasets/sequence_labeling.rst:
--------------------------------------------------------------------------------
1 | flair.datasets.sequence_labeling
2 | ================================
3 |
4 | .. currentmodule:: flair.datasets.sequence_labeling
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/datasets/text_image.rst:
--------------------------------------------------------------------------------
1 | flair.datasets.text_image
2 | =========================
3 |
4 | .. currentmodule:: flair.datasets.text_image
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/datasets/text_text.rst:
--------------------------------------------------------------------------------
1 | flair.datasets.text_text
2 | ========================
3 |
4 | .. currentmodule:: flair.datasets.text_text
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
10 |
11 |
--------------------------------------------------------------------------------
/docs/api/datasets/treebanks.rst:
--------------------------------------------------------------------------------
1 | flair.datasets.treebanks
2 | ========================
3 |
4 | .. currentmodule:: flair.datasets.treebanks
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/embeddings/base.rst:
--------------------------------------------------------------------------------
1 | flair.embeddings.base
2 | =====================
3 |
4 | .. currentmodule:: flair.embeddings.base
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/embeddings/document.rst:
--------------------------------------------------------------------------------
1 | flair.embeddings.document
2 | =========================
3 |
4 | .. currentmodule:: flair.embeddings.document
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/embeddings/image.rst:
--------------------------------------------------------------------------------
1 | flair.embeddings.image
2 | ======================
3 |
4 | .. currentmodule:: flair.embeddings.image
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/embeddings/legacy.rst:
--------------------------------------------------------------------------------
1 | flair.embeddings.legacy
2 | =======================
3 |
4 | .. currentmodule:: flair.embeddings.legacy
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/embeddings/token.rst:
--------------------------------------------------------------------------------
1 | flair.embeddings.token
2 | ======================
3 |
4 | .. currentmodule:: flair.embeddings.token
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/embeddings/transformer.rst:
--------------------------------------------------------------------------------
1 | flair.embeddings.transformer
2 | ============================
3 |
4 | .. currentmodule:: flair.embeddings.transformer
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/flair.data.rst:
--------------------------------------------------------------------------------
1 | flair.data
2 | ==========
3 |
4 | .. currentmodule:: flair.data
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/flair.datasets.rst:
--------------------------------------------------------------------------------
1 | flair.datasets
2 | ==============
3 |
4 | .. currentmodule:: flair.datasets
5 |
6 | .. toctree::
7 | :glob:
8 | :maxdepth: 2
9 |
10 | datasets/*
11 |
--------------------------------------------------------------------------------
/docs/api/flair.embeddings.rst:
--------------------------------------------------------------------------------
1 | flair.embeddings
2 | ================
3 |
4 | .. currentmodule:: flair.embeddings
5 |
6 | .. toctree::
7 | :glob:
8 | :maxdepth: 2
9 |
10 | embeddings/*
--------------------------------------------------------------------------------
/docs/api/flair.models.rst:
--------------------------------------------------------------------------------
1 | flair.models
2 | ============
3 |
4 | .. currentmodule:: flair.models
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/flair.nn.rst:
--------------------------------------------------------------------------------
1 | flair.nn
2 | ========
3 |
4 | .. currentmodule:: flair.nn
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/flair.rst:
--------------------------------------------------------------------------------
1 | flair
2 | =====
3 |
4 | .. currentmodule:: flair
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/flair.splitter.rst:
--------------------------------------------------------------------------------
1 | flair.splitter
2 | ==============
3 |
4 | .. currentmodule:: flair.splitter
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
10 |
--------------------------------------------------------------------------------
/docs/api/flair.tokenization.rst:
--------------------------------------------------------------------------------
1 | flair.tokenization
2 | ==================
3 |
4 | .. currentmodule:: flair.tokenization
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/flair.trainers.plugins.rst:
--------------------------------------------------------------------------------
1 | flair.trainers.plugins
2 | ======================
3 |
4 | .. currentmodule:: flair.trainers.plugins
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/flair.trainers.rst:
--------------------------------------------------------------------------------
1 | flair.trainers
2 | ==============
3 |
4 | .. currentmodule:: flair.trainers
5 |
6 | .. autosummary::
7 | :toctree: generated
8 | :nosignatures:
9 |
--------------------------------------------------------------------------------
/docs/api/index.rst:
--------------------------------------------------------------------------------
1 | API Docs
2 | ========
3 |
4 | .. toctree::
5 | :glob:
6 | :maxdepth: 2
7 |
8 | flair
9 | flair.*
--------------------------------------------------------------------------------
/docs/contributing/index.rst:
--------------------------------------------------------------------------------
1 | Contributing
2 | ============
3 |
4 | .. toctree::
5 | :maxdepth: 1
6 |
7 | writing_a_good_issue
8 | local_development
9 | making_a_pull_request
10 | updating_documentation
11 |
--------------------------------------------------------------------------------
/docs/contributing/local_development.md:
--------------------------------------------------------------------------------
1 | # Local Development
2 |
3 | For contributors looking to get deeper into the API, we suggest cloning the repository and checking out the unit
4 | tests for examples of how to call methods. Most classes and methods are documented, so finding your way around
5 | the code should hopefully be easy.
6 |
7 | ## Setup
8 |
9 | Flair requires Python 3.9 or higher. To make sure our code also runs on the oldest supported
10 | Python version, it is recommended to use Python 3.9.x for Flair development.
11 |
12 | Create a python environment of your preference and run:
13 | ```bash
14 | pip install -r requirements-dev.txt
15 | pip install -e .
16 | ```
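
For example, a fresh environment with Python 3.9 can first be created with the built-in `venv` module before running the commands above (just a sketch; any environment manager works):

```bash
python3.9 -m venv .venv        # create the environment
source .venv/bin/activate      # activate it, then run the commands above
```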
17 |
18 | ## Tests
19 |
20 | To run only the type checks and code-formatting checks, execute:
21 |
22 | ```bash
23 | pytest flair
24 | ```
25 |
26 | To run all basic tests execute:
27 |
28 | ```bash
29 | pytest
30 | ```
31 |
32 | To run integration tests execute:
33 |
34 | ```bash
35 | pytest --runintegration
36 | ```
37 |
38 | The integration tests will train small models and therefore take more time.
39 | In general, it is recommended to ensure that all basic tests pass before running the integration tests.
40 |
41 | ## Code Formatting
42 |
43 | To ensure a standardized code style we use the formatter [black](https://github.com/ambv/black) and for standardizing imports we use [ruff](https://github.com/charliermarsh/ruff).
44 | If your code is not formatted properly, the tests will fail.
45 |
46 | We recommend configuring your IDE to run these formatters for you, but you can also always run them manually via
47 | `black . && ruff --fix .` in the flair root folder.
--------------------------------------------------------------------------------
/docs/contributing/making_a_pull_request.md:
--------------------------------------------------------------------------------
1 | # Making a pull request
2 |
3 | We are happy to accept your contributions to make `flair` better and more awesome! To avoid unnecessary work on either
4 | side, please stick to the following process:
5 |
6 | 1. Check if there is already [an issue](https://github.com/flairNLP/flair/issues) for your concern.
7 | 2. If there is not, open a new one to start a discussion. We hate to close finished PRs!
8 | 3. If we decide your concern needs code changes, we would be happy to accept a pull request. Please consider the
9 | commit guidelines below.
10 |
11 |
12 | ## Git Commit Guidelines
13 |
14 | If there is already a ticket, use this number at the start of your commit message.
15 | Use meaningful commit messages that describe what you did.
16 |
17 | **Example:** `GH-42: Added new type of embeddings: DocumentEmbedding.`
--------------------------------------------------------------------------------
/docs/contributing/updating_documentation.md:
--------------------------------------------------------------------------------
1 | # Updating documentation
2 |
3 |
4 | ## What is good documentation?
5 |
6 | Good Documentation
7 | * Always refers to the end user. Do not document *why* something is the way it is, but rather *how* to use it.
8 | * Doesn't lie and is always up-to-date. Whenever code is updated, consider whether the documentation needs to change accordingly to reflect reality.
9 | * Provides useful links whenever possible. Do not reference another object without linking it.
10 |
11 |
12 | ## Tutorials
13 |
14 | All tutorials are markdown files stored in [the tutorial folder](https://github.com/flairNLP/flair/tree/master/docs/tutorial).
15 | When adding a new tutorial, you must add its name to the `index.rst` file in the respective folder.
16 | We are using the [MyST parser](https://myst-parser.readthedocs.io/en/latest/syntax/typography.html) which adds
17 | some additional syntax over markdown.
18 |
19 | A tutorial should always be easy to understand, and reference the API documentation for further reading.
20 |
21 | ```{note}
22 | You can reference symbols by defining links
23 | e.g.: ``[`flair.set_seed`](#flair.set_seed)`` for a function
24 | e.g.: `[entity-linking](project:../tutorial/tutorial-basics/entity-linking.md)` for another tutorial
25 | ```
26 |
27 | ## Docstrings
28 |
29 | For docstrings we follow the [Google docstring](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) format.
30 | We do not need to specify types or default values, as those will be extracted from the function signature.
31 |
32 | Docstrings usually start with a one-liner giving a simple explanation of the object, followed by a more detailed explanation **if required**.
33 | Ensure that you always use cross-references instead of just mentioning another object,
34 | e.g. ``:class:`flair.models.SequenceTagger` `` can be used to reference the SequenceTagger.
35 |
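As a minimal sketch, a Google-style docstring looks like this (the function and its parameters are hypothetical):

```python
def embed_sentences(sentences, batch_size=32):
    """Adds embeddings to a list of sentences.

    Args:
        sentences: The sentences to embed.
        batch_size: Number of sentences to process per forward pass.

    Returns:
        The same sentences, with embeddings attached.
    """
```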
36 |
37 | ## Building the local docs
38 |
39 | For building the docs,
40 |
41 | * Ensure that you have everything committed. Local changes won't be used for building.
42 | * Install the build dependencies via `pip install -r docs/requirements.txt`.
43 | * In `docs/conf.py` temporarily add your local branch name to the `smv_branch_whitelist` pattern.
44 |   E.g. if your branch is called `doc-page`, `smv_branch_whitelist` needs to have the value `r"^master|doc-page$"`.
45 | * Run `sphinx-multiversion docs doc_build/` to generate the docs.
46 | * Open `doc_build//index.html` to view the docs.
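
Collected as shell commands, the build steps above look roughly like this (assuming your changes are committed and your branch is whitelisted):

```bash
pip install -r docs/requirements.txt
sphinx-multiversion docs doc_build/
```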
47 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. _flair_docs_mainpage:
2 |
3 | .. title:: Home
4 |
5 | .. raw:: html
6 | :file: _templates/landing_page_styles.html
7 |
8 | .. raw:: html
9 | :file: _templates/landing-page-banner.html
10 |
11 | .. raw:: html
12 | :file: _templates/landing-page-illustrations.html
13 |
14 | .. toctree::
15 | :maxdepth: 3
16 | :hidden:
17 |
18 | Tutorials
19 | API
20 | Contributing
--------------------------------------------------------------------------------
/docs/legal-notice/index.rst:
--------------------------------------------------------------------------------
1 | Legal Notice
2 | ============
3 |
4 | .. title:: Legal Notice
5 |
6 | .. raw:: html
7 | :file: ../_templates/legal-notice-content.html
8 |
9 | .. toctree::
10 | :maxdepth: 3
11 | :hidden:
12 |
13 | Tutorials <../tutorial/index>
14 | API <../api/index>
15 | Contributing <../contributing/index>
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx-github-style<=1.0.2 # 1.0.3 changes logic that breaks with sphinx-multiversion
2 | sphinx-autodoc-typehints
3 | myst-parser
4 | sphinx<8.0.0
5 | importlib-metadata
6 | sphinx-multiversion
7 | pydata-sphinx-theme<0.14
8 | sphinx_design
9 | sphinx-autosummary-autocollect
10 |
11 | # previous dependencies that are required to build docs for later versions too.
12 | semver
13 | gensim
14 | bpemb
--------------------------------------------------------------------------------
/docs/tutorial/index.rst:
--------------------------------------------------------------------------------
1 | Tutorials
2 | =========
3 |
4 |
5 | .. _flair_tutorials:
6 |
7 | .. toctree::
8 | :maxdepth: 2
9 |
10 | intro
11 | tutorial-basics/index
12 | tutorial-training/index
13 | tutorial-embeddings/index
14 | tutorial-hunflair2/index
--------------------------------------------------------------------------------
/docs/tutorial/intro.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 1
3 | ---
4 |
5 | (getting_started)=
6 |
7 | # Quick Start
8 |
9 | Let's discover **Flair in less than 5 minutes**.
10 |
11 | ## Requirements and Installation
12 |
13 | In your favorite virtual environment, simply do:
14 |
15 | ```bash
16 | pip install flair
17 | ```
18 |
19 | Flair requires Python 3.9+.
20 |
21 | ## Example 1: Tag Entities in Text
22 |
23 | Let's run **named entity recognition** (NER) over the following example sentence: "_I love Berlin and New York._"
24 |
25 | Our goal is to identify the names in this sentence and their types.
26 |
27 | To do this, all you need is to make a [`Sentence`](#flair.data.Sentence) for this text, load a pre-trained model and use it to predict tags for the sentence:
28 |
29 |
30 | ```python
31 | from flair.data import Sentence
32 | from flair.nn import Classifier
33 |
34 | # make a sentence
35 | sentence = Sentence('I love Berlin and New York.')
36 |
37 | # load the NER tagger
38 | tagger = Classifier.load('ner')
39 |
40 | # run NER over sentence
41 | tagger.predict(sentence)
42 |
43 | # print the sentence with all annotations
44 | print(sentence)
45 | ```
46 |
47 | This should print:
48 |
49 | ```console
50 | Sentence[7]: "I love Berlin and New York." → ["Berlin"/LOC, "New York"/LOC]
51 | ```
52 |
53 | The output shows that both "Berlin" and "New York" were tagged as **location entities** (LOC) in this sentence.
54 |
55 |
56 | ## Example 2: Detect Sentiment
57 |
58 | Let's run **sentiment analysis** over the same sentence to determine whether it is POSITIVE or NEGATIVE.
59 |
60 | You can do this with essentially the same code as above. Just instead of loading the 'ner' model, you now load the 'sentiment' model:
61 |
62 |
63 | ```python
64 | from flair.data import Sentence
65 | from flair.nn import Classifier
66 |
67 | # make a sentence
68 | sentence = Sentence('I love Berlin and New York.')
69 |
70 | # load the sentiment tagger
71 | tagger = Classifier.load('sentiment')
72 |
73 | # run sentiment analysis over sentence
74 | tagger.predict(sentence)
75 |
76 | # print the sentence with all annotations
77 | print(sentence)
78 |
79 | ```
80 |
81 | This should print:
82 |
83 | ```console
84 | Sentence[7]: "I love Berlin and New York." → POSITIVE (0.9982)
85 | ```
86 |
87 | The output shows that the sentence "_I love Berlin and New York._" was tagged as having **POSITIVE** sentiment.
88 |
89 |
90 | ## Summary
91 |
92 | Congrats, you now know how to use Flair to find entities and detect sentiment!
93 |
94 | ## Next steps
95 |
96 | If you want to know more about Flair, next check out [Tutorial 1](tutorial-basics/), which gives an introduction to the basics of Flair!
--------------------------------------------------------------------------------
/docs/tutorial/tutorial-basics/how-predictions-work.md:
--------------------------------------------------------------------------------
1 | # How predictions work
2 |
3 | All taggers in Flair make predictions. This tutorial helps you understand what information you can get out of each prediction.
4 |
5 | ## Running example
6 |
7 | Let's use our standard NER example to illustrate how annotations work:
8 |
9 | ```python
10 | from flair.nn import Classifier
11 | from flair.data import Sentence
12 |
13 | # load the model
14 | tagger = Classifier.load('ner')
15 |
16 | # make a sentence
17 | sentence = Sentence('George Washington went to Washington.')
18 |
19 | # predict NER tags
20 | tagger.predict(sentence)
21 |
22 | # print the sentence with the tags
23 | print(sentence)
24 | ```
25 |
26 | This should print:
27 | ```console
28 | Sentence: "George Washington went to Washington ." → ["George Washington"/PER, "Washington"/LOC]
29 | ```
30 |
31 | This shows that two entities are labeled in this sentence: "George Washington" as PER (person) and "Washington"
32 | as LOC (location).
33 |
34 | ## Getting the predictions
35 |
36 | A common question that gets asked is **how to access these predictions directly**. You can do this by using
37 | the [`get_labels()`](#flair.data.Sentence.get_labels) method to iterate over all predictions:
38 |
39 | ```python
40 | for label in sentence.get_labels():
41 | print(label)
42 | ```
43 | This should print the two NER predictions:
44 |
45 | ```console
46 | Span[0:2]: "George Washington" → PER (0.9989)
47 | Span[4:5]: "Washington" → LOC (0.9942)
48 | ```
49 |
50 | As you can see, each entity is printed, together with the predicted class.
51 | The confidence of the prediction is indicated as a score in brackets.
52 |
53 | ## Values for each prediction
54 |
55 | For each prediction, you can even **directly access** the label value, and all other attributes of the [`Label`](#flair.data.Label) class:
56 |
57 | ```python
58 | # iterate over all labels in the sentence
59 | for label in sentence.get_labels():
60 | # print label value and score
61 | print(f'label.value is: "{label.value}"')
62 | print(f'label.score is: "{label.score}"')
63 | # access the data point to which label attaches and print its text
64 | print(f'the text of label.data_point is: "{label.data_point.text}"\n')
65 | ```
66 |
67 | This should print:
68 | ```console
69 | label.value is: "PER"
70 | label.score is: "0.998886227607727"
71 | the text of label.data_point is: "George Washington"
72 |
73 | label.value is: "LOC"
74 | label.score is: "0.9942097663879395"
75 | the text of label.data_point is: "Washington"
76 | ```
77 |
78 |
79 | ### Next
80 |
81 | Congrats, you've made your first predictions with Flair and accessed value and confidence scores of each prediction.
82 |
83 | Next, let's discuss specifically how to [predict named entities with Flair](tagging-entities.md).
84 |
--------------------------------------------------------------------------------
/docs/tutorial/tutorial-basics/how-to-tag-corpus.md:
--------------------------------------------------------------------------------
1 | # How to tag a whole corpus
2 |
3 | Often, you may want to tag an entire text corpus. In this case, you need to split the corpus into sentences and pass a
4 | list of [`Sentence`](#flair.data.Sentence) objects to the [`Classifier.predict()`](#flair.nn.Classifier.predict) method.
5 |
6 | For instance, you can use a [`SentenceSplitter`](#flair.splitter.SentenceSplitter) to split your text:
7 |
8 | ```python
9 | from flair.nn import Classifier
10 | from flair.splitter import SegtokSentenceSplitter
11 |
12 | # example text with many sentences
13 | text = "This is a sentence. This is another sentence. I love Berlin."
14 |
15 | # initialize sentence splitter
16 | splitter = SegtokSentenceSplitter()
17 |
18 | # use splitter to split text into list of sentences
19 | sentences = splitter.split(text)
20 |
21 | # predict tags for sentences
22 | tagger = Classifier.load('ner')
23 | tagger.predict(sentences)
24 |
25 | # iterate through sentences and print predicted labels
26 | for sentence in sentences:
27 | print(sentence)
28 | ```
29 |
30 | Using the `mini_batch_size` parameter of the [`Classifier.predict()`](#flair.nn.Classifier.predict) method, you can set the size of mini batches passed to the
31 | tagger. Depending on your resources, you might want to play around with this parameter to optimize speed.
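For instance, a larger batch size (the value 64 here is arbitrary) may speed up prediction on a GPU:

```python
tagger.predict(sentences, mini_batch_size=64)
```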
32 |
33 | ### Next
34 |
35 | That's it - you completed tutorial 1! Congrats!
36 |
37 | You've learned how basic classes work and how to use Flair to make various predictions.
38 |
39 | Next, you can check out our tutorial on how to [train your own model](../tutorial-training/how-model-training-works.md).
40 |
--------------------------------------------------------------------------------
/docs/tutorial/tutorial-basics/index.rst:
--------------------------------------------------------------------------------
1 | Tutorial 1: Basic Tagging
2 | =========================
3 |
4 | This tutorial shows you in more detail how to tag your text and access predictions,
5 | and showcases various models we ship with Flair.
6 |
7 | .. toctree::
8 | :maxdepth: 1
9 |
10 | basic-types
11 | how-predictions-work
12 | tagging-entities
13 | tagging-sentiment
14 | entity-linking
15 | entity-mention-linking
16 | part-of-speech-tagging
17 | other-models
18 | how-to-tag-corpus
19 |
--------------------------------------------------------------------------------
/docs/tutorial/tutorial-basics/tagging-sentiment.md:
--------------------------------------------------------------------------------
1 | # Tagging sentiment
2 |
3 | This tutorial shows you how to do sentiment analysis in Flair.
4 |
5 | ## Tagging sentiment with our standard model
6 |
7 | Our standard sentiment analysis model uses distilBERT embeddings and was trained over a mix of corpora, notably
8 | the Amazon review corpus, and can thus handle a variety of domains and language styles.
9 |
10 | Let's use an example sentence:
11 |
12 | ```python
13 | from flair.nn import Classifier
14 | from flair.data import Sentence
15 |
16 | # load the model
17 | tagger = Classifier.load('sentiment')
18 |
19 | # make a sentence
20 | sentence = Sentence('This movie is not at all bad.')
21 |
22 | # predict sentiment
23 | tagger.predict(sentence)
24 |
25 | # print sentence with predicted tags
26 | print(sentence)
27 | ```
28 |
29 | This should print:
30 | ```console
31 | Sentence[8]: "This movie is not at all bad." → POSITIVE (0.9929)
32 | ```
33 |
34 | This shows that the sentence overall is tagged as having POSITIVE sentiment.
35 |
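To work with the prediction programmatically rather than printing the whole sentence, you can read the label's value and confidence directly (a small sketch using the accessors from the previous tutorial):

```python
label = sentence.get_labels()[0]
print(label.value)  # e.g. 'POSITIVE'
print(label.score)  # confidence, e.g. 0.9929
```
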
36 | ## Tagging sentiment with our fast model
37 |
38 | We also offer an RNN-based variant which is faster but less accurate. Use it like this:
39 |
40 |
41 | ```python
42 | from flair.nn import Classifier
43 | from flair.data import Sentence
44 |
45 | # load the model
46 | tagger = Classifier.load('sentiment-fast')
47 |
48 | # make a sentence
49 | sentence = Sentence('This movie is very bad.')
50 |
51 | # predict sentiment
52 | tagger.predict(sentence)
53 |
54 | # print sentence with predicted tags
55 | print(sentence)
56 | ```
57 |
58 | This should print:
59 | ```console
60 | Sentence[6]: "This movie is very bad." → NEGATIVE (0.9999)
61 | ```
62 |
63 | This indicates that the sentence is of NEGATIVE sentiment. As you can see, it's the same code as above, just loading the
64 | '**sentiment-fast**' model instead of '**sentiment**'.
65 |
66 |
67 | ### List of Sentiment Models
68 |
69 | We end this section with a list of all models we currently ship with Flair:
70 |
71 | | ID | Language | Task | Training Dataset | Accuracy |
72 | | ------------- | ---- | ------------- |------------- |------------- |
73 | | 'sentiment' | English | detecting positive and negative sentiment (transformer-based) | movie and product reviews | **98.87** |
74 | | 'sentiment-fast' | English | detecting positive and negative sentiment (RNN-based) | movie and product reviews | **96.83**|
75 | | 'de-offensive-language' | German | detecting offensive language | [GermEval 2018 Task 1](https://projects.fzai.h-da.de/iggsa/projekt/) | **75.71** (Macro F1) |
76 |
77 |
78 | ### Next
79 |
80 | Congrats, you learned how to predict sentiment with Flair!
81 |
82 | Next, let's discuss how to [link entities to Wikipedia with Flair](entity-linking.md).
83 |
84 |
--------------------------------------------------------------------------------
/docs/tutorial/tutorial-embeddings/index.rst:
--------------------------------------------------------------------------------
1 | Tutorial 3: Embeddings
2 | ======================
3 |
4 | This tutorial shows you how to use Flair to produce embeddings for words and documents.
5 | Embeddings are vector representations of text that are useful for a variety of tasks.
6 | All Flair models are trained on top of embeddings, so if you want to train your own models,
7 | you should understand how embeddings work.
8 |
9 | .. toctree::
10 | :maxdepth: 1
11 |
12 | embeddings
13 | transformer-embeddings
14 | flair-embeddings
15 | classic-word-embeddings
16 | other-embeddings
17 |
--------------------------------------------------------------------------------
/docs/tutorial/tutorial-hunflair2/index.rst:
--------------------------------------------------------------------------------
1 | Tutorial: HunFlair2
2 | ===================
3 |
4 | *HunFlair2* is a state-of-the-art named entity tagger and linker for biomedical texts. It comes with
5 | models for genes/proteins, chemicals, diseases, species and cell lines. *HunFlair2*
6 | builds on pretrained domain-specific language models and outperforms other biomedical
7 | NER tools on unseen corpora.
8 |
9 | .. toctree::
10 | :glob:
11 | :maxdepth: 1
12 |
13 | overview
14 | tagging
15 | linking
16 | training-ner-models
17 | customize-linking
18 |
--------------------------------------------------------------------------------
/docs/tutorial/tutorial-training/how-to-train-text-classifier.md:
--------------------------------------------------------------------------------
1 | # Train a Text Classifier
2 |
3 | This tutorial shows you how to train your own text classifier models with Flair. For instance, you
4 | could train your own sentiment analysis model or offensive language detection model.
5 |
6 |
7 | ## Training a text classification model with transformers
8 |
9 | For text classification, you reach state-of-the-art scores by fine-tuning a transformer.
10 |
11 | Training a model is easy: load the appropriate corpus, make a label dictionary, then fine-tune a [`TextClassifier`](#flair.models.TextClassifier)
12 | model using the [`ModelTrainer.fine_tune()`](#flair.trainers.ModelTrainer.fine_tune) method. See the example script below:
13 |
14 | ```python
15 | from flair.data import Corpus
16 | from flair.datasets import TREC_6
17 | from flair.embeddings import TransformerDocumentEmbeddings
18 | from flair.models import TextClassifier
19 | from flair.trainers import ModelTrainer
20 |
21 | # 1. get the corpus
22 | corpus: Corpus = TREC_6()
23 |
24 | # 2. what label do we want to predict?
25 | label_type = 'question_class'
26 |
27 | # 3. create the label dictionary
28 | label_dict = corpus.make_label_dictionary(label_type=label_type)
29 |
30 | # 4. initialize transformer document embeddings (many models are available)
31 | document_embeddings = TransformerDocumentEmbeddings('distilbert-base-uncased', fine_tune=True)
32 |
33 | # 5. create the text classifier
34 | classifier = TextClassifier(document_embeddings, label_dictionary=label_dict, label_type=label_type)
35 |
36 | # 6. initialize trainer
37 | trainer = ModelTrainer(classifier, corpus)
38 |
39 | # 7. run training with fine-tuning
40 | trainer.fine_tune('resources/taggers/question-classification-with-transformer',
41 | learning_rate=5.0e-5,
42 | mini_batch_size=4,
43 | max_epochs=10,
44 | )
45 | ```
46 |
47 | Once the model is trained you can load it to predict the class of new sentences. Just call the [`predict`](#flair.nn.DefaultClassifier.predict) method of the model.
48 |
49 | ```python
50 | from flair.data import Sentence
51 | classifier = TextClassifier.load('resources/taggers/question-classification-with-transformer/final-model.pt')
52 | # create example sentence
53 | sentence = Sentence('Who built the Eiffel Tower ?')
54 |
55 | # predict class and print
56 | classifier.predict(sentence)
57 |
58 | print(sentence.labels)
59 | ```
60 |
61 |
62 | ## Next
63 |
64 | Next, learn [how to train an entity linker](how-to-train-span-classifier.md).
--------------------------------------------------------------------------------
/docs/tutorial/tutorial-training/index.rst:
--------------------------------------------------------------------------------
1 | Tutorial 2: Training models
2 | ===========================
3 |
4 | This tutorial illustrates how you can train your own state-of-the-art NLP models with Flair.
5 |
6 | .. toctree::
7 | :glob:
8 | :maxdepth: 1
9 |
10 | how-model-training-works
11 | train-vs-fine-tune
12 | how-to-load-prepared-dataset
13 | how-to-load-custom-dataset
14 | how-to-train-sequence-tagger
15 | how-to-train-text-classifier
16 | how-to-train-span-classifier
17 | how-to-train-multitask-model
18 |
--------------------------------------------------------------------------------
/docs/tutorial/tutorial-training/train-vs-fine-tune.md:
--------------------------------------------------------------------------------
1 | # Training vs fine-tuning
2 |
3 | There are two broad ways to train a model: the "classic" approach and the fine-tuning approach. This section
4 | explains the differences.
5 |
6 |
7 | ## Fine-Tuning
8 |
9 | Fine-tuning is the current state-of-the-art approach. The main idea is that you take a pre-trained language model that
10 | consists of (hundreds of) millions of trained parameters. To this language model you add a simple prediction head with
11 | randomly initialized weights.
12 |
13 | Since in this case, the vast majority of parameters in the model is already trained, you only need to "fine-tune" this
14 | model. This means: Very small learning rate (LR) and just a few epochs. You are essentially just minimally modifying
15 | the model to adapt it to the task you want to solve.
16 |
17 | Use this method by calling [`ModelTrainer.fine_tune()`](#flair.trainers.ModelTrainer.fine_tune).
18 | Since most models in Flair were trained this way, this is likely the approach you'll want to use.
19 |
20 |
21 | ## Training
22 |
23 | On the other hand, you should use the classic training approach if the majority of the trainable parameters in your
24 | model is randomly initialized. This can happen for instance if you freeze the model weights of the pre-trained language
25 | model, leaving only the randomly initialized prediction head as trainable parameters. This training approach is also
26 | referred to as "feature-based" or "probing" in some papers.
27 |
28 | Since the majority of parameters is randomly initialized, you need to fully train the model. This means: high learning
29 | rate and many epochs.
30 |
31 | Use this method by calling [`ModelTrainer.train()`](#flair.trainers.ModelTrainer.train). A short sketch contrasting the two calls follows the note below.
32 |
33 | ```{note}
34 | Another application of classic training is for linear probing of pre-trained language models. In this scenario, you
35 | "freeze" the weights of the language model (meaning that they cannot be changed) and add a prediction head that is
36 | trained from scratch. So, even though a language model is involved, its parameters are not trainable. This means that
37 | all trainable parameters in this scenario are randomly initialized, therefore necessitating the use of the classic
38 | training approach.
39 | ```
40 |
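A minimal sketch of how the two approaches are invoked (assuming `model` and `corpus` have been created as in the other training tutorials; paths and hyperparameter values are illustrative):

```python
from flair.trainers import ModelTrainer

trainer = ModelTrainer(model, corpus)

# fine-tuning: very small learning rate, few epochs
trainer.fine_tune('resources/taggers/example', learning_rate=5.0e-5, max_epochs=10)

# classic training: higher learning rate, many epochs
trainer.train('resources/taggers/example', learning_rate=0.1, max_epochs=150)
```
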
41 |
42 | ## Paper
43 |
44 | If you are interested in an experimental comparison of the two above-mentioned approaches, check out [our paper](https://arxiv.org/pdf/2011.06993)
45 | that compares fine-tuning to the feature-based approach.
46 |
47 |
48 | ## Next
49 |
50 | Next, learn how to load a [training dataset](how-to-load-prepared-dataset.md).
--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | # Examples
2 |
3 | This folder contains actively maintained examples of how to use Flair, organized by NLP task.
4 |
5 | ## Table of Tasks
6 |
7 | | Task | Documentation
8 | | ------------------------------ | -------------
9 | | Named Entity Recognition (NER) | [Here](ner/)
10 | | Multi GPU | [Here](multi_gpu/)
11 |
--------------------------------------------------------------------------------
/examples/multi_gpu/README.md:
--------------------------------------------------------------------------------
1 | # Multi GPU
2 |
3 | Training can be distributed across multiple GPUs on a local machine when using
4 | [`ModelTrainer`](#flair.trainers.trainer.ModelTrainer).
5 |
6 | ## Example
7 |
8 | See the script `run_multi_gpu.py` and its comments.
9 |
10 | ## Tutorial
11 |
12 | There are 2 changes that are always required, as well as a few things to consider.
13 |
14 | Always Required:
15 | 1) Pass the argument `multi_gpu=True` to your [`.train()`](#flair.trainers.trainer.ModelTrainer.train) or `.fine_tune()`
16 | 2) Wrap your code in [`launch_distributed`](#flair.distributed_utils.launch_distributed), e.g.
17 |    `launch_distributed(main, *args)`. This spawns multiple processes, each driving a GPU (see the sketch below).
18 |
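A minimal sketch of these two changes (`build_corpus_and_model` is a hypothetical placeholder for your own setup code; see `run_multi_gpu.py` for a complete script):

```python
from flair.distributed_utils import launch_distributed
from flair.trainers import ModelTrainer


def main():
    corpus, model = build_corpus_and_model()  # must produce identical results on every process
    trainer = ModelTrainer(model, corpus)
    trainer.fine_tune("resources/taggers/example", multi_gpu=True)  # change 1


if __name__ == "__main__":
    launch_distributed(main)  # change 2: spawns one process per GPU
```
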
19 | Other considerations:
20 | - The corpus and other preprocessing must be the same on all processes. For example, if corpus initialization involves
21 | anything random, you should either
22 |     - Set the random seed before initializing the corpus (e.g. `flair.set_seed(42)`) OR
23 | - Initialize the corpus before calling `launch_distributed` and pass the corpus as an argument so it's serialized to
24 | all processes
25 | - The effective batch size will be larger by a factor of num_gpus
26 | - Each GPU will now process `mini_batch_size` examples before the optimizer steps, resulting in fewer total steps
27 | taken relative to training with a single device. To obtain comparable results between single/multi gpu,
28 |   both mathematically and in terms of wall time, consider the method in the example script.
29 | - Large batch sizes may be necessary to see faster runs, otherwise the communication overhead may dominate
30 |
31 | Only the parameter updates in the training process will be distributed across multiple GPUs. Evaluation and prediction
32 | are still done on a single device.
33 |
--------------------------------------------------------------------------------
/examples/multi_gpu/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/examples/multi_gpu/__init__.py
--------------------------------------------------------------------------------
/examples/multi_gpu/run_multi_gpu.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | import flair
4 | from flair.datasets import IMDB
5 | from flair.distributed_utils import launch_distributed
6 | from flair.embeddings import TransformerDocumentEmbeddings
7 | from flair.models import TextClassifier
8 | from flair.trainers import ModelTrainer
9 |
10 |
11 | def main(multi_gpu):
12 | # Note: Multi-GPU can affect corpus loading
13 | # This code will run multiple times -- each GPU gets its own process and each process runs this code. We need to
14 | # ensure that the corpus has the same elements and order on all processes, despite sampling. We do that by using
15 | # the same seed on all processes.
16 | flair.set_seed(42)
17 |
18 | corpus = IMDB()
19 | corpus.downsample(0.1)
20 | label_type = "sentiment"
21 | label_dictionary = corpus.make_label_dictionary(label_type)
22 |
23 | embeddings = TransformerDocumentEmbeddings(model="distilbert-base-uncased")
24 | model = TextClassifier(embeddings, label_type, label_dictionary=label_dictionary)
25 |
26 | # Note: Multi-GPU can affect choice of batch size.
27 | # In order to compare batch updates fairly between single and multi-GPU training, we should:
28 |     # 1) Step the optimizer after the same number of examples to achieve comparable updates
29 | # 2) Process the same number of examples in each forward pass
30 |     mini_batch_chunk_size = 32  # Make this as large as possible without running out of GPU memory, to fully pack each device
31 | num_devices_when_distributing = max(torch.cuda.device_count(), 1)
32 | mini_batch_size = mini_batch_chunk_size if multi_gpu else mini_batch_chunk_size * num_devices_when_distributing
33 | # e.g. Suppose your machine has 2 GPUs. If multi_gpu=False, the first gpu will process 32 examples, then the
34 | # first gpu will process another 32 examples, then the optimizer will step. If multi_gpu=True, each gpu will
35 | # process 32 examples at the same time, then the optimizer will step.
36 |
37 | trainer = ModelTrainer(model, corpus)
38 | trainer.fine_tune(
39 | "resources/taggers/multi-gpu",
40 | multi_gpu=multi_gpu, # Required for multi-gpu
41 | max_epochs=2,
42 | mini_batch_chunk_size=mini_batch_chunk_size,
43 | mini_batch_size=mini_batch_size,
44 | )
45 |
46 |
47 | if __name__ == "__main__":
48 | """Minimal example demonstrating how to train a model on multiple GPUs."""
49 | multi_gpu = True
50 |
51 | if multi_gpu:
52 | launch_distributed(main, multi_gpu) # Required for multi-gpu
53 | else:
54 | main(multi_gpu)
55 |
--------------------------------------------------------------------------------
/examples/ner/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/examples/ner/__init__.py
--------------------------------------------------------------------------------
/flair/class_utils.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import inspect
3 | from collections.abc import Iterable
4 | from types import ModuleType
5 | from typing import Any, Optional, Protocol, TypeVar, Union, overload
6 |
7 | T = TypeVar("T")
8 |
9 |
10 | class StringLike(Protocol):
11 | def __str__(self) -> str: ...
12 |
13 |
14 | def get_non_abstract_subclasses(cls: type[T]) -> Iterable[type[T]]:
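    # Depth-first over the subclass tree: recurse into each subclass first, then yield the
    # subclass itself unless it is abstract. Note that the root `cls` itself is never yielded.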
15 | for subclass in cls.__subclasses__():
16 | yield from get_non_abstract_subclasses(subclass)
17 | if inspect.isabstract(subclass):
18 | continue
19 | yield subclass
20 |
21 |
22 | def get_state_subclass_by_name(cls: type[T], cls_name: Optional[str]) -> type[T]:
23 | for sub_cls in get_non_abstract_subclasses(cls):
24 | if sub_cls.__name__ == cls_name:
25 | return sub_cls
26 | raise ValueError(f"Could not find any class with name '{cls_name}'")
27 |
28 |
29 | @overload
30 | def lazy_import(group: str, module: str, first_symbol: None) -> ModuleType: ...
31 |
32 |
33 | @overload
34 | def lazy_import(group: str, module: str, first_symbol: str, *symbols: str) -> list[Any]: ...
35 |
36 |
37 | def lazy_import(
38 | group: str, module: str, first_symbol: Optional[str] = None, *symbols: str
39 | ) -> Union[list[Any], ModuleType]:
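    """Imports a module lazily, optionally returning selected symbols from it.

    If the module is not installed, an ImportError points the user to the optional
    dependency group ('pip install flair[<group>]') that provides it.
    """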
40 | try:
41 | imported_module = importlib.import_module(module)
42 |     except ImportError as err:
43 |         raise ImportError(
44 |             f"Could not import {module}. Please install the optional '{group}' dependency via 'pip install flair[{group}]'."
45 |         ) from err
46 | if first_symbol is None:
47 | return imported_module
48 | symbols = (first_symbol, *symbols)
49 |
50 | return [getattr(imported_module, symbol) for symbol in symbols]
51 |
--------------------------------------------------------------------------------
/flair/embeddings/__init__.py:
--------------------------------------------------------------------------------
1 | # Expose base classes
2 | from flair.embeddings.transformer import (
3 | TransformerEmbeddings,
4 | TransformerJitDocumentEmbeddings,
5 | TransformerJitWordEmbeddings,
6 | TransformerOnnxDocumentEmbeddings,
7 | TransformerOnnxWordEmbeddings,
8 | )
9 |
10 | from .base import Embeddings, ScalarMix
11 |
12 | # Expose document embedding classes
13 | from .document import (
14 | DocumentCNNEmbeddings,
15 | DocumentEmbeddings,
16 | DocumentLMEmbeddings,
17 | DocumentPoolEmbeddings,
18 | DocumentRNNEmbeddings,
19 | DocumentTFIDFEmbeddings,
20 | SentenceTransformerDocumentEmbeddings,
21 | TransformerDocumentEmbeddings,
22 | )
23 |
24 | # Expose image embedding classes
25 | from .image import (
26 | ConvTransformNetworkImageEmbeddings,
27 | IdentityImageEmbeddings,
28 | ImageEmbeddings,
29 | NetworkImageEmbeddings,
30 | PrecomputedImageEmbeddings,
31 | )
32 |
33 | # Expose legacy embedding classes
34 | from .legacy import (
35 | CharLMEmbeddings,
36 | DocumentLSTMEmbeddings,
37 | DocumentMeanEmbeddings,
38 | ELMoEmbeddings,
39 | )
40 |
41 | # Expose token embedding classes
42 | from .token import (
43 | BytePairEmbeddings,
44 | CharacterEmbeddings,
45 | FastTextEmbeddings,
46 | FlairEmbeddings,
47 | HashEmbeddings,
48 | MuseCrosslingualEmbeddings,
49 | NILCEmbeddings,
50 | OneHotEmbeddings,
51 | PooledFlairEmbeddings,
52 | StackedEmbeddings,
53 | TokenEmbeddings,
54 | TransformerWordEmbeddings,
55 | WordEmbeddings,
56 | )
57 |
58 | __all__ = [
59 | "BPEmbSerializable",
60 | "BytePairEmbeddings",
61 | "CharLMEmbeddings",
62 | "CharacterEmbeddings",
63 | "ConvTransformNetworkImageEmbeddings",
64 | "DocumentCNNEmbeddings",
65 | "DocumentEmbeddings",
66 | "DocumentLMEmbeddings",
67 | "DocumentLSTMEmbeddings",
68 | "DocumentMeanEmbeddings",
69 | "DocumentPoolEmbeddings",
70 | "DocumentRNNEmbeddings",
71 | "DocumentTFIDFEmbeddings",
72 | "ELMoEmbeddings",
73 | "Embeddings",
74 | "FastTextEmbeddings",
75 | "FlairEmbeddings",
76 | "HashEmbeddings",
77 | "IdentityImageEmbeddings",
78 | "ImageEmbeddings",
79 | "MuseCrosslingualEmbeddings",
80 | "NILCEmbeddings",
81 | "NetworkImageEmbeddings",
82 | "OneHotEmbeddings",
83 | "PooledFlairEmbeddings",
84 | "PrecomputedImageEmbeddings",
85 | "ScalarMix",
86 | "SentenceTransformerDocumentEmbeddings",
87 | "StackedEmbeddings",
88 | "TokenEmbeddings",
89 | "TransformerDocumentEmbeddings",
90 | "TransformerEmbeddings",
91 | "TransformerJitDocumentEmbeddings",
92 | "TransformerJitWordEmbeddings",
93 | "TransformerOnnxDocumentEmbeddings",
94 | "TransformerOnnxWordEmbeddings",
95 | "TransformerWordEmbeddings",
96 | "WordEmbeddings",
97 | ]
98 |
--------------------------------------------------------------------------------
/flair/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .entity_linker_model import SpanClassifier
2 | from .entity_mention_linking import EntityMentionLinker
3 | from .language_model import LanguageModel
4 | from .lemmatizer_model import Lemmatizer
5 | from .multitask_model import MultitaskModel
6 | from .pairwise_classification_model import TextPairClassifier
7 | from .pairwise_regression_model import TextPairRegressor
8 | from .prefixed_tagger import PrefixedSequenceTagger # This import has to be after SequenceTagger!
9 | from .regexp_tagger import RegexpTagger
10 | from .relation_classifier_model import RelationClassifier
11 | from .relation_extractor_model import RelationExtractor
12 | from .sequence_tagger_model import SequenceTagger
13 | from .tars_model import FewshotClassifier, TARSClassifier, TARSTagger
14 | from .text_classification_model import TextClassifier
15 | from .text_regression_model import TextRegressor
16 | from .triple_classification_model import TextTripleClassifier
17 | from .word_tagger_model import TokenClassifier, WordTagger
18 |
19 | __all__ = [
20 | "EntityMentionLinker",
21 | "FewshotClassifier",
22 | "LanguageModel",
23 | "Lemmatizer",
24 | "MultitaskModel",
25 | "PrefixedSequenceTagger",
26 | "RegexpTagger",
27 | "RelationClassifier",
28 | "RelationExtractor",
29 | "SequenceTagger",
30 | "SpanClassifier",
31 | "TARSClassifier",
32 | "TARSTagger",
33 | "TextClassifier",
34 | "TextPairClassifier",
35 | "TextPairRegressor",
36 | "TextRegressor",
37 | "TextTripleClassifier",
38 | "TokenClassifier",
39 | "WordTagger",
40 | ]
41 |
--------------------------------------------------------------------------------
/flair/models/sequence_tagger_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/flair/models/sequence_tagger_utils/__init__.py
--------------------------------------------------------------------------------
/flair/models/sequence_tagger_utils/crf.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | import flair
4 |
5 | START_TAG: str = ""
6 | STOP_TAG: str = ""
7 |
8 |
9 | class CRF(torch.nn.Module):
10 | """Conditional Random Field.
11 |
12 |     Conditional Random Field implementation according to sgrvinod (https://github.com/sgrvinod).
13 |     Classifier which predicts a single tag / class / label for a given word based not just on the word,
14 |     but also on previously seen annotations.
15 | """
16 |
17 | def __init__(self, tag_dictionary, tagset_size: int, init_from_state_dict: bool) -> None:
18 | """Initialize the Conditional Random Field.
19 |
20 | Args:
21 |             tag_dictionary: tag dictionary used to find the IDs of the start and stop tags
22 |             tagset_size: number of tags in the tag dictionary
23 |             init_from_state_dict: whether a pretrained model is loaded from a state dict
24 | """
25 | super().__init__()
26 |
27 | self.tagset_size = tagset_size
28 | # Transitions are used in the following way: transitions[to, from].
29 | self.transitions = torch.nn.Parameter(torch.randn(tagset_size, tagset_size))
30 |         # If we are not using a pretrained model but training a fresh one, we need to set transitions from any tag
31 |         # to the START tag, and from the STOP tag to any other tag, to -10000.
32 | if not init_from_state_dict:
33 | self.transitions.detach()[tag_dictionary.get_idx_for_item(START_TAG), :] = -10000
34 |
35 | self.transitions.detach()[:, tag_dictionary.get_idx_for_item(STOP_TAG)] = -10000
36 | self.to(flair.device)
37 |
38 | def forward(self, features: torch.Tensor) -> torch.Tensor:
39 | """Forward propagation of Conditional Random Field.
40 |
41 | Args:
42 | features: output from RNN / Linear layer in shape (batch size, seq len, hidden size)
43 |
44 |         Returns: CRF scores (emission scores for each token + transition probabilities from the previous state) in shape (batch_size, seq_len, tagset_size, tagset_size)
45 | """
46 | batch_size, seq_len = features.size()[:2]
47 |
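        # features hold per-tag emission scores of shape (batch, seq_len, tagset_size); add a trailing
        # 'from-tag' dimension so they broadcast against the (to, from) transition matrix below.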
48 | emission_scores = features
49 | emission_scores = emission_scores.unsqueeze(-1).expand(batch_size, seq_len, self.tagset_size, self.tagset_size)
50 |
51 | crf_scores = emission_scores + self.transitions.unsqueeze(0).unsqueeze(0)
52 | return crf_scores
53 |
--------------------------------------------------------------------------------
/flair/nn/__init__.py:
--------------------------------------------------------------------------------
1 | from .decoder import DeepNCMDecoder, LabelVerbalizerDecoder, PrototypicalDecoder
2 | from .dropout import LockedDropout, WordDropout
3 | from .model import Classifier, DefaultClassifier, Model
4 |
5 | __all__ = [
6 | "Classifier",
7 | "DeepNCMDecoder",
8 | "DefaultClassifier",
9 | "LabelVerbalizerDecoder",
10 | "LockedDropout",
11 | "Model",
12 | "PrototypicalDecoder",
13 | "WordDropout",
14 | ]
15 |
--------------------------------------------------------------------------------
/flair/nn/distance/__init__.py:
--------------------------------------------------------------------------------
1 | from .cosine import CosineDistance, LogitCosineDistance, NegativeScaledDotProduct
2 | from .euclidean import EuclideanDistance, EuclideanMean
3 | from .hyperbolic import HyperbolicDistance, HyperbolicMean
4 |
5 | __all__ = [
6 | "CosineDistance",
7 | "EuclideanDistance",
8 | "EuclideanMean",
9 | "HyperbolicDistance",
10 | "HyperbolicMean",
11 | "LogitCosineDistance",
12 | "NegativeScaledDotProduct",
13 | ]
14 |
--------------------------------------------------------------------------------
/flair/nn/distance/cosine.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | # Source: https://github.com/UKPLab/sentence-transformers/blob/master/sentence_transformers/util.py#L23
4 |
5 |
6 | def dot_product(a: torch.Tensor, b: torch.Tensor, normalize=False):
7 | """Computes dot product for pairs of vectors.
8 |
9 | Args:
10 | a: the left tensor
11 | b: the right tensor
12 | normalize: Vectors are normalized (leads to cosine similarity)
13 |
14 | Returns: Matrix with res[i][j] = dot_product(a[i], b[j])
15 | """
16 | if len(a.shape) == 1:
17 | a = a.unsqueeze(0)
18 |
19 | if len(b.shape) == 1:
20 | b = b.unsqueeze(0)
21 |
22 | if normalize:
23 | a = torch.nn.functional.normalize(a, p=2, dim=1)
24 | b = torch.nn.functional.normalize(b, p=2, dim=1)
25 |
26 | return torch.mm(a, b.transpose(0, 1))
27 |
28 |
29 | class CosineDistance(torch.nn.Module):
30 | def forward(self, a, b):
31 | return -dot_product(a, b, normalize=True)
32 |
33 |
34 | class LogitCosineDistance(torch.nn.Module):
35 | def forward(self, a, b):
36 | return torch.logit(0.5 - 0.5 * dot_product(a, b, normalize=True))
37 |
38 |
39 | class NegativeScaledDotProduct(torch.nn.Module):
40 | def forward(self, a, b):
41 | sqrt_d = torch.sqrt(torch.tensor(a.size(-1)))
42 | return -dot_product(a, b, normalize=False) / sqrt_d
43 |
--------------------------------------------------------------------------------
/flair/nn/distance/euclidean.py:
--------------------------------------------------------------------------------
1 | """Euclidean distances implemented in pytorch.
2 |
3 | This module was copied from the following repository:
4 | https://github.com/asappresearch/dynamic-classification
5 |
6 | It contains the code from the paper "Metric Learning for Dynamic Text
7 | Classification".
8 |
9 | https://arxiv.org/abs/1911.01026
10 |
11 | In case this file is modified, please consider contributing to the original
12 | repository.
13 |
14 | It was published under MIT License:
15 | https://github.com/asappresearch/dynamic-classification/blob/master/LICENSE.md
16 |
17 | Source: https://github.com/asappresearch/dynamic-classification/blob/55beb5a48406c187674bea40487c011e8fa45aab/distance/euclidean.py
18 | """
19 |
20 | import torch
21 | from torch import Tensor, nn
22 |
23 |
24 | class EuclideanDistance(nn.Module):
25 | """Implement a EuclideanDistance object."""
26 |
27 | def forward(self, mat_1: Tensor, mat_2: Tensor) -> Tensor:
28 | """Returns the squared euclidean distance between each element in mat_1 and each element in mat_2.
29 |
30 | Parameters
31 | ----------
32 | mat_1: torch.Tensor
33 | matrix of shape (n_1, n_features)
34 | mat_2: torch.Tensor
35 | matrix of shape (n_2, n_features)
36 |
37 | Returns
38 | -------
39 | dist: torch.Tensor
40 | distance matrix of shape (n_1, n_2)
41 |
42 | """
43 | return torch.cdist(mat_1, mat_2).pow(2)
44 |
45 |
46 | class EuclideanMean(nn.Module):
47 | """Implement a EuclideanMean object."""
48 |
49 | def forward(self, data: Tensor) -> Tensor:
50 | """Performs a forward pass through the network.
51 |
52 | Parameters
53 | ----------
54 | data : torch.Tensor
55 | The input data, as a float tensor
56 |
57 | Returns
58 | -------
59 | torch.Tensor
60 | The encoded output, as a float tensor
61 |
62 | """
63 | return data.mean(0)
64 |
--------------------------------------------------------------------------------
/flair/nn/dropout.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class LockedDropout(torch.nn.Module):
5 | """Implementation of locked (or variational) dropout.
6 |
7 | Randomly drops out entire parameters in embedding space.
8 | """
9 |
10 | def __init__(self, dropout_rate=0.5, batch_first=True, inplace=False) -> None:
11 | super().__init__()
12 | self.dropout_rate = dropout_rate
13 | self.batch_first = batch_first
14 | self.inplace = inplace
15 |
16 | def forward(self, x):
17 | if not self.training or not self.dropout_rate:
18 | return x
19 |
20 | if not self.batch_first:
21 | m = x.data.new(1, x.size(1), x.size(2)).bernoulli_(1 - self.dropout_rate)
22 | else:
23 | m = x.data.new(x.size(0), 1, x.size(2)).bernoulli_(1 - self.dropout_rate)
24 |
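25 | # a single mask is sampled per sequence and shared across all time steps; dividing by
26 | # (1 - p) rescales the surviving units so activations keep their expected value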
27 | mask = m / (1 - self.dropout_rate)
26 | mask = mask.expand_as(x)
27 | return mask * x
28 |
29 | def extra_repr(self):
30 | inplace_str = ", inplace" if self.inplace else ""
31 | return f"p={self.dropout_rate}{inplace_str}"
32 |
33 |
34 | class WordDropout(torch.nn.Module):
35 | """Implementation of word dropout.
36 |
37 | Randomly drops out entire words (or characters) in embedding space.
38 | """
39 |
40 | def __init__(self, dropout_rate=0.05, inplace=False) -> None:
41 | super().__init__()
42 | self.dropout_rate = dropout_rate
43 | self.inplace = inplace
44 |
45 | def forward(self, x):
46 | if not self.training or not self.dropout_rate:
47 | return x
48 |
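49 | # sample one Bernoulli value per word position; the broadcast mask zeroes out whole
50 | # embedding vectors, and unlike standard dropout no rescaling is applied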
49 | m = x.data.new(x.size(0), x.size(1), 1).bernoulli_(1 - self.dropout_rate)
50 |
51 | mask = m
52 | return mask * x
53 |
54 | def extra_repr(self):
55 | inplace_str = ", inplace" if self.inplace else ""
56 | return f"p={self.dropout_rate}{inplace_str}"
57 |
--------------------------------------------------------------------------------
/flair/nn/multitask.py:
--------------------------------------------------------------------------------
1 | from collections.abc import Iterable
2 | from typing import Union
3 |
4 | from flair.data import Corpus, MultiCorpus
5 | from flair.models import MultitaskModel
6 | from flair.nn import Classifier, Model
7 |
8 |
9 | def make_multitask_model_and_corpus(
10 | mapping: Iterable[Union[tuple[Classifier, Corpus], tuple[Classifier, Corpus, float]]]
11 | ) -> tuple[Model, Corpus]:
12 | models = []
13 | corpora = []
14 | loss_factors = []
15 | ids = []
16 |
17 | for task_id, _map in enumerate(mapping):
18 | models.append(_map[0])
19 | corpora.append(_map[1])
20 | if len(_map) == 3:
21 | loss_factors.append(_map[2])
22 | else:
23 | loss_factors.append(1.0)
24 |
25 | ids.append(f"Task_{task_id}")
26 |
27 | return MultitaskModel(models=models, task_ids=ids, loss_factors=loss_factors), MultiCorpus(corpora, ids)
28 |
--------------------------------------------------------------------------------
/flair/nn/recurrent.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 |
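3 | # maps a layer type to (module, number of hidden-state tensors):
4 | # an LSTM returns a (h_n, c_n) pair, a GRU only returns h_n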
3 | rnn_layers = {"lstm": (nn.LSTM, 2), "gru": (nn.GRU, 1)}
4 |
5 |
6 | def create_recurrent_layer(layer_type, initial_size, hidden_size, nlayers, dropout=0, **kwargs):
7 | layer_type = layer_type.lower()
8 | assert layer_type in rnn_layers
9 | module, hidden_count = rnn_layers[layer_type]
10 |
11 | if nlayers == 1:
12 | dropout = 0
13 |
14 | return module(initial_size, hidden_size, nlayers, dropout=dropout, **kwargs), hidden_count
15 |
--------------------------------------------------------------------------------
/flair/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/flair/py.typed
--------------------------------------------------------------------------------
/flair/trainers/__init__.py:
--------------------------------------------------------------------------------
1 | from .language_model_trainer import LanguageModelTrainer, TextCorpus
2 | from .trainer import ModelTrainer
3 |
4 | __all__ = ["LanguageModelTrainer", "ModelTrainer", "TextCorpus"]
5 |
--------------------------------------------------------------------------------
/flair/trainers/plugins/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import BasePlugin, Pluggable, TrainerPlugin, TrainingInterrupt
2 | from .functional.anneal_on_plateau import AnnealingPlugin
3 | from .functional.checkpoints import CheckpointPlugin
4 | from .functional.deepncm_trainer_plugin import DeepNCMPlugin
5 | from .functional.linear_scheduler import LinearSchedulerPlugin
6 | from .functional.reduce_transformer_vocab import ReduceTransformerVocabPlugin
7 | from .functional.weight_extractor import WeightExtractorPlugin
8 | from .loggers.clearml_logger import ClearmlLoggerPlugin
9 | from .loggers.log_file import LogFilePlugin
10 | from .loggers.loss_file import LossFilePlugin
11 | from .loggers.metric_history import MetricHistoryPlugin
12 | from .loggers.tensorboard import TensorboardLogger
13 | from .metric_records import MetricName, MetricRecord
14 |
15 | __all__ = [
16 | "AnnealingPlugin",
17 | "BasePlugin",
18 | "CheckpointPlugin",
19 | "ClearmlLoggerPlugin",
20 | "DeepNCMPlugin",
21 | "LinearSchedulerPlugin",
22 | "LogFilePlugin",
23 | "LossFilePlugin",
24 | "MetricHistoryPlugin",
25 | "MetricName",
26 | "MetricRecord",
27 | "Pluggable",
28 | "ReduceTransformerVocabPlugin",
29 | "TensorboardLogger",
30 | "TrainerPlugin",
31 | "TrainingInterrupt",
32 | "WeightExtractorPlugin",
33 | ]
34 |
--------------------------------------------------------------------------------
/flair/trainers/plugins/functional/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/flair/trainers/plugins/functional/__init__.py
--------------------------------------------------------------------------------
/flair/trainers/plugins/functional/checkpoints.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import Any
3 |
4 | from flair.trainers.plugins.base import TrainerPlugin
5 |
6 | log = logging.getLogger("flair")
7 |
8 |
9 | class CheckpointPlugin(TrainerPlugin):
10 | def __init__(
11 | self,
12 | save_model_each_k_epochs,
13 | save_optimizer_state,
14 | base_path,
15 | ) -> None:
16 | super().__init__()
17 | self.save_optimizer_state = save_optimizer_state
18 | self.save_model_each_k_epochs = save_model_each_k_epochs
19 | self.base_path = base_path
20 |
21 | @TrainerPlugin.hook
22 | def after_training_epoch(self, epoch, **kw):
23 | """Saves the model each k epochs."""
24 | if self.save_model_each_k_epochs > 0 and epoch % self.save_model_each_k_epochs == 0:
25 | log.info(
26 | f"Saving model at current epoch since 'save_model_each_k_epochs={self.save_model_each_k_epochs}' "
27 | f"was set"
28 | )
29 | model_name = "model_epoch_" + str(epoch) + ".pt"
30 |
31 | # Use trainer's _save_model method - we have access to trainer through self.trainer
32 | self.trainer._save_model(self.base_path / model_name, save_optimizer_state=self.save_optimizer_state)
33 |
34 | @property
35 | def attach_to_all_processes(self) -> bool:
36 | return False
37 |
38 | def get_state(self) -> dict[str, Any]:
39 | return {
40 | **super().get_state(),
41 | "base_path": str(self.base_path),
42 | "save_model_each_k_epochs": self.save_model_each_k_epochs,
43 | "save_optimizer_state": self.save_optimizer_state,
44 | }
45 |
--------------------------------------------------------------------------------
/flair/trainers/plugins/functional/deepncm_trainer_plugin.py:
--------------------------------------------------------------------------------
1 | from collections.abc import Iterable
2 |
3 | import torch
4 |
5 | from flair.models import MultitaskModel
6 | from flair.nn import DeepNCMDecoder
7 | from flair.trainers.plugins.base import TrainerPlugin
8 |
9 |
10 | class DeepNCMPlugin(TrainerPlugin):
11 | """Plugin for training DeepNCMClassifier.
12 |
13 | Handles both multitask and single-task scenarios.
14 | """
15 |
16 | @property
17 | def decoders(self) -> Iterable[DeepNCMDecoder]:
18 | """Iterator over all DeepNCMDecoder decoders in the trainer."""
19 | model = self.trainer.model
20 |
21 | models = model.tasks.values() if isinstance(model, MultitaskModel) else [model]
22 |
23 | for sub_model in models:
24 | if hasattr(sub_model, "decoder") and isinstance(sub_model.decoder, DeepNCMDecoder):
25 | yield sub_model.decoder
26 |
27 | @TrainerPlugin.hook
28 | def after_training_epoch(self, **kwargs):
29 | """Reset class counts after each training epoch."""
30 | for decoder in self.decoders:
31 | if decoder.mean_update_method == "condensation":
32 | decoder.class_counts.data = torch.ones_like(decoder.class_counts)
33 |
34 | @TrainerPlugin.hook
35 | def after_training_batch(self, **kwargs):
36 | """Update prototypes after each training batch."""
37 | for decoder in self.decoders:
38 | decoder.update_prototypes()
39 |
40 | def __str__(self) -> str:
41 | return "DeepNCMPlugin"
42 |
--------------------------------------------------------------------------------
/flair/trainers/plugins/functional/weight_extractor.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | from flair.trainers.plugins.base import TrainerPlugin
4 | from flair.training_utils import WeightExtractor
5 |
6 |
7 | class WeightExtractorPlugin(TrainerPlugin):
8 | """Simple Plugin for weight extraction."""
9 |
10 | def __init__(self, base_path) -> None:
11 | super().__init__()
12 | self.base_path = base_path
13 | self.weight_extractor = WeightExtractor(base_path)
14 |
15 | @TrainerPlugin.hook
16 | def after_training_batch(self, batch_no, epoch, total_number_of_batches, **kw):
17 | """Extracts weights."""
18 | modulo = max(1, int(total_number_of_batches / 10))
19 | iteration = epoch * total_number_of_batches + batch_no
20 |
21 | if (iteration + 1) % modulo == 0:
22 | self.weight_extractor.extract_weights(self.model.state_dict(), iteration)
23 |
24 | @property
25 | def attach_to_all_processes(self) -> bool:
26 | return False
27 |
28 | def get_state(self) -> dict[str, Any]:
29 | return {
30 | **super().get_state(),
31 | "base_path": str(self.base_path),
32 | }
33 |
--------------------------------------------------------------------------------
/flair/trainers/plugins/loggers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/flair/trainers/plugins/loggers/__init__.py
--------------------------------------------------------------------------------
/flair/trainers/plugins/loggers/clearml_logger.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | from flair.trainers.plugins.base import TrainerPlugin
4 | from flair.trainers.plugins.metric_records import MetricRecord
5 |
6 |
7 | class ClearmlLoggerPlugin(TrainerPlugin):
8 | def __init__(self, task_id_or_task: Any):
9 | if isinstance(task_id_or_task, str):
10 | self.task_id = task_id_or_task
11 | self.task = None
12 | else:
13 | self.task = task_id_or_task
14 | self.task_id = self.task.task_id
15 | super().__init__()
16 |
17 | @property
18 | def logger(self):
19 | try:
20 | import clearml
21 | except ImportError:
22 | raise ImportError(
23 | "Please install clearml 1.11.0 or higher before using the clearml plugin"
24 | "otherwise you can remove the clearml plugin from the training or model card."
25 | )
26 | if self.task is None:
27 | self.task = clearml.Task.get_task(task_id=self.task_id)
28 | return self.task.get_logger()
29 |
30 | @TrainerPlugin.hook
31 | def metric_recorded(self, record: MetricRecord) -> None:
32 | record_name = ".".join(record.name)
33 |
34 | if record.is_scalar:
35 | self.logger.report_scalar(record_name, record_name, record.value, record.global_step)
36 | elif record.is_scalar_list:
37 | for i, v in enumerate(record.value):
38 | self.logger.report_scalar(record_name, f"{record_name}_{i}", v, record.global_step)
39 | elif record.is_string:
40 | self.logger.report_text(record.value, print_console=False)
41 | elif record.is_histogram:
42 | self.logger.report_histogram(record_name, record_name, record.value, record.global_step)
43 |
44 | @property
45 | def attach_to_all_processes(self) -> bool:
46 | return False
47 |
--------------------------------------------------------------------------------
/flair/trainers/plugins/loggers/log_file.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from pathlib import Path
3 | from typing import Any
4 |
5 | from flair.trainers.plugins.base import TrainerPlugin
6 | from flair.training_utils import add_file_handler
7 |
8 | log = logging.getLogger("flair")
9 |
10 |
11 | class LogFilePlugin(TrainerPlugin):
12 | """Plugin for the training.log file."""
13 |
14 | def __init__(self, base_path) -> None:
15 | super().__init__()
16 | self.base_path = base_path
17 | self.log_handler = add_file_handler(log, Path(base_path) / "training.log")
18 |
19 | @TrainerPlugin.hook("_training_exception", "after_training")
20 | def close_file_handler(self, **kw):
21 | self.log_handler.close()
22 | log.removeHandler(self.log_handler)
23 |
24 | @property
25 | def attach_to_all_processes(self) -> bool:
26 | return False
27 |
28 | def get_state(self) -> dict[str, Any]:
29 | return {**super().get_state(), "base_path": str(self.base_path)}
30 |
--------------------------------------------------------------------------------
/flair/trainers/plugins/loggers/metric_history.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from collections.abc import Mapping
3 | from typing import Any
4 |
5 | from flair.trainers.plugins.base import TrainerPlugin
6 |
7 | log = logging.getLogger("flair")
8 |
9 |
10 | default_metrics_to_collect = {
11 | ("train", "loss"): "train_loss_history",
12 | ("dev", "score"): "dev_score_history",
13 | ("dev", "loss"): "dev_loss_history",
14 | }
15 |
16 |
17 | class MetricHistoryPlugin(TrainerPlugin):
18 | def __init__(self, metrics_to_collect: Mapping = default_metrics_to_collect) -> None:
19 | super().__init__()
20 |
21 | self.metric_history: dict[str, list] = {}
22 | self.metrics_to_collect: Mapping = metrics_to_collect
23 | for target in self.metrics_to_collect.values():
24 | self.metric_history[target] = []
25 |
26 | @TrainerPlugin.hook
27 | def metric_recorded(self, record):
28 | if tuple(record.name) in self.metrics_to_collect:
29 | target = self.metrics_to_collect[tuple(record.name)]
30 | self.metric_history[target].append(record.value)
31 |
32 | @TrainerPlugin.hook
33 | def after_training(self, **kw):
34 | """Returns metric history."""
35 | self.trainer.return_values.update(self.metric_history)
36 |
37 | @property
38 | def attach_to_all_processes(self) -> bool:
39 | return False
40 |
41 | def get_state(self) -> dict[str, Any]:
42 | return {
43 | **super().get_state(),
44 | "metrics_to_collect": dict(self.metrics_to_collect),
45 | }
46 |
--------------------------------------------------------------------------------
/flair/visual/__init__.py:
--------------------------------------------------------------------------------
1 | from .activations import Highlighter
2 | from .manifold import Visualizer
3 |
4 | __all__ = ["Highlighter", "Visualizer"]
5 |
--------------------------------------------------------------------------------
/flair/visual/activations.py:
--------------------------------------------------------------------------------
1 | import numpy
2 |
3 |
4 | class Highlighter:
5 | def __init__(self) -> None:
6 | self.color_map = [
7 | "#ff0000",
8 | "#ff4000",
9 | "#ff8000",
10 | "#ffbf00",
11 | "#ffff00",
12 | "#bfff00",
13 | "#80ff00",
14 | "#40ff00",
15 | "#00ff00",
16 | "#00ff40",
17 | "#00ff80",
18 | "#00ffbf",
19 | "#00ffff",
20 | "#00bfff",
21 | "#0080ff",
22 | "#0040ff",
23 | "#0000ff",
24 | "#4000ff",
25 | "#8000ff",
26 | "#bf00ff",
27 | "#ff00ff",
28 | "#ff00bf",
29 | "#ff0080",
30 | "#ff0040",
31 | "#ff0000",
32 | ]
33 |
34 | def highlight(self, activation, text):
35 | activation = activation.detach().cpu().numpy()
36 |
37 | step_size = (max(activation) - min(activation)) / len(self.color_map)
38 |
39 | lookup = numpy.array(list(numpy.arange(min(activation), max(activation), step_size)))
40 |
41 | colors = []
42 |
43 | for _i, act in enumerate(activation):
44 | try:
45 | colors.append(self.color_map[numpy.where(act > lookup)[0][-1]])
46 | except IndexError:
47 | colors.append(self.color_map[-1])  # fall back to the last color, not its index
48 |
49 | str_ = "
"
50 |
51 | for i, (char, color) in enumerate(zip(list(text), colors)):
52 | str_ += self._render(char, color)
53 |
54 | if i % 100 == 0 and i > 0:
55 | str_ += "<br>"
56 |
57 | return str_
58 |
59 | def highlight_selection(self, activations, text, file_="resources/data/highlight.html", n=10):
60 | ix = numpy.random.default_rng().choice(activations.shape[1], size=n)
61 |
62 | rendered = ""
63 |
64 | for i in ix:
65 | rendered += self.highlight(activations[:, i], text)
66 |
67 | with open(file_, "w") as f:
68 | f.write(rendered)
69 |
70 | @staticmethod
71 | def _render(char, color):
72 | return f'<span style="background-color: {color}">{char}</span>'
73 |
--------------------------------------------------------------------------------
/flair/visual/tree_printer.py:
--------------------------------------------------------------------------------
1 | from pptree import print_tree
2 |
3 | from flair.data import Sentence, Token
4 |
5 |
6 | class NodeToken:
7 | def __init__(self, token: Token, tag_type: str) -> None:
8 | self.token: Token = token
9 | self.tag_type: str = tag_type
10 | self.children: list[NodeToken] = []
11 |
12 | def set_head(self, parent):
13 | parent.children.append(self)
14 |
15 | def __str__(self) -> str:
16 | return f" {self.token.text}({self.token.get_labels(self.tag_type)[0].value}) "
17 |
18 |
19 | def tree_printer(sentence: Sentence, tag_type: str):
20 | tree: list[NodeToken] = [NodeToken(token, tag_type) for token in sentence]
21 | for x in tree:
22 | if x.token.head_id != 0:
23 | head_token = x.token.get_head()
24 |
25 | for y in tree:
26 | if y.token == head_token:
27 | x.set_head(y)
28 | else:
29 | root_node = x
30 | print_tree(root_node, "children")
31 |
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | black[jupyter]==24.2.*
2 | konoha[janome]<6.0.0
3 | mypy>=1.2.0
4 | pytest>=7.3.1
5 | pytest-black-ng==0.4.*
6 | pytest-github-actions-annotate-failures>=0.1.8
7 | pytest-mypy>=0.10.3
8 | pytest-ruff==0.3.*
9 | ruff==0.7.*
10 | types-dataclasses>=0.6.6
11 | types-Deprecated>=1.2.9.2
12 | types-requests>=2.28.11.17
13 | types-tabulate>=0.9.0.2
14 | pyab3p
15 | transformers!=4.40.1,!=4.40.0
16 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | boto3>=1.20.27
2 | conllu>=4.0,<5.0.0
3 | deprecated>=1.2.13
4 | ftfy>=6.1.0
5 | gdown>=4.4.0
6 | huggingface-hub>=0.10.0
7 | langdetect>=1.0.9
8 | lxml>=4.8.0
9 | matplotlib>=2.2.3
10 | more-itertools>=8.13.0
11 | mpld3>=0.3
12 | pptree>=3.1
13 | python-dateutil>=2.8.2
14 | pytorch_revgrad>=0.2.0
15 | regex>=2022.1.18
16 | scikit-learn>=1.0.2
17 | segtok>=1.5.11
18 | sqlitedict>=2.0.0
19 | tabulate>=0.8.10
20 | torch>=1.13.1
21 | tqdm>=4.63.0
22 | transformer-smaller-training-vocab>=0.2.3
23 | transformers[sentencepiece]>=4.25.0,<5.0.0
24 | wikipedia-api>=0.5.7
25 | bioc<3.0.0,>=2.0.0
26 |
--------------------------------------------------------------------------------
/resources/docs/HUNFLAIR_TUTORIAL_3_ENTITY_LINKING.md:
--------------------------------------------------------------------------------
1 | # HunFlair Tutorial 3: Entity Linking
2 |
3 | After adding named entity recognition tags to your sentence, you can run named entity linking on these annotations.
4 |
5 | ```python
6 | from flair.models import EntityMentionLinker
7 | from flair.nn import Classifier
8 | from flair.tokenization import SciSpacyTokenizer
9 | from flair.data import Sentence
10 |
11 | sentence = Sentence(
12 | "The mutation in the ABCD1 gene causes X-linked adrenoleukodystrophy, "
13 | "a neurodegenerative disease, which is exacerbated by exposure to high "
14 | "levels of mercury in dolphin populations.",
15 | use_tokenizer=SciSpacyTokenizer()
16 | )
17 |
18 | ner_tagger = Classifier.load("hunflair")
19 | ner_tagger.predict(sentence)
20 |
21 | nen_tagger = EntityMentionLinker.load("disease-linker")
22 | nen_tagger.predict(sentence)
23 |
24 | nen_tagger = EntityMentionLinker.load("gene-linker")
25 | nen_tagger.predict(sentence)
26 |
27 | nen_tagger = EntityMentionLinker.load("chemical-linker")
28 | nen_tagger.predict(sentence)
29 |
30 | nen_tagger = EntityMentionLinker.load("species-linker")
31 | nen_tagger.predict(sentence)
32 |
33 | for tag in sentence.get_labels():
34 | print(tag)
35 | ```
36 |
37 | This should print:
38 |
39 | ```
40 | Span[4:5]: "ABCD1" → Gene (0.9575)
41 | Span[4:5]: "ABCD1" → abcd1 - NCBI-GENE-HUMAN:215 (14.5503)
42 | Span[7:11]: "X-linked adrenoleukodystrophy" → Disease (0.9867)
43 | Span[7:11]: "X-linked adrenoleukodystrophy" → x linked adrenoleukodystrophy - CTD-DISEASES:MESH:D000326 (13.9717)
44 | Span[13:15]: "neurodegenerative disease" → Disease (0.8865)
45 | Span[13:15]: "neurodegenerative disease" → neurodegenerative disease - CTD-DISEASES:MESH:D019636 (14.2779)
46 | Span[25:26]: "mercury" → Chemical (0.9456)
47 | Span[25:26]: "mercury" → mercury - CTD-CHEMICALS:MESH:D008628 (14.9185)
48 | Span[27:28]: "dolphin" → Species (0.8082)
49 | Span[27:28]: "dolphin" → marine dolphins - NCBI-TAXONOMY:9726 (14.473)
50 | ```
51 |
52 | The output contains both the NER annotations and their entity / concept identifiers according to
53 | a knowledge base or ontology. We have pre-configured combinations of models and dictionaries for
54 | "disease", "chemical", "gene" and "species".
55 |
56 | You can also provide your own model and dictionary:
57 |
58 | ```python
59 | from flair.models import EntityMentionLinker
60 |
61 | nen_tagger = EntityMentionLinker.build("name_or_path_to_your_model",
62 | dictionary_names_or_path="name_or_path_to_your_dictionary")
63 | nen_tagger = EntityMentionLinker.build("path_to_custom_disease_model", dictionary_names_or_path="disease")
64 | ```
65 |
66 | You can use any combination of provided models, provided dictionaries and your own.
67 |
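68 | All taggers and linkers also accept a list of sentences, which is more efficient than predicting
69 | one sentence at a time. A minimal sketch, reusing `ner_tagger` and the most recently loaded
70 | `nen_tagger` from the example above:
71 |
72 | ```python
73 | sentences = [
74 |     Sentence("Mutations in the BRCA1 gene are associated with breast cancer."),
75 |     Sentence("Mercury exposure is toxic to many marine species."),
76 | ]
77 |
78 | ner_tagger.predict(sentences)
79 | nen_tagger.predict(sentences)
80 |
81 | for s in sentences:
82 |     for label in s.get_labels():
83 |         print(label)
84 | ```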
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_8_MODEL_OPTIMIZATION.md:
--------------------------------------------------------------------------------
1 | # Tutorial 8: Model Tuning
2 |
3 | **Important**: This tutorial has been removed.
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_CORPUS_CUSTOM.md:
--------------------------------------------------------------------------------
1 | # Tutorial 4.3: Loading a Custom Corpus
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-training/how-to-load-custom-dataset
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
6 |
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_CORPUS_PREPARED.md:
--------------------------------------------------------------------------------
1 | # Tutorial 4.1: Loading a Prepared Corpus
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-training/how-to-load-prepared-dataset
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
6 |
7 |
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_EMBEDDINGS_OVERVIEW.md:
--------------------------------------------------------------------------------
1 | # Tutorial 3: Embeddings
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/category/tutorial-3-embeddings
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_FLAIR_BASICS.md:
--------------------------------------------------------------------------------
1 | # Tutorial 1: NLP Base Types
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/basic-types
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TAGGING_CIRCUS.md:
--------------------------------------------------------------------------------
1 | # Tutorial 2.6: Other Crazy Models in Flair
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/other-models
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TAGGING_LINKING.md:
--------------------------------------------------------------------------------
1 | # Tutorial 2.3: Entity Linking on Your Text
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/entity-linking
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TAGGING_NER.md:
--------------------------------------------------------------------------------
1 | # Tutorial 2.1: Tagging Entities in your Text
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/tagging-entities
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TAGGING_OVERVIEW.md:
--------------------------------------------------------------------------------
1 | # Tutorial 2: Tagging your Text
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/category/tutorial-1-basic-tagging
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TAGGING_POS.md:
--------------------------------------------------------------------------------
1 | # Tutorial 2.4: Tagging Parts of Speech in your Text
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/part-of-speech-tagging
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TAGGING_RELATIONS.md:
--------------------------------------------------------------------------------
1 | # Tutorial 2.5: Relation Extraction on Your Text
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/other-models
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TAGGING_SENTIMENT.md:
--------------------------------------------------------------------------------
1 | # Tutorial 2.2: Sentiment Analysis on Your Text
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-basics/tagging-sentiment
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TRAINING_MODELS.md:
--------------------------------------------------------------------------------
1 | # Tutorial 4.1: How Model Training works in Flair
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-training/how-model-training-works
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
6 |
7 |
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TRAINING_OVERVIEW.md:
--------------------------------------------------------------------------------
1 | # Tutorial 4: Training your own Models
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/category/tutorial-2-training-models
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
6 |
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TRAINING_SEQUENCE_LABELER.md:
--------------------------------------------------------------------------------
1 | # Tutorial 4.4: Training Sequence Labeling Models
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-training/how-to-train-sequence-tagger
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
6 |
--------------------------------------------------------------------------------
/resources/docs/TUTORIAL_TRAINING_TEXT_CLASSIFIER.md:
--------------------------------------------------------------------------------
1 | # Tutorial 4.5: Training Text Classification Models
2 |
3 | **Important**: This tutorial has been moved to https://flairnlp.github.io/docs/tutorial-training/how-to-train-text-classifier
4 |
5 | All Flair documentation is now found at: https://flairnlp.github.io/
--------------------------------------------------------------------------------
/resources/docs/embeddings/BYTE_PAIR_EMBEDDINGS.md:
--------------------------------------------------------------------------------
1 | # Byte Pair Embeddings
2 |
3 | `BytePairEmbeddings` are word embeddings that are precomputed on the subword-level. This means that they are able to
4 | embed any word by splitting words into subwords and looking up their embeddings. `BytePairEmbeddings` were proposed
5 | and computed by [Heinzerling and Strube (2018)](https://www.aclweb.org/anthology/L18-1473) who found that they offer nearly the same accuracy as word embeddings, but at a fraction
6 | of the model size. So they are a great choice if you want to train small models.
7 |
8 | You initialize with a language code (275 languages supported), a subword vocabulary size (the number of
9 | 'syllables') and a number of dimensions (one of 50, 100, 200 or 300); an example that sets these explicitly
10 | appears at the end of this page. The following initializes and uses byte pair embeddings for English:
11 |
12 | ```python
13 | from flair.data import Sentence
14 | from flair.embeddings import BytePairEmbeddings
14 |
15 | # init embedding
16 | embedding = BytePairEmbeddings('en')
17 |
18 | # create a sentence
19 | sentence = Sentence('The grass is green .')
20 |
21 | # embed words in sentence
22 | embedding.embed(sentence)
23 | ```
24 |
25 | More information can be found
26 | on the [byte pair embeddings](https://nlp.h-its.org/bpemb/) web page.
27 |
28 | `BytePairEmbeddings` also have a multilingual model capable of embedding any word in any language.
29 | You can instantiate it with:
30 |
31 | ```python
32 | # init embedding
33 | embedding = BytePairEmbeddings('multi')
34 | ```
35 |
36 | You can also load custom `BytePairEmbeddings` by passing paths via the `model_file_path` and `embedding_file_path` arguments. They correspond, respectively, to a SentencePiece model file and an embedding file (Word2Vec plain text or Gensim binary). For example:
37 |
38 | ```python
39 | # init custom embedding
40 | embedding = BytePairEmbeddings(model_file_path='your/path/m.model', embedding_file_path='your/path/w2v.txt')
41 | ```
42 |
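43 | The subword vocabulary size and the embedding dimensionality can also be set explicitly. A minimal
44 | sketch (the `syllables` and `dim` argument names are assumptions here, so check the API reference
45 | for your Flair version):
46 |
47 | ```python
48 | from flair.embeddings import BytePairEmbeddings
49 |
50 | # English embeddings with a 100k subword vocabulary and 300 dimensions (assumed argument names)
51 | embedding = BytePairEmbeddings('en', dim=300, syllables=100000)
52 | ```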
--------------------------------------------------------------------------------
/resources/docs/embeddings/CHARACTER_EMBEDDINGS.md:
--------------------------------------------------------------------------------
1 | # Character Embeddings
2 |
3 | `CharacterEmbeddings` allow you to add character-level word embeddings during model training. Note that these embeddings
4 | are randomly initialized when you initialize the class, so they are not meaningful unless you train them on a specific
5 | downstream task.
6 |
7 | For instance, the standard sequence labeling architecture used by [Lample et al. (2016)](https://www.aclweb.org/anthology/N16-1030) is a combination of classic word embeddings with task-trained character features. Normally this would require you to implement a [hierarchical embedding architecture](http://neuroner.com/NeuroNERengine_with_caption_no_figure.png) in which character-level embeddings for each word are computed using an RNN and then concatenated with word embeddings.
8 |
9 | In Flair, we simplify this by treating `CharacterEmbeddings` just like any other embedding class. To reproduce the
10 | Lample architecture, you need only combine them with standard `WordEmbeddings` in an embedding stack:
11 |
12 |
13 | ```python
14 | from flair.embeddings import CharacterEmbeddings, StackedEmbeddings, WordEmbeddings
15 |
16 | # init embedding stack
15 | embedding = StackedEmbeddings(
16 | [
17 | # standard word embeddings
18 | WordEmbeddings('glove'),
19 |
20 | # character-level features
21 | CharacterEmbeddings(),
22 | ]
23 | )
24 | ```
25 |
26 | If you pass this stacked embedding to a train method, the character-level features will now automatically be trained
27 | for your downstream task.
28 |
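29 | Before training, you can sanity-check the stack by embedding a single sentence:
30 |
31 | ```python
32 | from flair.data import Sentence
33 |
34 | # create a sentence and embed it with the stack defined above
35 | sentence = Sentence('The grass is green .')
36 | embedding.embed(sentence)
37 |
38 | # each token now carries the concatenated word + character embedding
39 | print(sentence[0].get_embedding().shape)
40 | ```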
--------------------------------------------------------------------------------
/resources/docs/embeddings/ELMO_EMBEDDINGS.md:
--------------------------------------------------------------------------------
1 | # ELMo Embeddings
2 |
3 | [ELMo embeddings](http://www.aclweb.org/anthology/N18-1202) were presented by Peters et al. in 2018. They use
4 | a bidirectional recurrent language model trained to predict the next (and previous) word in a text.
5 | We use the implementation of [AllenNLP](https://allennlp.org/elmo). Since this implementation comes with many
6 | sub-dependencies, which we don't want to include in Flair, you first need to install the library via
7 | `pip install allennlp==0.9.0` before you can use it in Flair.
8 | Using the embeddings is as simple as using any other embedding type:
9 |
10 | ```python
11 | from flair.data import Sentence
12 | from flair.embeddings import ELMoEmbeddings
12 |
13 | # init embedding
14 | embedding = ELMoEmbeddings()
15 |
16 | # create a sentence
17 | sentence = Sentence('The grass is green .')
18 |
19 | # embed words in sentence
20 | embedding.embed(sentence)
21 | ```
22 |
23 | ELMo word embeddings can be constructed by combining ELMo layers in different ways. The available combination strategies are:
24 | - `"all"`: Use the concatenation of the three ELMo layers.
25 | - `"top"`: Use the top ELMo layer.
26 | - `"average"`: Use the average of the three ELMo layers.
27 |
28 | By default (`"all"`), all three ELMo layers are concatenated to form the word embedding; an example of selecting a strategy follows the table below.
29 |
30 | AllenNLP provides the following pre-trained models. To use any of them inside Flair,
31 | simply specify the embedding ID when initializing the `ELMoEmbeddings`.
32 |
33 | | ID | Language | Embedding |
34 | | ------------- | ------------- | ------------- |
35 | | 'small' | English | 1024-hidden, 1 layer, 14.6M parameters |
36 | | 'medium' | English | 2048-hidden, 1 layer, 28.0M parameters |
37 | | 'original' | English | 4096-hidden, 2 layers, 93.6M parameters |
38 | | 'large' | English | |
39 | | 'pt' | Portuguese | |
40 | | 'pubmed' | English biomedical data | [more information](https://allennlp.org/elmo) |
41 |
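42 | For example, the following sketch loads the 'small' model and uses only the top layer. The
43 | layer-combination argument name (`embedding_mode`) is an assumption here; check the API reference
44 | for your Flair version:
45 |
46 | ```python
47 | from flair.embeddings import ELMoEmbeddings
48 |
49 | # 'small' English model, top ELMo layer only (assumed argument name)
50 | embedding = ELMoEmbeddings(model='small', embedding_mode='top')
51 | ```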
--------------------------------------------------------------------------------
/resources/docs/embeddings/FASTTEXT_EMBEDDINGS.md:
--------------------------------------------------------------------------------
1 | # FastText Embeddings
2 |
3 | FastText embeddings can give you vectors for out-of-vocabulary (OOV) words by using subword information.
4 | To use this functionality with Flair, use the `FastTextEmbeddings` class as shown:
5 |
6 | ```python
7 | from flair.data import Sentence
8 | from flair.embeddings import FastTextEmbeddings
8 |
9 | # init embedding
10 | embedding = FastTextEmbeddings('/path/to/local/custom_fasttext_embeddings.bin')
11 |
12 | # create a sentence
13 | sentence = Sentence('The grass is green .')
14 |
15 | # embed words in sentence
16 | embedding.embed(sentence)
17 | ```
18 |
19 | You can also initialize the class with a URL from which the embeddings will be downloaded:
20 |
21 | ```python
22 | embedding = FastTextEmbeddings('/path/to/remote/downloadable/custom_fasttext_embeddings.bin', use_local=False)
23 | ```
24 |
25 | Note that FastText embedding files are typically huge, resulting in equally huge models for downstream tasks.
26 |
27 | Alternatively, you can use FastText embeddings without the OOV functionality by combining normal
28 | `WordEmbeddings`, which are smaller, with `BytePairEmbeddings`, which are tiny and provide the OOV
29 | functionality. So, instead of using English `FastTextEmbeddings` with OOV handling, you could use this stack:
31 |
32 | ```python
33 | from flair.data import Sentence
34 | from flair.embeddings import BytePairEmbeddings, StackedEmbeddings, WordEmbeddings
34 |
35 | # init embedding
36 | embedding = StackedEmbeddings(
37 | [
38 | # standard FastText word embeddings for English
39 | WordEmbeddings('en'),
40 | # Byte pair embeddings for English
41 | BytePairEmbeddings('en'),
42 | ]
43 | )
44 |
45 | # create a sentence
46 | sentence = Sentence('The grass is green .')
47 |
48 | # embed words in sentence
49 | embedding.embed(sentence)
50 | ```
51 |
--------------------------------------------------------------------------------
/resources/docs/flair_logo_2020.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/resources/docs/flair_logo_2020.png
--------------------------------------------------------------------------------
/resources/docs/flair_logo_2020_FINAL_day_dpi72.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/resources/docs/flair_logo_2020_FINAL_day_dpi72.png
--------------------------------------------------------------------------------
/resources/docs/flair_logo_2020_FINAL_night_dpi72.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/resources/docs/flair_logo_2020_FINAL_night_dpi72.png
--------------------------------------------------------------------------------
/resources/docs/flair_logo_2020_FINAL_night_light_dpi72.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/resources/docs/flair_logo_2020_FINAL_night_light_dpi72.png
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from setuptools import find_packages, setup
4 |
5 | required = Path("requirements.txt").read_text(encoding="utf-8").split("\n")
6 |
7 | setup(
8 | name="flair",
9 | version="0.15.1",
10 | description="A very simple framework for state-of-the-art NLP",
11 | long_description=Path("README.md").read_text(encoding="utf-8"),
12 | long_description_content_type="text/markdown",
13 | author="Alan Akbik",
14 | author_email="alan.akbik@gmail.com",
15 | url="https://github.com/flairNLP/flair",
16 | packages=find_packages(exclude=["tests", "tests.*"]), # same as name
17 | license="MIT",
18 | install_requires=required,
19 | extras_require={
20 | "word-embeddings": ["gensim>=4.2.0", "bpemb>=0.3.5"],
21 | },
22 | include_package_data=True,
23 | python_requires=">=3.9",
24 | )
25 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/tests/__init__.py
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import pytest
4 | import torch
5 |
6 | import flair
7 |
8 |
9 | @pytest.fixture(scope="module")
10 | def resources_path():
11 | return Path(__file__).parent / "resources"
12 |
13 |
14 | @pytest.fixture(scope="module")
15 | def tasks_base_path(resources_path):
16 | return resources_path / "tasks"
17 |
18 |
19 | @pytest.fixture()
20 | def results_base_path(resources_path):
21 | path = resources_path / "results"
22 | try:
23 | yield path
24 | finally:
25 | for p in reversed(list(path.rglob("*"))):
26 | if p.is_file():
27 | p.unlink()
28 | else:
29 | p.rmdir()
30 | if path.is_dir():
31 | path.rmdir()
32 |
33 |
34 | @pytest.fixture(autouse=True)
35 | def set_cpu(force_cpu):
36 | if force_cpu:
37 | flair.device = torch.device("cpu")
38 |
39 |
40 | def pytest_addoption(parser):
41 | parser.addoption(
42 | "--runintegration",
43 | action="store_true",
44 | default=False,
45 | help="run integration tests",
46 | )
47 | parser.addoption(
48 | "--force-cpu",
49 | action="store_true",
50 | default=False,
51 | help="use cpu for tests even when gpu is available",
52 | )
53 |
54 |
55 | def pytest_collection_modifyitems(config, items):
56 | if not config.getoption("--runintegration"):
57 | skip_integration = pytest.mark.skip(reason="need --runintegration option to run")
58 | for item in items:
59 | if "integration" in item.keywords:
60 | item.add_marker(skip_integration)
61 |
62 |
63 | def pytest_generate_tests(metafunc):
64 | option_value = metafunc.config.getoption("--force-cpu")
65 | if "force_cpu" in metafunc.fixturenames and option_value is not None:
66 | metafunc.parametrize("force_cpu", [option_value])
67 |
--------------------------------------------------------------------------------
/tests/embeddings/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/tests/embeddings/__init__.py
--------------------------------------------------------------------------------
/tests/embeddings/test_byte_pair_embeddings.py:
--------------------------------------------------------------------------------
1 | from flair.embeddings import BytePairEmbeddings
2 | from tests.embedding_test_utils import BaseEmbeddingsTest
3 |
4 |
5 | class TestBytePairEmbeddings(BaseEmbeddingsTest):
6 | embedding_cls = BytePairEmbeddings
7 | is_token_embedding = True
8 | is_document_embedding = False
9 | default_args = {"language": "en"}
10 |
--------------------------------------------------------------------------------
/tests/embeddings/test_document_transform_word_embeddings.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | from flair.embeddings import (
4 | DocumentCNNEmbeddings,
5 | DocumentLMEmbeddings,
6 | DocumentPoolEmbeddings,
7 | DocumentRNNEmbeddings,
8 | FlairEmbeddings,
9 | TokenEmbeddings,
10 | WordEmbeddings,
11 | )
12 | from tests.embedding_test_utils import BaseEmbeddingsTest
13 |
14 | word: TokenEmbeddings = WordEmbeddings("turian")
15 | flair_embedding: TokenEmbeddings = FlairEmbeddings("news-forward-fast")
16 | flair_embedding_back: TokenEmbeddings = FlairEmbeddings("news-backward-fast")
17 |
18 |
19 | class BaseDocumentsViaWordEmbeddingsTest(BaseEmbeddingsTest):
20 | is_document_embedding = True
21 | is_token_embedding = False
22 | base_embeddings: list[TokenEmbeddings] = [word, flair_embedding]
23 |
24 | def create_embedding_from_name(self, name: str):
25 | """Overwrite this method if it is more complex to load an embedding by name."""
26 | assert self.name_field is not None
27 | kwargs = dict(self.default_args)
28 | kwargs.pop(self.name_field)
29 | return self.embedding_cls(name, **kwargs) # type: ignore[call-arg]
30 |
31 | def create_embedding_with_args(self, args: dict[str, Any]):
32 | kwargs = dict(self.default_args)
33 | for k, v in args.items():
34 | kwargs[k] = v
35 | return self.embedding_cls(self.base_embeddings, **kwargs) # type: ignore[call-arg]
36 |
37 |
38 | class TestDocumentLstmEmbeddings(BaseDocumentsViaWordEmbeddingsTest):
39 | embedding_cls = DocumentRNNEmbeddings
40 | default_args = {
41 | "hidden_size": 128,
42 | "bidirectional": False,
43 | }
44 | valid_args = [{"bidirectional": False}, {"bidirectional": True}]
45 |
46 |
47 | class TestDocumentPoolEmbeddings(BaseDocumentsViaWordEmbeddingsTest):
48 | embedding_cls = DocumentPoolEmbeddings
49 | default_args = {
50 | "fine_tune_mode": "nonlinear",
51 | }
52 | valid_args = [{"pooling": "mean"}, {"pooling": "max"}, {"pooling": "min"}]
53 |
54 |
55 | class TestDocumentCNNEmbeddings(BaseDocumentsViaWordEmbeddingsTest):
56 | embedding_cls = DocumentCNNEmbeddings
57 | default_args = {
58 | "kernels": ((50, 2), (50, 3)),
59 | }
60 | valid_args = [{"reproject_words_dimension": None}, {"reproject_words_dimension": 100}]
61 |
62 |
63 | class TestDocumentLMEmbeddings(BaseDocumentsViaWordEmbeddingsTest):
64 | embedding_cls = DocumentLMEmbeddings
65 | base_embeddings = [flair_embedding, flair_embedding_back]
66 | default_args: dict[str, Any] = {}
67 |
--------------------------------------------------------------------------------
/tests/embeddings/test_flair_embeddings.py:
--------------------------------------------------------------------------------
1 | from flair.data import Dictionary, Sentence
2 | from flair.embeddings import (
3 | DocumentLMEmbeddings,
4 | DocumentRNNEmbeddings,
5 | FlairEmbeddings,
6 | )
7 | from flair.models import LanguageModel
8 | from tests.embedding_test_utils import BaseEmbeddingsTest
9 |
10 |
11 | class TestFlairEmbeddings(BaseEmbeddingsTest):
12 | embedding_cls = FlairEmbeddings
13 | is_token_embedding = True
14 | is_document_embedding = False
15 | default_args = {"model": "news-forward-fast"}
16 |
17 | name_field = "model"
18 | invalid_names = ["other", "not/existing/path/to/embeddings"]
19 |
20 | def test_fine_tunable_flair_embedding(self):
21 | language_model_forward = LanguageModel(Dictionary.load("chars"), is_forward_lm=True, hidden_size=32, nlayers=1)
22 |
23 | embeddings: DocumentRNNEmbeddings = DocumentRNNEmbeddings(
24 | [FlairEmbeddings(language_model_forward, fine_tune=True)],
25 | hidden_size=128,
26 | bidirectional=False,
27 | )
28 |
29 | sentence: Sentence = Sentence("I love Berlin.")
30 |
31 | embeddings.embed(sentence)
32 |
33 | assert len(sentence.get_embedding()) == 128
34 | assert len(sentence.get_embedding()) == embeddings.embedding_length
35 |
36 | sentence.clear_embeddings()
37 |
38 | assert len(sentence.get_embedding()) == 0
39 |
40 | embeddings: DocumentLMEmbeddings = DocumentLMEmbeddings(
41 | [FlairEmbeddings(language_model_forward, fine_tune=True)]
42 | )
43 |
44 | sentence: Sentence = Sentence("I love Berlin.")
45 |
46 | embeddings.embed(sentence)
47 |
48 | assert len(sentence.get_embedding()) == 32
49 | assert len(sentence.get_embedding()) == embeddings.embedding_length
50 |
51 | sentence.clear_embeddings()
52 |
53 | assert len(sentence.get_embedding()) == 0
54 | del embeddings
55 |
--------------------------------------------------------------------------------
/tests/embeddings/test_simple_token_embeddings.py:
--------------------------------------------------------------------------------
1 | from flair.data import Dictionary
2 | from flair.embeddings import CharacterEmbeddings, HashEmbeddings, OneHotEmbeddings
3 | from tests.embedding_test_utils import BaseEmbeddingsTest
4 |
5 | vocab_dictionary = Dictionary(add_unk=True)
6 | vocab_dictionary.add_item("I")
7 | vocab_dictionary.add_item("love")
8 | vocab_dictionary.add_item("berlin")
9 |
10 |
11 | class TestCharacterEmbeddings(BaseEmbeddingsTest):
12 | embedding_cls = CharacterEmbeddings
13 | is_token_embedding = True
14 | is_document_embedding = False
15 | default_args = {"path_to_char_dict": None}
16 |
17 |
18 | class TestOneHotEmbeddings(BaseEmbeddingsTest):
19 | embedding_cls = OneHotEmbeddings
20 | is_token_embedding = True
21 | is_document_embedding = False
22 | default_args = {"vocab_dictionary": vocab_dictionary}
23 |
24 |
25 | class TestHashEmbeddings(BaseEmbeddingsTest):
26 | embedding_cls = HashEmbeddings
27 | is_token_embedding = True
28 | is_document_embedding = False
29 | default_args = {"num_embeddings": 10}
30 |
--------------------------------------------------------------------------------
/tests/embeddings/test_stacked_embeddings.py:
--------------------------------------------------------------------------------
1 | from flair.data import Sentence
2 | from flair.embeddings import (
3 | FlairEmbeddings,
4 | StackedEmbeddings,
5 | TokenEmbeddings,
6 | WordEmbeddings,
7 | )
8 | from flair.embeddings.base import load_embeddings
9 |
10 |
11 | def test_stacked_embeddings():
12 | glove: TokenEmbeddings = WordEmbeddings("turian")
13 | flair_embedding: TokenEmbeddings = FlairEmbeddings("news-forward-fast")
14 | embeddings: StackedEmbeddings = StackedEmbeddings([glove, flair_embedding])
15 |
16 | sentence: Sentence = Sentence("I love Berlin. Berlin is a great place to live.")
17 | embeddings.embed(sentence)
18 |
19 | for token in sentence.tokens:
20 | assert len(token.get_embedding()) == 1074
21 |
22 | token.clear_embeddings()
23 |
24 | assert len(token.get_embedding()) == 0
25 | del embeddings
26 |
27 |
28 | def test_stacked_embeddings_stay_the_same_after_saving_and_loading():
29 | glove: TokenEmbeddings = WordEmbeddings("turian")
30 | flair_embedding: TokenEmbeddings = FlairEmbeddings("news-forward-fast")
31 | embeddings: StackedEmbeddings = StackedEmbeddings([glove, flair_embedding])
32 |
33 | assert not embeddings.training
34 |
35 | sentence_old: Sentence = Sentence("I love Berlin")
36 | embeddings.embed(sentence_old)
37 | names_old = embeddings.get_names()
38 | embedding_length_old = embeddings.embedding_length
39 |
40 | save_data = embeddings.save_embeddings(use_state_dict=True)
41 | new_embeddings = load_embeddings(save_data)
42 |
43 | sentence_new: Sentence = Sentence("I love Berlin")
44 | new_embeddings.embed(sentence_new)
45 | names_new = new_embeddings.get_names()
46 | embedding_length_new = new_embeddings.embedding_length
47 |
48 | assert not new_embeddings.training
49 | assert names_old == names_new
50 | assert embedding_length_old == embedding_length_new
51 |
52 | for token_old, token_new in zip(sentence_old, sentence_new):
53 | assert (token_old.get_embedding(names_old) == token_new.get_embedding(names_new)).all()
54 |
--------------------------------------------------------------------------------
/tests/embeddings/test_tfidf_embeddings.py:
--------------------------------------------------------------------------------
1 | from flair.data import Sentence
2 | from flair.embeddings import DocumentTFIDFEmbeddings
3 | from tests.embedding_test_utils import BaseEmbeddingsTest
4 |
5 |
6 | class TFIDFEmbeddingsTest(BaseEmbeddingsTest):
7 | embedding_cls = DocumentTFIDFEmbeddings
8 | is_document_embedding = True
9 | is_token_embedding = False
10 |
11 | default_args = {
12 | "train_dataset": [
13 | Sentence("This is a sentence"),
14 | Sentence("This is another sentence"),
15 | Sentence("another a This I Berlin"),
16 | ]
17 | }
18 |
--------------------------------------------------------------------------------
/tests/embeddings/test_transformer_document_embeddings.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from flair.data import Dictionary, Sentence
4 | from flair.embeddings import TransformerDocumentEmbeddings
5 | from flair.models import TextClassifier
6 | from flair.nn import Classifier
7 | from tests.embedding_test_utils import BaseEmbeddingsTest
8 |
9 |
10 | class TestTransformerDocumentEmbeddings(BaseEmbeddingsTest):
11 | embedding_cls = TransformerDocumentEmbeddings
12 | is_document_embedding = True
13 | is_token_embedding = False
14 | default_args = {"model": "distilbert-base-uncased", "allow_long_sentences": False}
15 | valid_args = [
16 | {"layers": "-1,-2,-3,-4", "layer_mean": False},
17 | {"layers": "all", "layer_mean": True},
18 | {"layers": "all", "layer_mean": False},
19 | ]
20 |
21 | name_field = "embeddings"
22 | invalid_names = ["other", "not/existing/path/to/embeddings"]
23 |
24 |
25 | def test_if_loaded_embeddings_have_all_attributes(tasks_base_path):
26 | # dummy model with embeddings
27 | embeddings = TransformerDocumentEmbeddings(
28 | "distilbert-base-uncased",
29 | use_context=True,
30 | use_context_separator=False,
31 | )
32 |
33 | model = TextClassifier(label_type="ner", label_dictionary=Dictionary(), embeddings=embeddings)
34 |
35 | # save the dummy and load it again
36 | model.save(tasks_base_path / "single.pt")
37 | loaded_single_task = Classifier.load(tasks_base_path / "single.pt")
38 |
39 | # check that context_length and use_context_separator is the same for both
40 | assert model.embeddings.context_length == loaded_single_task.embeddings.context_length
41 | assert model.embeddings.use_context_separator == loaded_single_task.embeddings.use_context_separator
42 |
43 |
44 | @pytest.mark.parametrize("cls_pooling", ["cls", "mean", "max"])
45 | def test_cls_pooling(cls_pooling):
46 | embeddings = TransformerDocumentEmbeddings(
47 | model="distilbert-base-uncased",
48 | layers="-1",
49 | cls_pooling=cls_pooling,
50 | allow_long_sentences=True,
51 | )
52 | sentence = Sentence("Today is a good day.")
53 | embeddings.embed(sentence)
54 | assert sentence.embedding is not None
55 |
--------------------------------------------------------------------------------
/tests/embeddings/test_word_embeddings.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | from flair.embeddings import MuseCrosslingualEmbeddings, NILCEmbeddings, WordEmbeddings
4 | from tests.embedding_test_utils import BaseEmbeddingsTest
5 |
6 |
7 | class TestWordEmbeddings(BaseEmbeddingsTest):
8 | embedding_cls = WordEmbeddings
9 | is_token_embedding = True
10 | is_document_embedding = False
11 | default_args = {"embeddings": "turian"}
12 |
13 | name_field = "embeddings"
14 | invalid_names = ["other", "not/existing/path/to/embeddings"]
15 |
16 |
17 | class TestMuseCrosslingualEmbeddings(BaseEmbeddingsTest):
18 | embedding_cls = MuseCrosslingualEmbeddings
19 | is_token_embedding = True
20 | is_document_embedding = False
21 | default_args: dict[str, Any] = {}
22 |
23 |
24 | class TestNILCEmbeddings(BaseEmbeddingsTest):
25 | embedding_cls = NILCEmbeddings
26 | is_token_embedding = True
27 | is_document_embedding = False
28 | default_args = {"embeddings": "fasttext", "model": "cbow", "size": 50}
29 | valid_args = [{"embeddings": "glove"}]
30 |
31 | name_field = "embeddings"
32 | invalid_names = ["other", "not/existing/path/to/embeddings"]
33 |
--------------------------------------------------------------------------------
/tests/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/tests/models/__init__.py
--------------------------------------------------------------------------------
/tests/models/test_entity_linker.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from flair.data import Sentence
4 | from flair.datasets import NEL_ENGLISH_AIDA
5 | from flair.embeddings import TransformerWordEmbeddings
6 | from flair.models import SpanClassifier
7 | from tests.model_test_utils import BaseModelTest
8 |
9 |
10 | class TestEntityLinker(BaseModelTest):
11 | model_cls = SpanClassifier
12 | train_label_type = "nel"
13 | training_args = {"max_epochs": 2}
14 |
15 | @pytest.fixture()
16 | def embeddings(self):
17 | return TransformerWordEmbeddings(model="distilbert-base-uncased", layers="-1", fine_tune=True)
18 |
19 | @pytest.fixture()
20 | def corpus(self, tasks_base_path):
21 | return NEL_ENGLISH_AIDA().downsample(0.01)
22 |
23 | @pytest.fixture()
24 | def train_test_sentence(self):
25 | sentence = Sentence("I love NYC and hate OYC")
26 |
27 | sentence[2:3].add_label("nel", "New York City")
28 | sentence[5:6].add_label("nel", "Old York City")
29 | return sentence
30 |
31 | @pytest.fixture()
32 | def labeled_sentence(self):
33 | sentence = Sentence("I love NYC and hate OYC")
34 |
35 | sentence[2:3].add_label("nel", "New York City")
36 | sentence[5:6].add_label("nel", "Old York City")
37 | return sentence
38 |
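`sentence[2:3]` and `sentence[5:6]` in the fixtures above are token-index slices (0-indexed, end-exclusive), each selecting a one-token span to label. A quick sanity check of the indexing, assuming flair's `Span.text` attribute as used elsewhere in this test suite:

```python
from flair.data import Sentence

sentence = Sentence("I love NYC and hate OYC")
assert sentence[2:3].text == "NYC"  # tokens: I(0) love(1) NYC(2) and(3) hate(4) OYC(5)
assert sentence[5:6].text == "OYC"
```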
--------------------------------------------------------------------------------
/tests/models/test_model_license.py:
--------------------------------------------------------------------------------
1 | from flair.nn import Model
2 |
3 |
4 | def test_model_license_persistence(tmp_path):
5 | """Test setting and persisting license information for a model."""
6 | # Create temporary file path using pytest's tmp_path fixture
7 | model_path = tmp_path / "test_model_license.pt"
8 |
9 | # Load a base model
10 | model = Model.load("ner-fast")
11 |
12 | # Check initial license (should be none/default)
13 | assert model.license_info == "No license information available"
14 |
15 | # Set a new license
16 | test_license = "MIT License - Copyright (c) 2024"
17 | model.license_info = test_license
18 | assert model.license_info == test_license
19 |
20 | # Save the model with the new license
21 | model.save(str(model_path))
22 |
23 | # Load the saved model and check license persists
24 | loaded_model = Model.load(model_path)
25 | assert loaded_model.license_info == test_license
26 |
--------------------------------------------------------------------------------
/tests/models/test_regexp_tagger.py:
--------------------------------------------------------------------------------
1 | from flair.data import Sentence
2 | from flair.models import RegexpTagger
3 |
4 |
5 | def test_regexp_tagger():
6 |
7 | sentence = Sentence('Der sagte: "das ist durchaus interessant"')
8 |
9 | tagger = RegexpTagger(
10 | mapping=[(r'["„»]((?:(?=(\\?))\2.)*?)[”"“«]', "quote_part", 1), (r'["„»]((?:(?=(\\?))\2.)*?)[”"“«]', "quote")]
11 | )
12 |
13 | tagger.predict(sentence)
14 |
15 | assert sentence.get_label("quote_part").data_point.text == "das ist durchaus interessant"
16 | assert sentence.get_label("quote").data_point.text == '"das ist durchaus interessant"'
17 |
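Judging from the asserts above, the mapping entries are `(pattern, label_type)` or `(pattern, label_type, group)` tuples: when a capture-group index is given, only that group's text is labeled; otherwise the whole match is. The regex itself matches a quotation while tolerating backslash-escaped characters inside it (the `(?=(\\?))\2.` lookahead captures an optional escaping backslash, then consumes it together with the following character). A standalone check of what the two entries extract:

```python
import re

pattern = r'["„»]((?:(?=(\\?))\2.)*?)[”"“«]'
match = re.search(pattern, 'Der sagte: "das ist durchaus interessant"')
print(match.group(0))  # '"das ist durchaus interessant"' -> labeled "quote"
print(match.group(1))  # 'das ist durchaus interessant'   -> labeled "quote_part" (group 1)
```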
--------------------------------------------------------------------------------
/tests/models/test_relation_extractor.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from flair.data import Sentence
4 | from flair.datasets import ColumnCorpus
5 | from flair.embeddings import TransformerWordEmbeddings
6 | from flair.models import RelationExtractor
7 | from tests.model_test_utils import BaseModelTest
8 |
9 |
10 | class TestRelationExtractor(BaseModelTest):
11 | model_cls = RelationExtractor
12 | train_label_type = "relation"
13 | pretrained_model = "relations"
14 | model_args = {
15 | "entity_label_type": "ner",
16 | "train_on_gold_pairs_only": True,
17 | "entity_pair_filters": { # Define valid entity pair combinations, used as relation candidates
18 | ("ORG", "PER"), # founded_by
19 | ("LOC", "PER"), # place_of_birth
20 | },
21 | }
22 | training_args = {
23 | "max_epochs": 4,
24 | "mini_batch_size": 4,
25 | "learning_rate": 0.1,
26 | }
27 |
28 | @pytest.fixture()
29 | def corpus(self, tasks_base_path):
30 | return ColumnCorpus(
31 | data_folder=tasks_base_path / "conllu",
32 | train_file="train.conllup",
33 | dev_file="train.conllup",
34 | test_file="train.conllup",
35 | column_format={1: "text", 2: "pos", 3: "ner"},
36 | )
37 |
38 | @pytest.fixture()
39 | def example_sentence(self):
40 |         sentence = Sentence(["Microsoft", "was", "founded", "by", "Bill", "Gates"])
41 | sentence[:1].add_label(typename="ner", value="ORG", score=1.0)
42 | sentence[4:].add_label(typename="ner", value="PER", score=1.0)
43 | return sentence
44 |
45 | @pytest.fixture()
46 | def train_test_sentence(self):
47 | sentence = Sentence(["Apple", "was", "founded", "by", "Steve", "Jobs", "."])
48 | sentence[0:1].add_label("ner", "ORG")
49 | sentence[4:6].add_label("ner", "PER")
50 | return sentence
51 |
52 | @pytest.fixture()
53 | def embeddings(self):
54 | return TransformerWordEmbeddings(model="distilbert-base-uncased", fine_tune=True)
55 |
56 | def assert_training_example(self, predicted_training_example):
57 | relations = predicted_training_example.get_relations("relation")
58 | assert len(relations) == 1
59 | assert relations[0].tag == "founded_by"
60 |
61 | def has_embedding(self, sentence):
62 | return all(token.get_embedding().cpu().numpy().size != 0 for token in sentence)
63 |
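The `entity_pair_filters` set in the class configuration above whitelists which (head-label, tail-label) combinations may form relation candidates at all. A conceptual sketch of that filtering (not flair's actual candidate-generation code):

```python
from itertools import permutations

filters = {("ORG", "PER"), ("LOC", "PER")}
entities = [("Microsoft", "ORG"), ("Bill Gates", "PER")]

candidates = [
    (head, tail)
    for head, tail in permutations(entities, 2)
    if (head[1], tail[1]) in filters  # keep only allowed label pairs
]
print(candidates)  # [(('Microsoft', 'ORG'), ('Bill Gates', 'PER'))]
```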
--------------------------------------------------------------------------------
/tests/models/test_text_regressor.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import flair
4 | from flair.embeddings import DocumentRNNEmbeddings, WordEmbeddings
5 | from flair.models.text_regression_model import TextRegressor
6 | from tests.model_test_utils import BaseModelTest
7 |
8 |
9 | class TestTextRegressor(BaseModelTest):
10 | model_cls = TextRegressor
11 | train_label_type = "regression"
12 | training_args = {
13 | "max_epochs": 3,
14 | "mini_batch_size": 2,
15 | "learning_rate": 0.1,
16 | "main_evaluation_metric": ("correlation", "pearson"),
17 | }
18 |
19 | def build_model(self, embeddings, label_dict, **kwargs):
20 | # no need for label_dict
21 | return self.model_cls(embeddings, self.train_label_type)
22 |
23 | @pytest.fixture()
24 | def embeddings(self):
25 |         word_embedding = WordEmbeddings("turian")
26 |         return DocumentRNNEmbeddings([word_embedding], 128, 1, False, 64, False, False)
27 |
28 | @pytest.fixture()
29 | def corpus(self, tasks_base_path):
30 | return flair.datasets.ClassificationCorpus(tasks_base_path / "regression", label_type=self.train_label_type)
31 |
--------------------------------------------------------------------------------
/tests/models/test_word_tagger.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import flair
4 | from flair.embeddings import TransformerWordEmbeddings
5 | from flair.models import TokenClassifier
6 | from tests.model_test_utils import BaseModelTest
7 |
8 |
9 | class TestWordTagger(BaseModelTest):
10 | model_cls = TokenClassifier
11 | train_label_type = "pos"
12 | training_args = {
13 | "max_epochs": 2,
14 | "learning_rate": 0.1,
15 | "mini_batch_size": 2,
16 | }
17 |
18 | def has_embedding(self, sentence):
19 | for token in sentence:
20 | if token.get_embedding().cpu().numpy().size == 0:
21 | return False
22 |         return True
23 |
24 | def build_model(self, embeddings, label_dict, **kwargs):
25 | model_args = dict(self.model_args)
26 | for k in kwargs:
27 | if k in model_args:
28 |                 del model_args[k]  # avoid passing the same argument twice (via model_args and kwargs)
29 | return self.model_cls(
30 | embeddings=embeddings,
31 | label_dictionary=label_dict,
32 | label_type=self.train_label_type,
33 | **model_args,
34 | **kwargs,
35 | )
36 |
37 | @pytest.fixture()
38 | def corpus(self, tasks_base_path):
39 | return flair.datasets.UD_ENGLISH(tasks_base_path)
40 |
41 | @pytest.fixture()
42 | def embeddings(self):
43 | return TransformerWordEmbeddings("distilbert-base-uncased")
44 |
--------------------------------------------------------------------------------
/tests/resources/corpora/lorem_ipsum/test.txt:
--------------------------------------------------------------------------------
1 | Adipiscing commodo elit at imperdiet. Consequat interdum varius sit amet mattis vulputate enim nulla. Nulla aliquet porttitor lacus luctus accumsan tortor. Curabitur gravida arcu ac tortor. Adipiscing elit pellentesque habitant morbi. Sed viverra tellus in hac habitasse platea dictumst. Turpis cursus in hac habitasse. Pharetra vel turpis nunc eget. Enim facilisis gravida neque convallis a cras semper auctor neque. Interdum posuere lorem ipsum dolor sit amet consectetur adipiscing elit.
2 |
3 | Mauris sit amet massa vitae tortor condimentum lacinia. Neque gravida in fermentum et sollicitudin. Blandit volutpat maecenas volutpat blandit aliquam. Gravida neque convallis a cras semper auctor neque vitae. Viverra aliquet eget sit amet tellus cras adipiscing enim eu. Risus sed vulputate odio ut enim blandit volutpat maecenas. Amet tellus cras adipiscing enim eu. Viverra tellus in hac habitasse platea dictumst vestibulum rhoncus est. Magna etiam tempor orci eu lobortis elementum. Leo vel fringilla est ullamcorper eget. Nisl nisi scelerisque eu ultrices. Eros donec ac odio tempor orci dapibus ultrices in. Nisl nisi scelerisque eu ultrices vitae auctor eu augue. Hac habitasse platea dictumst vestibulum rhoncus est pellentesque elit. Habitasse platea dictumst vestibulum rhoncus est pellentesque elit. In ornare quam viverra orci sagittis. Morbi quis commodo odio aenean. Nam at lectus urna duis convallis convallis tellus id interdum.
--------------------------------------------------------------------------------
/tests/resources/corpora/lorem_ipsum/valid.txt:
--------------------------------------------------------------------------------
1 | Nulla at volutpat diam ut venenatis tellus in metus vulputate. Porttitor leo a diam sollicitudin tempor. Tincidunt vitae semper quis lectus nulla at volutpat diam. Ornare aenean euismod elementum nisi quis eleifend quam adipiscing. Tortor pretium viverra suspendisse potenti. Arcu risus quis varius quam quisque id. Non sodales neque sodales ut etiam sit amet nisl. Porttitor lacus luctus accumsan tortor posuere ac ut consequat. Diam sit amet nisl suscipit. Ut sem nulla pharetra diam sit amet nisl suscipit adipiscing. Varius quam quisque id diam. Elementum tempus egestas sed sed risus pretium quam vulputate. Eu ultrices vitae auctor eu augue ut lectus. Tincidunt id aliquet risus feugiat in ante metus dictum at. Mauris cursus mattis molestie a iaculis at erat pellentesque. Leo urna molestie at elementum eu.
2 |
3 | Posuere morbi leo urna molestie. Tincidunt nunc pulvinar sapien et. Mattis molestie a iaculis at erat pellentesque. Arcu cursus euismod quis viverra nibh cras pulvinar mattis nunc. Phasellus vestibulum lorem sed risus ultricies tristique nulla aliquet enim. Aenean et tortor at risus viverra. Ut placerat orci nulla pellentesque dignissim. Est lorem ipsum dolor sit amet. Eros donec ac odio tempor. Elementum integer enim neque volutpat ac tincidunt vitae.
--------------------------------------------------------------------------------
/tests/resources/tasks/ag_news/README.md:
--------------------------------------------------------------------------------
1 | ## AG_NEWS
2 |
3 | Data is taken from [here](https://www.di.unipi.it/~gulli/AG_corpus_of_news_articles.html).
4 |
5 | The dataset contains a collection of news articles grouped into different categories.
6 | We took a small random sample and converted it to the expected format of our data fetcher:
7 | ```
8 | __label__<class_name> <text>
9 | ```
10 |
11 | #### Publications Using the Dataset
12 |
13 | * G. M. Del Corso, A. Gulli, and F. Romani. Ranking a stream of news. In Proceedings of 14th International World Wide Web Conference, pages 97–106, Chiba, Japan, 2005.
14 | * A. Gulli. The anatomy of a news search engine. In Proceedings of 14th International World Wide Web Conference, pages 880–881, Chiba, Japan, 2005.
15 |
--------------------------------------------------------------------------------
/tests/resources/tasks/ag_news/test.txt:
--------------------------------------------------------------------------------
1 | __label__World Libya Seems Honest About Nuke Program -- UN Report VIENNA, Austria (Reuters) - The U.N. nuclear watchdog said in a confidential report circulated Monday that Libya appears to have been telling the truth in its declarations on the covert atomic weapons program that it agreed to abandon last year.
2 | __label__Business Judge Orders Parmalat Auditors to Trial MILAN (Reuters) - An Italian judge opened preliminary hearings on Tuesday into the 14-billion-euro collapse of Parmalat and immediately ruled that two former auditors of the food group should stand trial in one of Europe's biggest fraud cases.
3 | __label__Business Market Turmoil Saps Confidence \N
4 | __label__Sci/Tech Repairing airplane wings with nanotubes in flight An electrical pulse through nanotubes and wires helps find the crack.
5 | __label__Sports US lacks golden touch LAKE PLACID, N.Y. -- After Team USA racked up six goals in a 6-3 victory over the Swedes Saturday, coach Ben Smith said he hoped his players saved a few because goals were going to be hard to get yesterday against Canada.
6 | __label__World Experts Doubt Drop In Violence in Iraq The U.S. military's claim that violence has decreased sharply in Iraq in recent months has come under scrutiny from many experts within and outside the government, who contend that some of the underlying statistics are questionable and selectively ignore negative trends.
7 | __label__Sports Passing the torch Calgary Sun. Dan Marino wouldn't swap his collection of NFL records and Hall-of-Fame nomination for the world. Not even for that elusive Super Bowl victory.
8 | __label__Sci/Tech Saved, and Enslaved, by the Cell A growing number of experts say cellphone use may be making us less autonomous and less capable of solving problems on our own.
9 | __label__Sports Kolzig helps Capitals slide by Avalanche WASHINGTON (Reuters) - The Washington Capitals stayed in the hunt for a playoff spot after Olaf Kolzig made 18 saves in a 2-1 win over the struggling Colorado Avalanche on Wednesday.
10 | __label__World Pension Fund of New York Files Suit Against Merck The main pension fund of New York State filed a lawsuit against Merck & Company, accusing it of misleading shareholders about the safety of its pain drug Vioxx.
11 |
--------------------------------------------------------------------------------
/tests/resources/tasks/ag_news/train.txt:
--------------------------------------------------------------------------------
1 | __label__World Light relay 'should be dropped' A leading astronomy group in the US voices opposition to a global 'light relay' planned for next year.
2 | __label__World LA City Council Ordered to Pay Attention (AP) AP - During public hearings, members of the City Council talk on cell phones, chat among themselves, read mail or wander around the room. A state appeals court says they should be doing something else: paying attention.
3 | __label__Business Tribune Profit Declines on Lower Newspaper Ad Revenue Tribune said its third-quarter earnings dropped 7 percent, beating expectations, adding that the housing slump and lower consumer spending worsened advertising revenue.
4 | __label__World Blair is warned about an attack on Iran LONDON -- Foreign policy specialists warned Prime Minister Tony Blair yesterday that military action against Iran could worsen violence across the Middle East and urged him to persuade the United States to hold talks with Tehran.
5 | __label__World Bangladesh Awakes in Shock as Blast Toll Hits 16 Extra armed police patrolled the streets of the Bangladeshi capital and traffic was light on Sunday, a working day, as shocked Bangladeshis woke up to the aftermath of grenade blasts that killed at least 16 people.
6 | __label__Business Wall St. Turns to the Time Out as Punishment Regulators are wielding a new weapon against Wall Street firms instead of multimillion-dollar fines: temporarily shutting down certain business lines.
7 | __label__World Legal move over halted BAE probe Campaigners threaten the government with legal action after a probe into arms deals with Saudi Arabia is dropped.
8 | __label__Sports Gamecocks, Tigers Say "No" To Bowl Bids COLUMBIA, SC -- Clemson and South Carolina will not accept bowl bids, punishment for a brawl between players toward the end of Saturday's game, the schools announced Monday.
9 | __label__Sports Capitals Have Budget to Attract Free Agents The NHL free agent signing period begins Sunday at noon, and Capitals General Manager George McPhee is shopping with a bigger-than-usual budget.
10 | __label__Sci/Tech Hobbit-sized Humans Called Homo floresiensis Discovered by ... Long live the real Bilbo Baggins, the first Little People of the World, Homo floresiensis and Homo sapien archeologists Michael Morwood, Peter Brown and Professor Soejono!
11 |
--------------------------------------------------------------------------------
/tests/resources/tasks/column_corpus_options/eng.testa:
--------------------------------------------------------------------------------
1 | WORD TAG
2 | This O
3 | is O
4 | Coca Cola O
5 |
--------------------------------------------------------------------------------
/tests/resources/tasks/column_corpus_options/eng.testb:
--------------------------------------------------------------------------------
1 | WORD TAG
2 | This O
3 | is O
4 | New York O
5 |
--------------------------------------------------------------------------------
/tests/resources/tasks/column_corpus_options/eng.train:
--------------------------------------------------------------------------------
1 | WORD TAG
2 | This O
3 | is O
4 | New Berlin LOC
5 |
--------------------------------------------------------------------------------
/tests/resources/tasks/column_with_whitespaces/eng.testa:
--------------------------------------------------------------------------------
1 | It O +
2 | is O +
3 | a O +
4 | French B-LOC -
5 | - O -
6 | speaking O +
7 | town O -
8 | . O +
--------------------------------------------------------------------------------
/tests/resources/tasks/column_with_whitespaces/eng.testb:
--------------------------------------------------------------------------------
1 | It O +
2 | is O +
3 | a O +
4 | US B-LOC -
5 | - O -
6 | based O +
7 | company O -
8 | . O +
--------------------------------------------------------------------------------
/tests/resources/tasks/column_with_whitespaces/eng.train:
--------------------------------------------------------------------------------
1 | It O +
2 | is O +
3 | a O +
4 | German B-LOC -
5 | - O -
6 | owned O +
7 | firm O -
8 | . O +
--------------------------------------------------------------------------------
/tests/resources/tasks/conllu/train.conllu:
--------------------------------------------------------------------------------
1 | # text = Larry Page and Sergey Brin founded Google.
2 | # relations = 7;7;1;2;founded_by|7;7;4;5;founded_by
3 | 1 Larry PROPN B-PER _
4 | 2 Page PROPN I-PER _
5 | 3 and CCONJ O _
6 | 4 Sergey PROPN B-PER _
7 | 5 Brin PROPN I-PER _
8 | 6 founded VERB O _
9 | 7 Google PROPN B-ORG SpaceAfter=No
10 | 8 . PUNCT O _
11 |
12 | # text = Microsoft was founded by Bill Gates.
13 | # relations = 1;1;5;6;founded_by
14 | 1 Microsoft PROPN B-ORG _
15 | 2 was AUX O _
16 | 3 founded VERB O _
17 | 4 by ADP O _
18 | 5 Bill PROPN B-PER _
19 | 6 Gates PROPN I-PER SpaceAfter=No
20 | 7 . PUNCT O _
21 |
22 | # text = Konrad Zuse was born in Berlin on 22 June 1910.
23 | # relations = 6;6;1;2;place_of_birth
24 | 1 Konrad PROPN B-PER _
25 | 2 Zuse PROPN I-PER _
26 | 3 was AUX O _
27 | 4 born VERB O _
28 | 5 in ADP O _
29 | 6 Berlin PROPN B-LOC _
30 | 7 on ADP O _
31 | 8 22 NUM B-DATE _
32 | 9 June PROPN I-DATE _
33 | 10 1910 NUM I-DATE SpaceAfter=No
34 | 11 . PUNCT O _
35 |
36 | # text = Joseph Weizenbaum was born in Berlin, Germany.
37 | # relations = 6;6;1;2;place_of_birth
38 | 1 Joseph PROPN B-PER _
39 | 2 Weizenbaum PROPN I-PER _
40 | 3 was AUX O _
41 | 4 born VERB O _
42 | 5 in ADP O _
43 | 6 Berlin PROPN B-LOC _
44 | 7 , PUNCT O _
45 | 8 Germany PROPN B-LOC SpaceAfter=No
46 | 9 . PUNCT O _
47 |
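Each `# relations` comment above encodes one or more gold relations as `head_start;head_end;tail_start;tail_end;label`, with 1-based token indices and `|` separating multiple relations; e.g. `7;7;1;2;founded_by` links token 7 (Google) as head to tokens 1-2 (Larry Page) as tail. A minimal decoder, assuming exactly this layout:

```python
raw = "7;7;1;2;founded_by|7;7;4;5;founded_by"
for entry in raw.split("|"):
    head_start, head_end, tail_start, tail_end, label = entry.split(";")
    print(f"tokens {head_start}-{head_end} --{label}--> tokens {tail_start}-{tail_end}")
# tokens 7-7 --founded_by--> tokens 1-2   (Google founded_by Larry Page)
# tokens 7-7 --founded_by--> tokens 4-5   (Google founded_by Sergey Brin)
```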
--------------------------------------------------------------------------------
/tests/resources/tasks/conllu/train.conllup:
--------------------------------------------------------------------------------
1 | # global.columns = id form upos ner misc
2 | # text = Larry Page and Sergey Brin founded Google.
3 | # relations = 7;7;1;2;founded_by|7;7;4;5;founded_by
4 | 1 Larry PROPN B-PER _
5 | 2 Page PROPN I-PER _
6 | 3 and CCONJ O _
7 | 4 Sergey PROPN B-PER _
8 | 5 Brin PROPN I-PER _
9 | 6 founded VERB O _
10 | 7 Google PROPN B-ORG SpaceAfter=No
11 | 8 . PUNCT O _
12 |
13 | # text = Microsoft was founded by Bill Gates.
14 | # relations = 1;1;5;6;founded_by
15 | 1 Microsoft PROPN B-ORG _
16 | 2 was AUX O _
17 | 3 founded VERB O _
18 | 4 by ADP O _
19 | 5 Bill PROPN B-PER _
20 | 6 Gates PROPN I-PER SpaceAfter=No
21 | 7 . PUNCT O _
22 |
23 | # text = Konrad Zuse was born in Berlin on 22 June 1910.
24 | # relations = 6;6;1;2;place_of_birth
25 | 1 Konrad PROPN B-PER _
26 | 2 Zuse PROPN I-PER _
27 | 3 was AUX O _
28 | 4 born VERB O _
29 | 5 in ADP O _
30 | 6 Berlin PROPN B-LOC _
31 | 7 on ADP O _
32 | 8 22 NUM B-DATE _
33 | 9 June PROPN I-DATE _
34 | 10 1910 NUM I-DATE SpaceAfter=No
35 | 11 . PUNCT O _
36 |
37 | # text = Joseph Weizenbaum, a professor at MIT, was born in Berlin, Germany.
38 | # relations = 12;12;1;2;place_of_birth|14;14;1;2;place_of_birth
39 | 1 Joseph PROPN B-PER _
40 | 2 Weizenbaum PROPN I-PER SpaceAfter=No
41 | 3 , PUNCT O _
42 | 4 a DET O _
43 | 5 professor NOUN O _
44 | 6 at ADP O _
45 | 7 MIT PROPN B-ORG SpaceAfter=No
46 | 8 , PUNCT O _
47 | 9 was AUX O _
48 | 10 born VERB O _
49 | 11 in ADP O _
50 | 12 Berlin PROPN B-LOC SpaceAfter=No
51 | 13 , PUNCT O _
52 | 14 Germany PROPN B-LOC SpaceAfter=No
53 | 15 . PUNCT O _
54 |
55 | # text = The German-American computer scientist Joseph Weizenbaum (8 January 1923 - 5 March 2008) was born in Berlin.
56 | # relations = 21;21;7;8;place_of_birth
57 | 1 The DET O _
58 | 2 German PROPN O SpaceAfter=No
59 | 3 - PUNCT O SpaceAfter=No
60 | 4 American PROPN O _
61 | 5 computer PROPN O _
62 | 6 scientist NOUN O _
63 | 7 Joseph PROPN B-PER _
64 | 8 Weizenbaum PROPN I-PER _
65 | 9 ( PUNCT O SpaceAfter=No
66 | 10 8 NUM O _
67 | 11 January PROPN O _
68 | 12 1923 NUM O _
69 | 13 - SYM O _
70 | 14 5 NUM O _
71 | 15 March PROPN O _
72 | 16 2008 NUM O SpaceAfter=No
73 | 17 ) PUNCT O _
74 | 18 was PRON O _
75 | 19 born ADV O _
76 | 20 in ADP O _
77 | 21 Berlin PROPN B-LOC SpaceAfter=No
78 | 22 . PUNCT O _
79 |
--------------------------------------------------------------------------------
/tests/resources/tasks/conllu/universal_dependencies.conllu:
--------------------------------------------------------------------------------
1 | # text = They buy and sell books.
2 | 1 They they PRON PRP Case=Nom|Number=Plur 2 nsubj 2:nsubj|4:nsubj _
3 | 2 buy buy VERB VBP Number=Plur|Person=3|Tense=Pres 0 root 0:root _
4 | 3 and and CONJ CC _ 4 cc 4:cc _
5 | 4 sell sell VERB VBP Number=Plur|Person=3|Tense=Pres 2 conj 0:root|2:conj _
6 | 5 books book NOUN NNS Number=Plur 2 obj 2:obj|4:obj SpaceAfter=No
7 | 6 . . PUNCT . _ 2 punct 2:punct _
8 |
--------------------------------------------------------------------------------
/tests/resources/tasks/example_images/i_love_berlin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/tests/resources/tasks/example_images/i_love_berlin.png
--------------------------------------------------------------------------------
/tests/resources/tasks/fashion/eng.testa:
--------------------------------------------------------------------------------
1 | Most _ _ O
2 | wedding _ _ B-Occasion
3 | dresses _ _ B-NominalProduct
4 | , _ _ O
5 | for _ _ O
6 | example _ _ O
7 | , _ _ O
8 | are _ _ O
9 | simply _ _ O
10 | too _ _ O
11 | enormous _ _ O
12 | and _ _ O
13 | terrifyingly _ _ O
14 | loaded _ _ O
15 | with _ _ O
16 | sentimental _ _ O
17 | value _ _ O
18 | for _ _ O
19 | DIY _ _ B-ProductDesign
20 | dyeing _ _ I-ProductDesign
21 | . _ _ O
--------------------------------------------------------------------------------
/tests/resources/tasks/fashion/eng.testb:
--------------------------------------------------------------------------------
1 | For _ _ O
2 | my _ _ O
3 | Nurse _ _ B-NamedOrganizationBrand
4 | Ratched _ _ I-NamedOrganizationBrand
5 | dress _ _ B-NominalProduct
6 | , _ _ O
7 | I _ _ O
8 | had _ _ O
9 | brought _ _ O
10 | two _ _ O
11 | dyeing _ _ O
12 | options _ _ O
13 | — _ _ O
14 | one _ _ O
15 | more _ _ O
16 | ambitious _ _ O
17 | than _ _ O
18 | the _ _ O
19 | other _ _ O
20 | . _ _ O
--------------------------------------------------------------------------------
/tests/resources/tasks/fashion/eng.train:
--------------------------------------------------------------------------------
1 | From _ _ O
2 | the _ _ O
3 | charming _ _ O
4 | Arlésienne _ _ B-NamedPerson
5 | to _ _ O
6 | the _ _ O
7 | shepherdess _ _ B-NominalProduct
8 | in _ _ O
9 | a _ _ O
10 | fairy _ _ O
11 | tale _ _ O
12 | , _ _ O
13 | with _ _ O
14 | faille _ _ B-ProductPart
15 | , _ _ O
16 | piqué _ _ B-ProductPart
17 | , _ _ O
18 | taffeta _ _ B-ProductPart
19 | , _ _ O
20 | tulle _ _ B-ProductPart
21 | , _ _ O
22 | embroidery _ _ B-ProductPart
23 | , _ _ O
24 | lace _ _ B-ProductPart
25 | , _ _ O
26 | the _ _ O
27 | repertoire _ _ B-ProductDesign
28 | is _ _ O
29 | inexhaustible _ _ O
30 | . _ _ O
31 |
32 |
33 |
34 |
35 | Subscribe _ _ O
36 | to _ _ O
37 | Highsnobiety _ _ B-NamedOrganizationPublisher
38 | on _ _ O
39 | YouTube _ _ B-NamedOrganizationOther
40 | Eric _ _ B-NamedPerson
41 | Schoenborn _ _ I-NamedPerson
42 | and _ _ O
43 | Ed _ _ B-NamedPerson
44 | Selego _ _ I-NamedPerson
45 | have _ _ O
46 | joined _ _ O
47 | forces _ _ O
48 | with _ _ O
49 | Nocturnal _ _ B-NamedOrganizationBrand
50 | skate _ _ B-Activity
51 | shop _ _ O
52 | to _ _ O
53 | turn _ _ O
54 | Drexel _ _ B-NamedLocation
55 | University _ _ I-NamedLocation
56 | ’ _ _ O
57 | s _ _ O
58 | Leonard _ _ B-NamedLocation
59 | Pearlstein _ _ I-NamedLocation
60 | Gallery _ _ I-NamedLocation
61 | into _ _ O
62 | an _ _ O
63 | interactive _ _ O
64 | skate _ _ B-Activity
65 | pop _ _ O
66 | - _ _ O
67 | up _ _ O
68 | park _ _ O
69 | . _ _ O
70 |
71 | Philly _ _ B-NamedPerson
72 | Radness _ _ I-NamedPerson
73 | accounts _ _ O
74 | for _ _ O
75 | the _ _ O
76 | second _ _ O
77 | installment _ _ O
78 | in _ _ O
79 | the _ _ O
80 | Phenomenal _ _ O
81 | Radness _ _ O
82 | project _ _ O
83 | , _ _ O
84 | after _ _ O
85 | its _ _ O
86 | debut _ _ S-Occasion
87 | in _ _ O
88 | Miami _ _ B-NamedLocation
89 | a _ _ O
90 | few _ _ O
91 | years _ _ O
92 | ago _ _ O
93 | . _ _ O
94 |
95 | Milan _ _ B-NamedLocation
96 | was _ _ O
97 | all _ _ O
98 | the _ _ O
99 | really _ _ O
100 | big _ _ O
101 | girls _ _ O
102 | . _ _ O
103 |
104 | It _ _ O
105 | was _ _ O
106 | the _ _ O
107 | best _ _ O
108 | ! _ _ O
109 |
110 | We _ _ O
111 | go _ _ O
112 | to _ _ O
113 | flea _ _ O
114 | markets _ _ O
115 | together _ _ O
116 | when _ _ O
117 | we _ _ O
118 | ' _ _ O
119 | re _ _ O
120 | in _ _ O
121 | LA _ _ B-NamedLocation
122 | . _ _ O
--------------------------------------------------------------------------------
/tests/resources/tasks/fashion_disjunct/eng.testa:
--------------------------------------------------------------------------------
1 | Most _ _ O
2 | wedding _ _ B-Occasion
3 | dresses _ _ B-NominalProduct
4 | , _ _ O
5 | for _ _ O
6 | example _ _ O
7 | , _ _ O
8 | are _ _ O
9 | simply _ _ O
10 | too _ _ O
11 | enormous _ _ O
12 | and _ _ O
13 | terrifyingly _ _ B-CreativeWord
14 | loaded _ _ O
15 | with _ _ O
16 | sentimental _ _ O
17 | value _ _ O
18 | for _ _ O
19 | DIY _ _ B-ProductDesign
20 | dyeing _ _ I-ProductDesign
21 | . _ _ O
--------------------------------------------------------------------------------
/tests/resources/tasks/fashion_disjunct/eng.testb:
--------------------------------------------------------------------------------
1 | For _ _ O
2 | my _ _ O
3 | Nurse _ _ B-NamedOrganizationBrand
4 | Ratched _ _ I-NamedOrganizationBrand
5 | dress _ _ B-NominalProduct
6 | , _ _ O
7 | I _ _ O
8 | had _ _ O
9 | brought _ _ O
10 | two _ _ O
11 | dyeing _ _ O
12 | options _ _ O
13 | — _ _ O
14 | one _ _ O
15 | more _ _ O
16 | ambitious _ _ B-Ambitiousness
17 | than _ _ O
18 | the _ _ O
19 | other _ _ O
20 | . _ _ O
--------------------------------------------------------------------------------
/tests/resources/tasks/fashion_disjunct/eng.train:
--------------------------------------------------------------------------------
1 | From _ _ O
2 | the _ _ O
3 | charming _ _ O
4 | Arlésienne _ _ B-NamedPerson
5 | to _ _ O
6 | the _ _ O
7 | shepherdess _ _ O
8 | in _ _ O
9 | a _ _ O
10 | fairy _ _ O
11 | tale _ _ O
12 | , _ _ O
13 | with _ _ O
14 | faille _ _ B-ProductPart
15 | , _ _ O
16 | piqué _ _ B-ProductPart
17 | , _ _ O
18 | taffeta _ _ B-ProductPart
19 | , _ _ O
20 | tulle _ _ B-ProductPart
21 | , _ _ O
22 | embroidery _ _ B-ProductPart
23 | , _ _ O
24 | lace _ _ B-ProductPart
25 | , _ _ O
26 | the _ _ O
27 | repertoire _ _ O
28 | is _ _ O
29 | inexhaustible _ _ O
30 | . _ _ O
31 |
32 |
33 |
34 |
35 | Subscribe _ _ O
36 | to _ _ O
37 | Highsnobiety _ _ B-NamedOrganizationPublisher
38 | on _ _ O
39 | YouTube _ _ B-NamedOrganizationOther
40 | Eric _ _ B-NamedPerson
41 | Schoenborn _ _ I-NamedPerson
42 | and _ _ O
43 | Ed _ _ B-NamedPerson
44 | Selego _ _ I-NamedPerson
45 | have _ _ O
46 | joined _ _ O
47 | forces _ _ O
48 | with _ _ O
49 | Nocturnal _ _ B-NamedOrganizationBrand
50 | skate _ _ B-Activity
51 | shop _ _ O
52 | to _ _ O
53 | turn _ _ O
54 | Drexel _ _ B-NamedLocation
55 | University _ _ I-NamedLocation
56 | ’ _ _ O
57 | s _ _ O
58 | Leonard _ _ B-NamedLocation
59 | Pearlstein _ _ I-NamedLocation
60 | Gallery _ _ I-NamedLocation
61 | into _ _ O
62 | an _ _ O
63 | interactive _ _ O
64 | skate _ _ B-Activity
65 | pop _ _ O
66 | - _ _ O
67 | up _ _ O
68 | park _ _ O
69 | . _ _ O
70 |
71 | Philly _ _ B-NamedPerson
72 | Radness _ _ I-NamedPerson
73 | accounts _ _ O
74 | for _ _ O
75 | the _ _ O
76 | second _ _ O
77 | installment _ _ O
78 | in _ _ O
79 | the _ _ O
80 | Phenomenal _ _ O
81 | Radness _ _ O
82 | project _ _ O
83 | , _ _ O
84 | after _ _ O
85 | its _ _ O
86 | debut _ _ O
87 | in _ _ O
88 | Miami _ _ B-NamedLocation
89 | a _ _ O
90 | few _ _ O
91 | years _ _ O
92 | ago _ _ O
93 | . _ _ O
94 |
95 | Milan _ _ B-NamedLocation
96 | was _ _ O
97 | all _ _ O
98 | the _ _ O
99 | really _ _ O
100 | big _ _ O
101 | girls _ _ O
102 | . _ _ O
103 |
104 | It _ _ O
105 | was _ _ O
106 | the _ _ O
107 | best _ _ O
108 | ! _ _ O
109 |
110 | We _ _ O
111 | go _ _ O
112 | to _ _ O
113 | flea _ _ O
114 | markets _ _ O
115 | together _ _ O
116 | when _ _ O
117 | we _ _ O
118 | ' _ _ O
119 | re _ _ O
120 | in _ _ O
121 | LA _ _ B-NamedLocation
122 | . _ _ O
--------------------------------------------------------------------------------
/tests/resources/tasks/fashion_nodev/test.tsv:
--------------------------------------------------------------------------------
1 | Most _ _ O
2 | wedding _ _ B-Occasion
3 | dresses _ _ B-NominalProduct
4 | , _ _ O
5 | for _ _ O
6 | example _ _ O
7 | , _ _ O
8 | are _ _ O
9 | simply _ _ O
10 | too _ _ O
11 | enormous _ _ O
12 | and _ _ O
13 | terrifyingly _ _ O
14 | loaded _ _ O
15 | with _ _ O
16 | sentimental _ _ O
17 | value _ _ O
18 | for _ _ O
19 | DIY _ _ B-ProductDesign
20 | dyeing _ _ I-ProductDesign
21 | . _ _ O
--------------------------------------------------------------------------------
/tests/resources/tasks/fashion_nodev/train.tsv:
--------------------------------------------------------------------------------
1 | From _ _ O
2 | the _ _ O
3 | charming _ _ O
4 | Arlésienne _ _ B-NamedPerson
5 | to _ _ O
6 | the _ _ O
7 | shepherdess _ _ O
8 | in _ _ O
9 | a _ _ O
10 | fairy _ _ O
11 | tale _ _ O
12 | , _ _ O
13 | with _ _ O
14 | faille _ _ B-ProductPart
15 | , _ _ O
16 | piqué _ _ B-ProductPart
17 | , _ _ O
18 | taffeta _ _ B-ProductPart
19 | , _ _ O
20 | tulle _ _ B-ProductPart
21 | , _ _ O
22 | embroidery _ _ B-ProductPart
23 | , _ _ O
24 | lace _ _ B-ProductPart
25 | , _ _ O
26 | the _ _ O
27 | repertoire _ _ O
28 | is _ _ O
29 | inexhaustible _ _ O
30 | . _ _ O
31 |
32 |
33 |
34 |
35 | Subscribe _ _ O
36 | to _ _ O
37 | Highsnobiety _ _ B-NamedOrganizationPublisher
38 | on _ _ O
39 | YouTube _ _ B-NamedOrganizationOther
40 | Eric _ _ B-NamedPerson
41 | Schoenborn _ _ I-NamedPerson
42 | and _ _ O
43 | Ed _ _ B-NamedPerson
44 | Selego _ _ I-NamedPerson
45 | have _ _ O
46 | joined _ _ O
47 | forces _ _ O
48 | with _ _ O
49 | Nocturnal _ _ B-NamedOrganizationBrand
50 | skate _ _ B-Activity
51 | shop _ _ O
52 | to _ _ O
53 | turn _ _ O
54 | Drexel _ _ B-NamedLocation
55 | University _ _ I-NamedLocation
56 | ’ _ _ O
57 | s _ _ O
58 | Leonard _ _ B-NamedLocation
59 | Pearlstein _ _ I-NamedLocation
60 | Gallery _ _ I-NamedLocation
61 | into _ _ O
62 | an _ _ O
63 | interactive _ _ O
64 | skate _ _ B-Activity
65 | pop _ _ O
66 | - _ _ O
67 | up _ _ O
68 | park _ _ O
69 | . _ _ O
70 |
71 | Philly _ _ B-NamedPerson
72 | Radness _ _ I-NamedPerson
73 | accounts _ _ O
74 | for _ _ O
75 | the _ _ O
76 | second _ _ O
77 | installment _ _ O
78 | in _ _ O
79 | the _ _ O
80 | Phenomenal _ _ O
81 | Radness _ _ O
82 | project _ _ O
83 | , _ _ O
84 | after _ _ O
85 | its _ _ O
86 | debut _ _ O
87 | in _ _ O
88 | Miami _ _ B-NamedLocation
89 | a _ _ O
90 | few _ _ O
91 | years _ _ O
92 | ago _ _ O
93 | . _ _ O
94 |
95 | Milan _ _ B-NamedLocation
96 | was _ _ O
97 | all _ _ O
98 | the _ _ O
99 | really _ _ O
100 | big _ _ O
101 | girls _ _ O
102 | . _ _ O
103 |
104 | It _ _ O
105 | was _ _ O
106 | the _ _ O
107 | best _ _ O
108 | ! _ _ O
109 |
110 | We _ _ O
111 | go _ _ O
112 | to _ _ O
113 | flea _ _ O
114 | markets _ _ O
115 | together _ _ O
116 | when _ _ O
117 | we _ _ O
118 | ' _ _ O
119 | re _ _ O
120 | in _ _ O
121 | LA _ _ B-NamedLocation
122 | . _ _ O
--------------------------------------------------------------------------------
/tests/resources/tasks/fewshot_conll/1shot.txt:
--------------------------------------------------------------------------------
1 | Three O
2 | Russian B-MISC
3 | servicemen O
4 | were O
5 | killed O
6 | on O
7 | Saturday O
8 | when O
9 | unidentified O
10 | gunmen O
11 | attacked O
12 | guards O
13 | at O
14 | an O
15 | anti-aircraft O
16 | installation O
17 | outside O
18 | Moscow B-LOC
19 | , O
20 | Interfax B-ORG
21 | news O
22 | agency O
23 | said O
24 | . O
25 |
26 | " O
27 | I O
28 | think O
29 | that O
30 | , O
31 | on O
32 | balance O
33 | , O
34 | it O
35 | is O
36 | looking O
37 | a O
38 | little O
39 | bit O
40 | on O
41 | the O
42 | strong O
43 | side O
44 | , O
45 | " O
46 | Lindsey B-PER
47 | said O
48 | . O
49 |
--------------------------------------------------------------------------------
/tests/resources/tasks/imdb/README.md:
--------------------------------------------------------------------------------
1 | ## IMDB
2 |
3 | Data is taken from [here](http://ai.stanford.edu/~amaas/data/sentiment/).
4 |
5 | The dataset contains data for a binary sentiment classification.
6 | We took a small random sample and converted it to the expected format of our data fetcher:
7 | ```
8 | __label__<class_name> <text>
9 | ```
10 |
11 | #### Publications Using the Dataset
12 |
13 | * Andrew L. Maas, Raymond E. Daly, Peter T. Pham, Dan Huang, Andrew Y. Ng, and Christopher Potts. (2011). Learning Word Vectors for Sentiment Analysis. The 49th Annual Meeting of the Association for Computational Linguistics (ACL 2011).
14 |
--------------------------------------------------------------------------------
/tests/resources/tasks/imdb_underscore/README.md:
--------------------------------------------------------------------------------
1 | ## IMDB
2 |
3 | Data is taken from [here](http://ai.stanford.edu/~amaas/data/sentiment/).
4 |
5 | The dataset contains data for a binary sentiment classification.
6 | We took a small random sample and converted it to the expected format of our data fetcher:
7 | ```
8 | __label__<class_name> <text>
9 | ```
10 |
11 | #### Publications Using the Dataset
12 |
13 | * Andrew L. Maas, Raymond E. Daly, Peter T. Pham, Dan Huang, Andrew Y. Ng, and Christopher Potts. (2011). Learning Word Vectors for Sentiment Analysis. The 49th Annual Meeting of the Association for Computational Linguistics (ACL 2011).
14 |
--------------------------------------------------------------------------------
/tests/resources/tasks/jsonl/testa.jsonl:
--------------------------------------------------------------------------------
1 | {"id": 101319, "data": "This is New Berlin", "label": [[8, 18, "LOC"]], "metadata": [["from", 123]]}
2 | {"id": 101320, "data": "EU rejects German call to boycott British lamb .", "label": [[0, 2, "ORG"], [11, 17, "MISC"], [34, 46, "MISC"]], "metadata": [["from", 124]]}
3 | {"id": 101321, "data": "Peter Blackburn", "label": [[0, 15, "PER"]], "metadata": [["from", 125]]}
4 |
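The `label` entries in these JSONL files are character-offset spans over the `data` string, with end-exclusive offsets, as the records themselves confirm:

```python
record = {"id": 101319, "data": "This is New Berlin", "label": [[8, 18, "LOC"]]}
start, end, tag = record["label"][0]
assert record["data"][start:end] == "New Berlin"  # offsets index characters, end-exclusive
```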
--------------------------------------------------------------------------------
/tests/resources/tasks/jsonl/testb.jsonl:
--------------------------------------------------------------------------------
1 | {"id": 101319, "data": "This is New Berlin", "label": [[8, 18, "LOC"]]}
2 | {"id": 101320, "data": "EU rejects German call to boycott British lamb .", "label": [[0, 2, "ORG"], [11, 17, "MISC"], [34, 46, "MISC"]]}
3 | {"id": 101321, "data": "Peter Blackburn", "label": [[0, 15, "PER"]]}
4 |
--------------------------------------------------------------------------------
/tests/resources/tasks/jsonl/train.jsonl:
--------------------------------------------------------------------------------
1 | {"id": 101319, "data": "This is New Berlin", "label": [[8, 18, "LOC"]]}
2 | {"id": 101319, "data": "This is New Berlin.", "label": [[8, 18, "LOC"]]}
3 | {"id": 101319, "data": "This is New Berlin.", "label": [[8, 19, "LOC"]]}
4 | {"id": 101320, "data": "EU rejects German call to boycott British lamb .", "label": [[0, 2, "ORG"], [11, 17, "MISC"], [34, 46, "MISC"]]}
5 | {"id": 101321, "data": "Peter Blackburn", "label": [[0, 15, "PER"]]}
6 |
--------------------------------------------------------------------------------
/tests/resources/tasks/multi_class/dev.txt:
--------------------------------------------------------------------------------
1 | __label__apple apple
2 | __label__tv tv
3 | __label__guitar guitar
4 | __label__apple __label__tv apple tv
--------------------------------------------------------------------------------
/tests/resources/tasks/multi_class/test.txt:
--------------------------------------------------------------------------------
1 | __label__guitar guitar
2 | __label__apple apple
3 | __label__tv tv
4 | __label__apple __label__tv apple tv
5 | __label__apple __label__guitar apple tv
--------------------------------------------------------------------------------
/tests/resources/tasks/multi_class/train.txt:
--------------------------------------------------------------------------------
1 | __label__tv tv
2 | __label__apple __label__tv apple tv
3 | __label__apple apple
4 | __label__tv tv
5 | __label__apple __label__tv apple tv
6 | __label__guitar guitar
7 | __label__guitar guitar
--------------------------------------------------------------------------------
/tests/resources/tasks/multi_class_negative_examples/dev.txt:
--------------------------------------------------------------------------------
1 | __label__apple apple
2 | __label__tv tv
3 | __label__guitar guitar
4 | __label__apple __label__tv apple tv
5 | dev example without labels
6 |
--------------------------------------------------------------------------------
/tests/resources/tasks/multi_class_negative_examples/test.txt:
--------------------------------------------------------------------------------
1 | __label__guitar guitar
2 | __label__apple apple
3 | __label__tv tv
4 | __label__apple __label__tv apple tv
5 | __label__apple __label__guitar apple tv
6 | test example without labels
7 |
--------------------------------------------------------------------------------
/tests/resources/tasks/multi_class_negative_examples/train.txt:
--------------------------------------------------------------------------------
1 | __label__tv tv
2 | __label__apple __label__tv apple tv
3 | __label__apple apple
4 | __label__tv tv
5 | __label__apple __label__tv apple tv
6 | __label__guitar guitar
7 | __label__guitar guitar
8 | train example without labels
9 |
--------------------------------------------------------------------------------
/tests/resources/tasks/ner_german_germeval/NER-de-dev.tsv:
--------------------------------------------------------------------------------
1 | # http://de.wikipedia.org/wiki/Toyota_Crown [2009-08-13]
2 | 1 1980 O O
3 | 2 kam O O
4 | 3 der O O
5 | 4 Crown B-OTH O
6 | 5 als O O
7 | 6 Versuch O O
8 | 7 von O O
9 | 8 Toyota B-ORG O
10 | 9 , O O
11 | 10 sich O O
12 | 11 in O O
13 | 12 der O O
14 | 13 Oberen O O
15 | 14 Mittelklasse O O
16 | 15 zu O O
17 | 16 etablieren O O
18 | 17 , O O
19 | 18 auch O O
20 | 19 nach O O
21 | 20 Deutschland B-LOC O
22 | 21 . O O
23 |
--------------------------------------------------------------------------------
/tests/resources/tasks/ner_german_germeval/NER-de-test.tsv:
--------------------------------------------------------------------------------
1 | # http://de.wikipedia.org/wiki/Schönburg_(Rhein) [2009-10-23]
2 | 1 1951 O O
3 | 2 bis O O
4 | 3 1953 O O
5 | 4 wurde O O
6 | 5 der O O
7 | 6 nördliche O O
8 | 7 Teil O O
9 | 8 als O O
10 | 9 Jugendburg O O
11 | 10 des O O
12 | 11 Kolpingwerkes B-OTH O
13 | 12 gebaut O O
14 | 13 . O O
--------------------------------------------------------------------------------
/tests/resources/tasks/ner_german_germeval/NER-de-train.tsv:
--------------------------------------------------------------------------------
1 | # n-tv.de vom 26.02.2005 [2005-02-26]
2 | 1 Schartau B-PER O
3 | 2 sagte O O
4 | 3 dem O O
5 | 4 " O O
6 | 5 Tagesspiegel B-ORG O
7 | 6 " O O
8 | 7 vom O O
9 | 8 Freitag O O
10 | 9 , O O
11 | 10 Fischer B-PER O
12 | 11 sei O O
13 | 12 " O O
14 | 13 in O O
15 | 14 einer O O
16 | 15 Weise O O
17 | 16 aufgetreten O O
18 | 17 , O O
19 | 18 die O O
20 | 19 alles O O
21 | 20 andere O O
22 | 21 als O O
23 | 22 überzeugend O O
24 | 23 war O O
25 | 24 " O O
26 | 25 . O O
27 |
28 | # welt.de vom 29.10.2005 [2005-10-29]
29 | 1 Firmengründer O O
30 | 2 Wolf B-PER O
31 | 3 Peter I-PER O
32 | 4 Bree I-PER O
33 | 5 arbeitete O O
34 | 6 Anfang O O
35 | 7 der O O
36 | 8 siebziger O O
37 | 9 Jahre O O
38 | 10 als O O
39 | 11 Möbelvertreter O O
40 | 12 , O O
41 | 13 als O O
42 | 14 er O O
43 | 15 einen O O
44 | 16 fliegenden O O
45 | 17 Händler O O
46 | 18 aus O O
47 | 19 dem O O
48 | 20 Libanon B-LOC O
49 | 21 traf O O
50 | 22 . O O
--------------------------------------------------------------------------------
/tests/resources/tasks/ontonotes/tiny-conll-2012.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flairNLP/flair/ee8596c2bbe737ec9ddeb1c6cb62fa0b161f4d84/tests/resources/tasks/ontonotes/tiny-conll-2012.zip
--------------------------------------------------------------------------------
/tests/resources/tasks/regression/README.md:
--------------------------------------------------------------------------------
1 | ## REGRESSION
2 |
3 | Data is taken from [here](http://saifmohammad.com/WebPages/EmotionIntensity-SharedTask.html).
4 |
5 | The dataset contains a collection of tweets with joy intensity value.
6 | We took the joy dataset and converted it to the expected format of our data fetcher:
7 | ```
8 | __label__<value> <text>
9 | ```
10 |
11 | #### Publication About the Dataset
12 |
13 | * Emotion Intensities in Tweets. Saif M. Mohammad and Felipe Bravo-Marquez. In Proceedings of the sixth joint conference on lexical and computational semantics (*Sem), August 2017, Vancouver, Canada.
14 | * WASSA-2017 Shared Task on Emotion Intensity. Saif M. Mohammad and Felipe Bravo-Marquez. In Proceedings of the EMNLP 2017 Workshop on Computational Approaches to Subjectivity, Sentiment, and Social Media (WASSA), September 2017, Copenhagen, Denmark.
15 |
--------------------------------------------------------------------------------
/tests/resources/tasks/span_labels/span_first.txt:
--------------------------------------------------------------------------------
1 | Vgl. O
2 | Rundschreiben O
3 | RAB PARTA
4 | 1/2010 YEAR
5 | Rz MISC
6 | 8. MISC
--------------------------------------------------------------------------------
/tests/resources/tasks/span_labels/span_second.txt:
--------------------------------------------------------------------------------
1 | -DOCSTART-
2 |
3 | Vgl. O
4 | Rundschreiben O
5 | RAB PARTA
6 | 1/2010 YEAR
7 | Rz MISC
8 | 8. MISC
--------------------------------------------------------------------------------
/tests/resources/tasks/span_labels/span_third.txt:
--------------------------------------------------------------------------------
1 | -DOCSTART-
2 |
3 | Rundschreiben O
4 |
5 | Vgl. O
6 | Rundschreiben O
7 | RAB PARTA
8 | 1/2010 YEAR
9 | Rz MISC
10 | 8. MISC
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_bioes/dev.txt:
--------------------------------------------------------------------------------
1 | this O
2 | is O
3 | New B-LOC
4 | York I-LOC
5 |
6 | this O
7 | is O
8 | Berlin B-LOC
9 |
10 | here O
11 | is O
12 | New B-LOC
13 | York I-LOC
14 |
15 | here O
16 | is O
17 | Berlin B-LOC
18 |
19 | I O
20 | like O
21 | New B-LOC
22 | York I-LOC
23 |
24 | I O
25 | like O
26 | Berlin B-LOC
27 |
28 | we O
29 | like O
30 | New B-LOC
31 | York I-LOC
32 |
33 | we O
34 | like O
35 | Berlin B-LOC
36 |
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_bioes/test.txt:
--------------------------------------------------------------------------------
1 | this O
2 | is O
3 | New B-LOC
4 | York I-LOC
5 |
6 | this O
7 | is O
8 | Berlin B-LOC
9 |
10 | here O
11 | is O
12 | New B-LOC
13 | York I-LOC
14 |
15 | here O
16 | is O
17 | Berlin B-LOC
18 |
19 | I O
20 | like O
21 | New B-LOC
22 | York I-LOC
23 |
24 | I O
25 | like O
26 | Berlin B-LOC
27 |
28 | we O
29 | like O
30 | New B-LOC
31 | York I-LOC
32 |
33 | we O
34 | like O
35 | Berlin B-LOC
36 |
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_bioes/train.txt:
--------------------------------------------------------------------------------
1 | this O
2 | is O
3 | New B-LOC
4 | York I-LOC
5 |
6 | this O
7 | is O
8 | Berlin B-LOC
9 |
10 | here O
11 | is O
12 | New B-LOC
13 | York I-LOC
14 |
15 | here O
16 | is O
17 | Berlin B-LOC
18 |
19 | I O
20 | like O
21 | New B-LOC
22 | York I-LOC
23 |
24 | I O
25 | like O
26 | Berlin B-LOC
27 |
28 | we O
29 | like O
30 | New B-LOC
31 | York I-LOC
32 |
33 | we O
34 | like O
35 | Berlin B-LOC
36 |
37 | this O
38 | is O
39 | New B-LOC
40 | York I-LOC
41 |
42 | this O
43 | is O
44 | Berlin B-LOC
45 |
46 | here O
47 | is O
48 | New B-LOC
49 | York I-LOC
50 |
51 | here O
52 | is O
53 | Berlin B-LOC
54 |
55 | I O
56 | like O
57 | New B-LOC
58 | York I-LOC
59 |
60 | I O
61 | like O
62 | Berlin B-LOC
63 |
64 | we O
65 | like O
66 | New B-LOC
67 | York I-LOC
68 |
69 | we O
70 | like O
71 | Berlin B-LOC
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_bioes_with_boundaries/dev.txt:
--------------------------------------------------------------------------------
1 | this O
2 | is O
3 | New B-LOC
4 | York I-LOC
5 |
6 | here O
7 | is O
8 | New B-LOC
9 | York I-LOC
10 |
11 | I O
12 | like O
13 | New B-LOC
14 | York I-LOC
15 |
16 | we O
17 | like O
18 | New B-LOC
19 | York I-LOC
20 |
21 | -DOCSTART-
22 |
23 | this O
24 | is O
25 | Berlin B-LOC
26 |
27 | here O
28 | is O
29 | Berlin B-LOC
30 |
31 | I O
32 | like O
33 | Berlin B-LOC
34 |
35 | we O
36 | like O
37 | Berlin B-LOC
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_bioes_with_boundaries/test.txt:
--------------------------------------------------------------------------------
1 | this O
2 | is O
3 | New B-LOC
4 | York I-LOC
5 |
6 | here O
7 | is O
8 | New B-LOC
9 | York I-LOC
10 |
11 | I O
12 | like O
13 | New B-LOC
14 | York I-LOC
15 |
16 | we O
17 | like O
18 | New B-LOC
19 | York I-LOC
20 |
21 | -DOCSTART-
22 |
23 | this O
24 | is O
25 | Berlin B-LOC
26 |
27 | here O
28 | is O
29 | Berlin B-LOC
30 |
31 | I O
32 | like O
33 | Berlin B-LOC
34 |
35 | we O
36 | like O
37 | Berlin B-LOC
38 |
39 | -DOCSTART-
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_bioes_with_boundaries/train.txt:
--------------------------------------------------------------------------------
1 | this O
2 | is O
3 | New B-LOC
4 | York I-LOC
5 |
6 | here O
7 | is O
8 | New B-LOC
9 | York I-LOC
10 |
11 | I O
12 | like O
13 | New B-LOC
14 | York I-LOC
15 |
16 | we O
17 | like O
18 | New B-LOC
19 | York I-LOC
20 |
21 | -DOCSTART-
22 |
23 | this O
24 | is O
25 | Berlin B-LOC
26 |
27 | here O
28 | is O
29 | Berlin B-LOC
30 |
31 | I O
32 | like O
33 | Berlin B-LOC
34 |
35 | we O
36 | like O
37 | Berlin B-LOC
38 |
39 | -DOCSTART-
40 |
41 | this O
42 | is O
43 | New B-LOC
44 | York I-LOC
45 |
46 | here O
47 | is O
48 | New B-LOC
49 | York I-LOC
50 |
51 | I O
52 | like O
53 | New B-LOC
54 | York I-LOC
55 |
56 | we O
57 | like O
58 | New B-LOC
59 | York I-LOC
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_text_classification_multi/dev.txt:
--------------------------------------------------------------------------------
1 | __label__pizza this is pizza
2 | __label__Berlin this is Berlin
3 | __label__Berlin __label__pizza this is Berlin and pizza
4 | __label__pizza here is pizza
5 | __label__Berlin here is Berlin
6 | __label__Berlin __label__pizza here is Berlin and pizza
7 | __label__pizza I like pizza
8 | __label__Berlin I like Berlin
9 | __label__Berlin __label__pizza I like Berlin and pizza
10 | __label__pizza we like pizza
11 | __label__Berlin we like Berlin
12 | __label__Berlin __label__pizza we like Berlin and pizza
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_text_classification_multi/test.txt:
--------------------------------------------------------------------------------
1 | __label__pizza this is pizza
2 | __label__Berlin this is Berlin
3 | __label__Berlin __label__pizza this is Berlin and pizza
4 | __label__pizza here is pizza
5 | __label__Berlin here is Berlin
6 | __label__Berlin __label__pizza here is Berlin and pizza
7 | __label__pizza I like pizza
8 | __label__Berlin I like Berlin
9 | __label__Berlin __label__pizza I like Berlin and pizza
10 | __label__pizza we like pizza
11 | __label__Berlin we like Berlin
12 | __label__Berlin __label__pizza we like Berlin and pizza
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_text_classification_multi/train.txt:
--------------------------------------------------------------------------------
1 | __label__pizza this is pizza
2 | __label__Berlin this is Berlin
3 | __label__Berlin __label__pizza this is Berlin and pizza
4 | __label__pizza here is pizza
5 | __label__Berlin here is Berlin
6 | __label__Berlin __label__pizza here is Berlin and pizza
7 | __label__pizza I like pizza
8 | __label__Berlin I like Berlin
9 | __label__Berlin __label__pizza I like Berlin and pizza
10 | __label__pizza we like pizza
11 | __label__Berlin we like Berlin
12 | __label__Berlin __label__pizza we like Berlin and pizza
13 | __label__pizza this is pizza
14 | __label__Berlin this is Berlin
15 | __label__Berlin __label__pizza this is Berlin and pizza
16 | __label__pizza here is pizza
17 | __label__Berlin here is Berlin
18 | __label__Berlin __label__pizza here is Berlin and pizza
19 | __label__pizza I like pizza
20 | __label__Berlin I like Berlin
21 | __label__Berlin __label__pizza I like Berlin and pizza
22 | __label__pizza we like pizza
23 | __label__Berlin we like Berlin
24 | __label__Berlin __label__pizza we like Berlin and pizza
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_text_classification_single/dev.txt:
--------------------------------------------------------------------------------
1 | __label__New_York this is New York
2 | __label__Berlin this is Berlin
3 | __label__New_York here is New York
4 | __label__Berlin here is Berlin
5 | __label__New_York I like New York
6 | __label__Berlin I like Berlin
7 | __label__New_York we like New York
8 | __label__Berlin we like Berlin
9 |
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_text_classification_single/test.txt:
--------------------------------------------------------------------------------
1 | __label__New_York this is New York
2 | __label__Berlin this is Berlin
3 | __label__New_York here is New York
4 | __label__Berlin here is Berlin
5 | __label__New_York I like New York
6 | __label__Berlin I like Berlin
7 | __label__New_York we like New York
8 | __label__Berlin we like Berlin
9 |
--------------------------------------------------------------------------------
/tests/resources/tasks/trivial/trivial_text_classification_single/train.txt:
--------------------------------------------------------------------------------
1 | __label__New_York this is New York
2 | __label__Berlin this is Berlin
3 | __label__New_York here is New York
4 | __label__Berlin here is Berlin
5 | __label__New_York I like New York
6 | __label__Berlin I like Berlin
7 | __label__New_York we like New York
8 | __label__Berlin we like Berlin
9 | __label__New_York this is New York
10 | __label__Berlin this is Berlin
11 | __label__New_York here is New York
12 | __label__Berlin here is Berlin
13 | __label__New_York I like New York
14 | __label__Berlin I like Berlin
15 | __label__New_York we like New York
16 | __label__Berlin we like Berlin
--------------------------------------------------------------------------------
/tests/resources/tasks/ud_english/en_ewt-ud-dev.conllu:
--------------------------------------------------------------------------------
1 | # newdoc id = weblog-blogspot.com_nominations_20041117172713_ENG_20041117_172713
2 | # sent_id = weblog-blogspot.com_nominations_20041117172713_ENG_20041117_172713-0001
3 | # text = From the AP comes this story :
4 | 1 From from ADP IN _ 3 case 3:case _
5 | 2 the the DET DT Definite=Def|PronType=Art 3 det 3:det _
6 | 3 AP AP PROPN NNP Number=Sing 4 obl 4:obl:from _
7 | 4 comes come VERB VBZ Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin 0 root 0:root _
8 | 5 this this DET DT Number=Sing|PronType=Dem 6 det 6:det _
9 | 6 story story NOUN NN Number=Sing 4 nsubj 4:nsubj _
10 | 7 : : PUNCT : _ 4 punct 4:punct _
11 |
12 | # sent_id = weblog-blogspot.com_nominations_20041117172713_ENG_20041117_172713-0002
13 | # text = President Bush on Tuesday nominated two individuals to replace retiring jurists on federal courts in the Washington area.
14 | 1 President President PROPN NNP Number=Sing 5 nsubj 5:nsubj _
15 | 2 Bush Bush PROPN NNP Number=Sing 1 flat 1:flat _
16 | 3 on on ADP IN _ 4 case 4:case _
17 | 4 Tuesday Tuesday PROPN NNP Number=Sing 5 obl 5:obl:on _
18 | 5 nominated nominate VERB VBD Mood=Ind|Tense=Past|VerbForm=Fin 0 root 0:root _
19 | 6 two two NUM CD NumType=Card 7 nummod 7:nummod _
20 | 7 individuals individual NOUN NNS Number=Plur 5 obj 5:obj _
21 | 8 to to PART TO _ 9 mark 9:mark _
22 | 9 replace replace VERB VB VerbForm=Inf 5 advcl 5:advcl:to _
23 | 10 retiring retire VERB VBG VerbForm=Ger 11 amod 11:amod _
24 | 11 jurists jurist NOUN NNS Number=Plur 9 obj 9:obj _
25 | 12 on on ADP IN _ 14 case 14:case _
26 | 13 federal federal ADJ JJ Degree=Pos 14 amod 14:amod _
27 | 14 courts court NOUN NNS Number=Plur 11 nmod 11:nmod:on _
28 | 15 in in ADP IN _ 18 case 18:case _
29 | 16 the the DET DT Definite=Def|PronType=Art 18 det 18:det _
30 | 17 Washington Washington PROPN NNP Number=Sing 18 compound 18:compound _
31 | 18 area area NOUN NN Number=Sing 14 nmod 14:nmod:in SpaceAfter=No
32 | 19 . . PUNCT . _ 5 punct 5:punct _
--------------------------------------------------------------------------------
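
Note: the fixture above is standard CoNLL-U: "#" lines carry sentence metadata (newdoc id, sent_id, text), each token line has ten tab-separated fields, and a blank line closes a sentence. A self-contained parsing sketch, independent of flair, with field names taken from the CoNLL-U specification:

    from pathlib import Path

    # The ten standard CoNLL-U columns, in order.
    FIELDS = ["id", "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps", "misc"]

    def read_conllu(path):
        sentence = []
        for line in Path(path).read_text(encoding="utf-8").splitlines():
            if line.startswith("#"):
                continue  # sentence-level metadata
            if not line.strip():
                if sentence:  # a blank line terminates the current sentence
                    yield sentence
                    sentence = []
                continue
            sentence.append(dict(zip(FIELDS, line.split("\t"))))
        if sentence:
            yield sentence

    for sent in read_conllu("tests/resources/tasks/ud_english/en_ewt-ud-dev.conllu"):
        print([(tok["form"], tok["upos"]) for tok in sent])
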
/tests/resources/tasks/up_english/en_ewt-up-dev.conllu:
--------------------------------------------------------------------------------
1 | # newdoc id = weblog-blogspot.com_nominations_20041117172713_ENG_20041117_172713
2 | # sent_id = weblog-blogspot.com_nominations_20041117172713_ENG_20041117_172713-0001
3 | # text = From the AP comes this story :
4 | 1 From from ADP IN _ 3 case 3:case _ _ _
5 | 2 the the DET DT Definite=Def|PronType=Art 3 det 3:det _ _ _
6 | 3 AP AP PROPN NNP Number=Sing 4 obl 4:obl:from _ _ ARG2
7 | 4 comes come VERB VBZ Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin 0 root 0:root _ come.03 V
8 | 5 this this DET DT Number=Sing|PronType=Dem 6 det 6:det _ _ _
9 | 6 story story NOUN NN Number=Sing 4 nsubj 4:nsubj _ _ ARG1
10 | 7 : : PUNCT : _ 4 punct 4:punct _ _ _
11 |
12 | # sent_id = weblog-blogspot.com_nominations_20041117172713_ENG_20041117_172713-0002
13 | # text = President Bush on Tuesday nominated two individuals to replace retiring jurists on federal courts in the Washington area.
14 | 1 President President PROPN NNP Number=Sing 5 nsubj 5:nsubj _ _ ARG0 _
15 | 2 Bush Bush PROPN NNP Number=Sing 1 flat 1:flat _ _ _ _
16 | 3 on on ADP IN _ 4 case 4:case _ _ _ _
17 | 4 Tuesday Tuesday PROPN NNP Number=Sing 5 obl 5:obl:on _ _ ARGM-TMP _
18 | 5 nominated nominate VERB VBD Mood=Ind|Tense=Past|VerbForm=Fin 0 root 0:root _ nominate.01 V _
19 | 6 two two NUM CD NumType=Card 7 nummod 7:nummod _ _ _ _
20 | 7 individuals individual NOUN NNS Number=Plur 5 obj 5:obj _ _ ARG1 ARG0
21 | 8 to to PART TO _ 9 mark 9:mark _ _ _ _
22 | 9 replace replace VERB VB VerbForm=Inf 5 advcl 5:advcl:to _ replace.01 ARG2 V
23 | 10 retiring retire VERB VBG VerbForm=Ger 11 amod 11:amod _ _ _ _
24 | 11 jurists jurist NOUN NNS Number=Plur 9 obj 9:obj _ _ _ ARG1
25 | 12 on on ADP IN _ 14 case 14:case _ _ _ _
26 | 13 federal federal ADJ JJ Degree=Pos 14 amod 14:amod _ _ _ _
27 | 14 courts court NOUN NNS Number=Plur 11 nmod 11:nmod:on _ _ _ _
28 | 15 in in ADP IN _ 18 case 18:case _ _ _ _
29 | 16 the the DET DT Definite=Def|PronType=Art 18 det 18:det _ _ _ _
30 | 17 Washington Washington PROPN NNP Number=Sing 18 compound 18:compound _ _ _ _
31 | 18 area area NOUN NN Number=Sing 14 nmod 14:nmod:in SpaceAfter=No _ _ _
32 | 19 . . PUNCT . _ 5 punct 5:punct _ _ _ _
--------------------------------------------------------------------------------
/tests/resources/tasks/up_english/en_ewt-up-test.conllu:
--------------------------------------------------------------------------------
1 | # newdoc id = weblog-blogspot.com_zentelligence_20040423000200_ENG_20040423_000200
2 | # sent_id = weblog-blogspot.com_zentelligence_20040423000200_ENG_20040423_000200-0001
3 | # text = What if Google Morphed Into GoogleOS?
4 | 1 What what PRON WP PronType=Int 0 root 0:root _ _ _
5 | 2 if if SCONJ IN _ 4 mark 4:mark _ _ _
6 | 3 Google Google PROPN NNP Number=Sing 4 nsubj 4:nsubj _ _ ARG1
7 | 4 Morphed morph VERB VBD Mood=Ind|Tense=Past|VerbForm=Fin 1 advcl 1:advcl:if _ morph.01 V
8 | 5 Into into ADP IN _ 6 case 6:case _ _ _
9 | 6 GoogleOS GoogleOS PROPN NNP Number=Sing 4 obl 4:obl:into SpaceAfter=No _ ARG2
10 | 7 ? ? PUNCT . _ 4 punct 4:punct _ _ _
11 |
12 | # sent_id = weblog-blogspot.com_zentelligence_20040423000200_ENG_20040423_000200-0002
13 | # text = What if Google expanded on its search-engine (and now e-mail) wares into a full-fledged operating system?
14 | 1 What what PRON WP PronType=Int 0 root 0:root _ _ _
15 | 2 if if SCONJ IN _ 4 mark 4:mark _ _ _
16 | 3 Google Google PROPN NNP Number=Sing 4 nsubj 4:nsubj _ _ ARG0
17 | 4 expanded expand VERB VBD Mood=Ind|Tense=Past|VerbForm=Fin 1 advcl 1:advcl:if _ expand.01 V
18 | 5 on on ADP IN _ 15 case 15:case _ _ _
19 | 6 its its PRON PRP$ Gender=Neut|Number=Sing|Person=3|Poss=Yes|PronType=Prs 15 nmod:poss 15:nmod:poss _ _ _
20 | 7 search search NOUN NN Number=Sing 9 compound 9:compound SpaceAfter=No _ _
21 | 8 - - PUNCT HYPH _ 9 punct 9:punct SpaceAfter=No _ _
22 | 9 engine engine NOUN NN Number=Sing 15 compound 15:compound _ _ _
23 | 10 ( ( PUNCT -LRB- _ 9 punct 9:punct SpaceAfter=No _ _
24 | 11 and and CCONJ CC _ 13 cc 13:cc _ _ _
25 | 12 now now ADV RB _ 13 advmod 13:advmod _ _ _
26 | 13 e-mail e-mail NOUN NN Number=Sing 9 conj 9:conj:and|15:compound SpaceAfter=No _ _
27 | 14 ) ) PUNCT -RRB- _ 15 punct 15:punct _ _ _
28 | 15 wares wares NOUN NNS Number=Plur 4 obl 4:obl:on _ _ ARG1
29 | 16 into into ADP IN _ 22 case 22:case _ _ _
30 | 17 a a DET DT Definite=Ind|PronType=Art 22 det 22:det _ _ _
31 | 18 full full ADV RB _ 20 advmod 20:advmod SpaceAfter=No _ _
32 | 19 - - PUNCT HYPH _ 20 punct 20:punct SpaceAfter=No _ _
33 | 20 fledged fledged ADJ JJ Degree=Pos 22 amod 22:amod _ _ _
34 | 21 operating operating NOUN NN Number=Sing 22 compound 22:compound _ _ _
35 | 22 system system NOUN NN Number=Sing 4 obl 4:obl:into SpaceAfter=No _ ARG4
36 | 23 ? ? PUNCT . _ 4 punct 4:punct _ _ _
--------------------------------------------------------------------------------
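
Note: the up_english fixtures extend the ten CoNLL-U columns with Universal Propbank semantic-role annotation: column 11 holds the predicate frame ("nominate.01", or "_" on non-predicate tokens) and every further column holds the argument labels for one predicate of the sentence ("V" on the predicate token itself, labels such as ARG0, ARG1 or ARGM-TMP on its arguments). A small sketch of splitting a token line accordingly; the column layout is inferred from the fixtures above:

    def split_up_line(line: str):
        # The first ten tab-separated fields are standard CoNLL-U; the remainder
        # is the Propbank extension: one frame column, then one argument column
        # per predicate in the sentence.
        fields = line.rstrip("\n").split("\t")
        return fields[:10], fields[10], fields[11:]

    # For token 5 ("nominated") of the second dev sentence this yields the
    # frame "nominate.01" and the argument columns ["V", "_"]: it is the first
    # of the sentence's two predicates and no argument of the second.
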
/tests/resources/visual/snippet.txt:
--------------------------------------------------------------------------------
1 | The U.S. Centers for Disease Control and Prevention initially advised school systems to close if outbreaks occurred , then reversed itself , saying the apparent mildness of the virus meant most schools and day care centers should stay open , even if they had confirmed cases of swine flu .
2 | When Ms. Winfrey invited Suzanne Somers to share her controversial views about bio-identical hormone treatment on her syndicated show in 2009 , it won Ms. Winfrey a rare dollop of unflattering press , including a Newsweek cover story titled " Crazy Talk : Oprah , Wacky Cures & You . "
3 | Elk calling -- a skill that hunters perfected long ago to lure game with the promise of a little romance -- is now its own sport .
4 | Don 't !
5 | Fish , ranked 98th in the world , fired 22 aces en route to a 6-3 , 6-7 ( 5 / 7 ) , 7-6 ( 7 / 4 ) win over seventh-seeded Argentinian David Nalbandian .
6 | Why does everything have to become such a big issue ?
7 | AMMAN ( Reuters ) - King Abdullah of Jordan will meet U.S. President Barack Obama in Washington on April 21 to lobby on behalf of Arab states for a stronger U.S. role in Middle East peacemaking , palace officials said on Sunday .
8 | To help keep traffic flowing the Congestion Charge will remain in operation through-out the strike and TfL will be suspending road works on major London roads wherever possible .
9 | If no candidate wins an absolute majority , there will be a runoff between the top two contenders , most likely in mid-October .
10 | Authorities previously served search warrants at Murray 's Las Vegas home and his businesses in Las Vegas and Houston .
--------------------------------------------------------------------------------
/tests/test_lemmatizer.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | import flair
4 | from flair.data import Sentence
5 | from flair.models import Lemmatizer
6 |
7 |
8 | def test_words_to_char_indices():
9 | sentence = Sentence("Hello look what a beautiful day!")
10 |
11 | lemmatizer = Lemmatizer() # lemmatizer uses standard char dictionary
12 |
13 | d = lemmatizer.dummy_index
14 | e = lemmatizer.end_index
15 | s = lemmatizer.start_index
16 |
17 | string_list = sentence.to_tokenized_string().split()
18 |
19 | # With end symbol, without start symbol, padding in front
20 | target = torch.tensor(
21 | [
22 | [d, d, d, d, 55, 5, 15, 15, 12, e],
23 | [d, d, d, d, d, 15, 12, 12, 28, e],
24 | [d, d, d, d, d, 23, 13, 9, 8, e],
25 | [d, d, d, d, d, d, d, d, 9, e],
26 | [24, 5, 9, 16, 8, 7, 22, 16, 15, e],
27 | [d, d, d, d, d, d, 14, 9, 27, e],
28 | [d, d, d, d, d, d, d, d, 76, e],
29 | ],
30 | dtype=torch.long,
31 | ).to(flair.device)
32 | out = lemmatizer.words_to_char_indices(string_list, end_symbol=True, start_symbol=False, padding_in_front=True)
33 | assert torch.equal(target, out)
34 |
35 | # Without end symbol, with start symbol, padding in back
36 | target = torch.tensor(
37 | [
38 | [s, 55, 5, 15, 15, 12, d, d, d, d],
39 | [s, 15, 12, 12, 28, d, d, d, d, d],
40 | [s, 23, 13, 9, 8, d, d, d, d, d],
41 | [s, 9, d, d, d, d, d, d, d, d],
42 | [s, 24, 5, 9, 16, 8, 7, 22, 16, 15],
43 | [s, 14, 9, 27, d, d, d, d, d, d],
44 | [s, 76, d, d, d, d, d, d, d, d],
45 | ],
46 | dtype=torch.long,
47 | ).to(flair.device)
48 | out = lemmatizer.words_to_char_indices(string_list, end_symbol=False, start_symbol=True, padding_in_front=False)
49 | assert torch.equal(target, out)
50 |
51 | # Without end symbol, without start symbol, padding in front
52 | assert lemmatizer.words_to_char_indices(
53 | string_list, end_symbol=False, start_symbol=False, padding_in_front=True
54 | ).size() == (7, 9)
55 |
--------------------------------------------------------------------------------
/tests/test_multitask.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import flair
4 | from flair.data import Sentence
5 | from flair.datasets import SENTEVAL_CR, SENTEVAL_SST_GRANULAR
6 | from flair.embeddings import TransformerDocumentEmbeddings
7 | from flair.models import MultitaskModel, TextClassifier
8 | from flair.nn.multitask import make_multitask_model_and_corpus
9 | from flair.trainers import ModelTrainer
10 |
11 |
12 | @pytest.mark.integration()
13 | def test_train_load_use_classifier(results_base_path, tasks_base_path):
14 | # --- Embeddings that are shared by both models --- #
15 | shared_embedding = TransformerDocumentEmbeddings("distilbert-base-uncased", fine_tune=True)
16 |
17 | # --- Task 1: Sentiment Analysis (5-class) --- #
18 | flair.set_seed(123)
19 |
20 | # Define corpus and model
21 | corpus_1 = SENTEVAL_SST_GRANULAR().downsample(0.01)
22 |
23 | model_1 = TextClassifier(
24 | shared_embedding, label_dictionary=corpus_1.make_label_dictionary("class", add_unk=False), label_type="class"
25 | )
26 |
27 | # -- Task 2: Binary Sentiment Analysis on Customer Reviews -- #
28 | flair.set_seed(123)
29 |
30 | # Define corpus and model
31 | corpus_2 = SENTEVAL_CR().downsample(0.01)
32 |
33 | model_2 = TextClassifier(
34 | shared_embedding,
35 | label_dictionary=corpus_2.make_label_dictionary("sentiment", add_unk=False),
36 | label_type="sentiment",
37 | inverse_model=True,
38 | )
39 |
40 | # -- Define mapping (which tagger should train on which model) -- #
41 | multitask_model, multicorpus = make_multitask_model_and_corpus(
42 | [
43 | (model_1, corpus_1),
44 | (model_2, corpus_2),
45 | ]
46 | )
47 |
48 | # -- Create model trainer and train -- #
49 | trainer = ModelTrainer(multitask_model, multicorpus)
50 |
51 | trainer.fine_tune(results_base_path, max_epochs=1)
52 |
53 | del trainer, multitask_model, corpus_1, corpus_2
54 | loaded_model = MultitaskModel.load(results_base_path / "final-model.pt")
55 |
56 | sentence = Sentence("I love Berlin")
57 | sentence_empty = Sentence(" ")
58 |
59 | loaded_model.predict(sentence)
60 | loaded_model.predict([sentence, sentence_empty])
61 | loaded_model.predict([sentence_empty])
62 |
63 | for label in sentence.labels:
64 | assert label.value is not None
65 | assert 0.0 <= label.score <= 1.0
66 | assert isinstance(label.score, float)
67 | del loaded_model
68 |
--------------------------------------------------------------------------------
/tests/test_tars.py:
--------------------------------------------------------------------------------
1 | from flair.data import Sentence
2 | from flair.datasets import ClassificationCorpus
3 | from flair.models import TARSClassifier
4 | from flair.trainers import ModelTrainer
5 |
6 |
7 | def test_init_tars_and_switch(tasks_base_path):
8 | # test corpus
9 | corpus = ClassificationCorpus(tasks_base_path / "imdb")
10 |
11 | # create a TARS classifier
12 | tars = TARSClassifier(
13 | task_name="2_CLASS",
14 | label_dictionary=corpus.make_label_dictionary(label_type="class"),
15 | label_type="class",
16 | )
17 |
18 | # check if right number of classes
19 | assert len(tars.get_current_label_dictionary()) == 2
20 |
21 | # switch to task with only one label
22 | tars.add_and_switch_to_new_task("1_CLASS", "one class", "testlabel")
23 |
24 | # check if right number of classes
25 | assert len(tars.get_current_label_dictionary()) == 1
26 |
27 | # switch to task with three labels provided as list
28 | tars.add_and_switch_to_new_task("3_CLASS", ["list 1", "list 2", "list 3"], "testlabel")
29 |
30 | # check if right number of classes
31 | assert len(tars.get_current_label_dictionary()) == 3
32 |
33 | # switch to task with four labels provided as set
34 | tars.add_and_switch_to_new_task("4_CLASS", {"set 1", "set 2", "set 3", "set 4"}, "testlabel")
35 |
36 | # check if right number of classes
37 | assert len(tars.get_current_label_dictionary()) == 4
38 |
39 | # switch to task with two labels provided as Dictionary
40 | tars.add_and_switch_to_new_task("2_CLASS_AGAIN", corpus.make_label_dictionary(label_type="class"), "testlabel")
41 |
42 | # check if right number of classes
43 | assert len(tars.get_current_label_dictionary()) == 2
44 |
45 |
46 | def test_train_tars(tasks_base_path, results_base_path):
47 | # test corpus
48 | corpus = ClassificationCorpus(tasks_base_path / "imdb_underscore")
49 |
50 | # create a TARS classifier
51 | tars = TARSClassifier(embeddings="sshleifer/tiny-distilroberta-base")
52 |
53 | # switch to a new task (TARS can do multiple tasks so you must define one)
54 | tars.add_and_switch_to_new_task(
55 | task_name="question 2_CLASS",
56 | label_dictionary=corpus.make_label_dictionary(label_type="class"),
57 | label_type="class",
58 | )
59 |
60 | # initialize the text classifier trainer
61 | trainer = ModelTrainer(tars, corpus)
62 |
63 | # start the training
64 | trainer.train(
65 | base_path=results_base_path,
66 | learning_rate=0.02,
67 | mini_batch_size=1,
68 | max_epochs=1,
69 | )
70 |
71 | sentence = Sentence("This is great!")
72 | tars.predict(sentence)
73 |
--------------------------------------------------------------------------------
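
Note: test_train_tars above trains the classifier on a fixed task; TARS models are also commonly used zero-shot. A hedged sketch of that usage — the "tars-base" model identifier and the predict_zero_shot call are assumptions about the flair TARS API and do not appear in the tests above:

    from flair.data import Sentence
    from flair.models import TARSClassifier

    # Load a pretrained TARS model (assumed published model name) and classify
    # against ad-hoc candidate labels, without any task-specific training step.
    tars = TARSClassifier.load("tars-base")
    sentence = Sentence("This is great!")
    tars.predict_zero_shot(sentence, ["positive", "negative"])
    print(sentence.labels)
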
/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | from flair.data import Dictionary
2 | from flair.training_utils import convert_labels_to_one_hot
3 |
4 |
5 | def test_convert_labels_to_one_hot():
6 | label_dict = Dictionary(add_unk=False)
7 | label_dict.add_item("class-1")
8 | label_dict.add_item("class-2")
9 | label_dict.add_item("class-3")
10 |
11 | one_hot = convert_labels_to_one_hot([["class-2"]], label_dict)
12 |
13 | assert one_hot[0][0] == 0
14 | assert one_hot[0][1] == 1
15 | assert one_hot[0][2] == 0
16 |
--------------------------------------------------------------------------------
/tests/test_visual.py:
--------------------------------------------------------------------------------
1 | from flair.data import Sentence, Span, Token
2 | from flair.embeddings import FlairEmbeddings
3 | from flair.visual import Highlighter
4 | from flair.visual.ner_html import HTML_PAGE, PARAGRAPH, TAGGED_ENTITY, render_ner_html
5 | from flair.visual.training_curves import Plotter
6 |
7 |
8 | def test_highlighter(resources_path):
9 | with (resources_path / "visual/snippet.txt").open() as f:
10 | sentences = [x for x in f.read().split("\n") if x]
11 |
12 | embeddings = FlairEmbeddings("news-forward")
13 |
14 | features = embeddings.lm.get_representation(sentences[0], "", "").squeeze()
15 |
16 | Highlighter().highlight_selection(
17 | features,
18 | sentences[0],
19 | n=1000,
20 | file_=str(resources_path / "visual/highlight.html"),
21 | )
22 |
23 | # clean up directory
24 | (resources_path / "visual/highlight.html").unlink()
25 |
26 |
27 | def test_plotting_training_curves_and_weights(resources_path):
28 | plotter = Plotter()
29 | plotter.plot_training_curves(resources_path / "visual/loss.tsv")
30 | plotter.plot_weights(resources_path / "visual/weights.txt")
31 |
32 | # clean up directory
33 | (resources_path / "visual/weights.png").unlink()
34 | (resources_path / "visual/training.png").unlink()
35 |
36 |
37 | def mock_ner_span(text, tag, start, end):
38 | span = Span([]).set_label("class", tag)
39 | span.start_pos = start
40 | span.end_pos = end
41 | span.tokens = [Token(text[start:end])]
42 | return span
43 |
44 |
45 | def test_html_rendering():
46 | text = (
47 | "Boris Johnson has been elected new Conservative leader in "
48 | "a ballot of party members and will become the "
49 | "next UK prime minister. &"
50 | )
51 | sentence = Sentence(text)
52 |
53 | print(sentence[0:2].add_label("ner", "PER"))
54 | print(sentence[6:7].add_label("ner", "MISC"))
55 | print(sentence[19:20].add_label("ner", "LOC"))
56 | colors = {
57 | "PER": "#F7FF53",
58 | "ORG": "#E8902E",
59 | "LOC": "yellow",
60 | "MISC": "#4647EB",
61 | "O": "#ddd",
62 | }
63 | actual = render_ner_html([sentence], colors=colors)
64 |
65 | expected_res = HTML_PAGE.format(
66 | text=PARAGRAPH.format(
67 | sentence=TAGGED_ENTITY.format(color="#F7FF53", entity="Boris Johnson", label="PER")
68 | + " has been elected new "
69 | + TAGGED_ENTITY.format(color="#4647EB", entity="Conservative", label="MISC")
70 | + " leader in a ballot of party members and will become the next "
71 | + TAGGED_ENTITY.format(color="yellow", entity="UK", label="LOC")
72 | + " prime minister. &"
73 | ),
74 | title="Flair",
75 | )
76 |
77 | assert expected_res == actual
78 |
--------------------------------------------------------------------------------