├── .pylintrc ├── runtime.txt ├── MANIFEST.in ├── Procfile ├── test_requirements.txt ├── esperanto_analyzer ├── web │ ├── __init__.py │ ├── .gitignore │ ├── api │ │ ├── __init__.py │ │ ├── helpers.py │ │ ├── errors.py │ │ ├── results.py │ │ ├── server.py │ │ └── morphological_endpoint.py │ ├── context.py │ ├── runserver.py │ ├── README.md │ └── LICENSE ├── cli │ ├── __init__.py │ └── cli.py ├── analyzers │ ├── __init__.py │ ├── morphological │ │ ├── analyze_result.py │ │ ├── noun.py │ │ ├── adjective.py │ │ ├── article.py │ │ ├── __init__.py │ │ ├── interjection.py │ │ ├── conjunction.py │ │ ├── base.py │ │ ├── preposition.py │ │ ├── numeral.py │ │ ├── adverb.py │ │ ├── verb.py │ │ └── pronoun.py │ └── morphological_analyzer.py ├── speech │ ├── .DS_Store │ ├── numeral.py │ ├── verb.py │ ├── noun.py │ ├── interjection.py │ ├── conjunction.py │ ├── pronoun.py │ ├── __init__.py │ ├── preposition.py │ ├── adjective.py │ ├── adverb.py │ ├── article.py │ └── word.py ├── __init__.py └── morphological_sentence_analyzer.py ├── pip-selfcheck.json ├── development_requirements.txt ├── docs ├── esperanto_analyzer_screenshot.png ├── index.rst ├── make.bat ├── Makefile └── conf.py ├── .travis.yml ├── requirements.txt ├── .coveragerc ├── tests ├── context.py ├── cli │ ├── context.py │ └── test_cly.py ├── analyzers │ ├── context.py │ └── morphological │ │ ├── context.py │ │ ├── test_analyze_result.py │ │ ├── test_verb_morphological_analyzer.py │ │ ├── test_noun_morphological_analyzer.py │ │ ├── test_base_morphological_analyzer.py │ │ ├── test_adjective_morphological_analyzer.py │ │ ├── test_article_morphological_analyzer.py │ │ ├── test_conjuction_morphological_analyzer.py │ │ ├── test_adverb_morphological_analyzer.py │ │ ├── test_interjection_morphological_analyzer.py │ │ ├── test_numeral_morphological_analyzer.py │ │ ├── test_preposition_morphological_analyzer.py │ │ └── test_pronoun_morphological_analyzer.py ├── speech │ ├── context.py │ ├── test_verb.py │ ├── test_numeral.py │ 
├── test_adjective.py │ ├── test_conjunction.py │ ├── test_preposition.py │ ├── test_interjection.py │ ├── test_noun.py │ ├── test_pronoun.py │ ├── test_adverb.py │ ├── test_word.py │ └── test_article.py ├── test_basic.py ├── conftest.py ├── web │ └── test_web_api_endpoints.py └── test_morphological_sentence_analyzer.py ├── bin ├── console ├── cli └── server ├── Makefile ├── .editorconfig ├── TODO.todo ├── LICENSE ├── setup.py ├── .gitignore └── README.md /.pylintrc: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /runtime.txt: -------------------------------------------------------------------------------- 1 | python-3.7.0 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: python esperanto_analyzer/web/runserver.py 2 | -------------------------------------------------------------------------------- /test_requirements.txt: -------------------------------------------------------------------------------- 1 | # Tests tools 2 | pytest 3 | pytest-cov 4 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/__init__.py: -------------------------------------------------------------------------------- 1 | from .api.server import run_app 2 | -------------------------------------------------------------------------------- /pip-selfcheck.json: -------------------------------------------------------------------------------- 1 | {"last_check":"2018-08-29T14:35:06Z","pypi_version":"18.0"} -------------------------------------------------------------------------------- 
/esperanto_analyzer/web/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.egg-info 3 | venv 4 | .DS_Store 5 | 6 | -------------------------------------------------------------------------------- /esperanto_analyzer/cli/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring 2 | from .cli import CLI 3 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/api/__init__.py: -------------------------------------------------------------------------------- 1 | from .morphological_endpoint import MorphologicalAnalyzeEndpoint 2 | -------------------------------------------------------------------------------- /development_requirements.txt: -------------------------------------------------------------------------------- 1 | ## Development dependencies 2 | pylint 3 | pylint-json2html 4 | sphinx 5 | ipython 6 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/api/helpers.py: -------------------------------------------------------------------------------- 1 | # helpers.py 2 | # This is for helper functions that don't fit in a specific module 3 | -------------------------------------------------------------------------------- /esperanto_analyzer/analyzers/__init__.py: -------------------------------------------------------------------------------- 1 | from esperanto_analyzer.analyzers.morphological_analyzer import MorphologicalAnalyzer 2 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fidelisrafael/esperanto-analyzer/HEAD/esperanto_analyzer/speech/.DS_Store -------------------------------------------------------------------------------- 
/docs/esperanto_analyzer_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fidelisrafael/esperanto-analyzer/HEAD/docs/esperanto_analyzer_screenshot.png -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.7-dev" # 3.7 development branch 4 | install: 5 | - make init_dev 6 | script: 7 | - make test 8 | -------------------------------------------------------------------------------- /esperanto_analyzer/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Entry point to load classes 3 | """ 4 | 5 | from .morphological_sentence_analyzer import MorphologicalSentenceAnalyzer 6 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/api/errors.py: -------------------------------------------------------------------------------- 1 | class SentenceRequiredError(Exception): 2 | pass 3 | 4 | 5 | class SentenceInvalidError(Exception): 6 | pass 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ## CLI 2 | tableprint==0.8.0 3 | 4 | ## Web 5 | Flask==1.0.2 6 | Flask-RESTful==0.3.6 7 | flask-restful-swagger==0.20.1 8 | flask-cors==3.0.6 9 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/context.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))) 5 | 6 | import esperanto_analyzer 7 | -------------------------------------------------------------------------------- /.coveragerc: 
-------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | test/* 4 | setup.py 5 | esperanto_analyzer/web/runserver.py 6 | esperanto_analyzer/web/context.py 7 | 8 | [html] 9 | directory = coverage 10 | -------------------------------------------------------------------------------- /tests/context.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | import os 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 6 | 7 | import esperanto_analyzer 8 | -------------------------------------------------------------------------------- /tests/cli/context.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | import os 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))) 6 | 7 | import esperanto_analyzer 8 | -------------------------------------------------------------------------------- /tests/analyzers/context.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | import os 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))) 6 | 7 | import esperanto_analyzer 8 | -------------------------------------------------------------------------------- /tests/speech/context.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | import os 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))) 6 | 7 | import esperanto_analyzer 8 | -------------------------------------------------------------------------------- /tests/analyzers/morphological/context.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 
4 | import os 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../..'))) 6 | 7 | import esperanto_analyzer 8 | -------------------------------------------------------------------------------- /bin/console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import IPython 4 | import sys 5 | import os 6 | 7 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 8 | 9 | import esperanto_analyzer 10 | 11 | IPython.embed() 12 | -------------------------------------------------------------------------------- /bin/cli: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import os 5 | 6 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 7 | 8 | from esperanto_analyzer.cli import CLI 9 | 10 | CLI.run(input_sentence=sys.argv[1]) 11 | -------------------------------------------------------------------------------- /bin/server: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import os 5 | 6 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 7 | 8 | from esperanto_analyzer.web import run_app 9 | 10 | run_app(debug=True, port=5000) 11 | -------------------------------------------------------------------------------- /tests/test_basic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | import pytest 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer import MorphologicalSentenceAnalyzer 9 | 10 | class TestBasic: 11 | def test_import(self): 12 | assert MorphologicalSentenceAnalyzer 13 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/runserver.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from context import esperanto_analyzer 3 | 4 | from esperanto_analyzer.web.api.server import run_app 5 | 6 | if __name__ == '__main__': 7 | port = int(os.environ.get('PORT', 5000)) 8 | host = str(os.environ.get('HOST', '0.0.0.0')) 9 | 10 | run_app(debug=True, host=host, port=port) 11 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/numeral.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class represents one word belonging to the grammar class classified as 'Numeral' 3 | 4 | What's a Numeral? 5 | === 6 | In linguistics, a numeral is a member of a part of speech(word) characterized by the 7 | designation of numbers; 8 | """ 9 | from .word import Word 10 | 11 | # pylint: disable=too-few-public-methods,missing-docstring,no-self-use 12 | class Numeral(Word): 13 | pass 14 | -------------------------------------------------------------------------------- /esperanto_analyzer/analyzers/morphological/analyze_result.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class represents one analyzed raw word transformed into one `Part of Speech` object 3 | such as `Verb`, `Adverb`. 4 | 5 | Eg: word = AnalyzedWord.
6 | """ 7 | 8 | # pylint: disable=too-few-public-methods,missing-docstring 9 | class AnalyzeResult: 10 | def __init__(self, result, raw_word): 11 | self.result = result 12 | self.raw_word = raw_word 13 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/api/results.py: -------------------------------------------------------------------------------- 1 | from flask_restful import Resource, Api, marshal_with, fields, abort 2 | from flask_restful_swagger import swagger 3 | 4 | @swagger.model 5 | class MorphologicalAnalyzeResult(object): 6 | """The result of a call to /analyze""" 7 | resource_fields = { 8 | 'word': fields.String, 9 | 'value': fields.String, 10 | } 11 | 12 | def __init__(self, results): 13 | self.results = results 14 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/verb.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class represents one word belonging to the grammar class classified as 'Verb' 3 | 4 | What's a Verb? 5 | === 6 | A verb is a word (part of speech) that in syntax conveys an action (bring, read, walk), 7 | an occurrence (happen, become), or a state of being (be, exist, stand) 8 | """ 9 | 10 | from .word import Word 11 | 12 | # pylint: disable=too-few-public-methods,missing-docstring,no-self-use 13 | class Verb(Word): 14 | 15 | def has_plural(self): 16 | return True 17 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. sample documentation master file, created by 2 | sphinx-quickstart on Mon Apr 16 21:22:43 2012. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to sample's documentation! 7 | ================================== 8 | 9 | Contents: 10 | 11 | ..
toctree:: 12 | :maxdepth: 2 13 | 14 | 15 | 16 | Indices and tables 17 | ================== 18 | 19 | * :ref:`genindex` 20 | * :ref:`modindex` 21 | * :ref:`search` 22 | 23 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/noun.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class represents one word belonging to the grammar class classified as 'Noun' 3 | 4 | What's a Noun? 5 | === 6 | A noun is a word(other than a pronoun) that functions as the name of some specific thing 7 | or set of things, such as living creatures, objects, places, actions, feelings... 8 | """ 9 | from .word import Word 10 | 11 | # pylint: disable=too-few-public-methods,missing-docstring,no-self-use 12 | class Noun(Word): 13 | 14 | def has_gender(self): 15 | return True 16 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/interjection.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class represents one word belonging to the grammar class classified as 'Interjection' 3 | 4 | What's an Interjection? 5 | === 6 | In linguistics, an interjection is a word or expression that occurs as an utterance on its 7 | own and expresses a spontaneous feeling or reaction. 8 | """ 9 | 10 | from .word import Word 11 | 12 | # pylint: disable=too-few-public-methods,missing-docstring,no-self-use 13 | class Interjection(Word): 14 | 15 | def has_plural(self): 16 | return False 17 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/conjunction.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class represents one word belonging to the grammar class classified as 'Conjunction' 3 | 4 | What's a Conjunction?
5 | === 6 | In grammar, a conjunction is a part of speech(a word) that connects words, phrases, or clauses 7 | that are called the conjuncts of the conjoining construction. 8 | """ 9 | 10 | from .word import Word 11 | 12 | # pylint: disable=too-few-public-methods,missing-docstring,no-self-use 13 | class Conjunction(Word): 14 | 15 | def has_plural(self): 16 | return False 17 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/pronoun.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class represent one word beloging to grammar class classified as 'Pronoun' 3 | 4 | What's a Pronoun? 5 | === 6 | In linguistics and grammar, a pronoun is a word that substitutes for a noun 7 | or noun phrase. 8 | Pronouns are often used to refer to a noun that has already been mentioned. 9 | """ 10 | 11 | from .word import Word 12 | 13 | # pylint: disable=too-few-public-methods,missing-docstring,no-self-use 14 | class Pronoun(Word): 15 | 16 | def has_gender(self): 17 | return True 18 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | init: 2 | pip install -r requirements.txt 3 | 4 | init_dev: 5 | pip install -r development_requirements.txt 6 | pip install -r test_requirements.txt 7 | pip install -r requirements.txt 8 | 9 | test: 10 | pytest tests --cov-config .coveragerc --cov=esperanto_analyzer --cov-report=html 11 | 12 | lint: 13 | pylint esperanto_analyzer/ --reports=n -f json > .lint_results 14 | 15 | formatted_lint: 16 | pylint esperanto_analyzer/ --reports=n -f json | pylint-json2html -o pylint.html 17 | 18 | web_api: 19 | python esperanto_analyzer/web/runserver.py 20 | -------------------------------------------------------------------------------- /esperanto_analyzer/analyzers/morphological/noun.py: 
-------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring 2 | import re 3 | 4 | from esperanto_analyzer.speech import Noun 5 | from esperanto_analyzer.analyzers.morphological import BaseMorphologicalAnalyzer 6 | 7 | class NounMorphologicalAnalyzer(BaseMorphologicalAnalyzer): 8 | # MATCHES: ["patro", "patroj", "patron", "patrojn"] 9 | # DONT MATCHES: ["io", "lo"] 10 | MATCH_REGEXP = re.compile('(^[a-zA-Zĉĝĵĥŝŭ]{2,}(o(j?n?)?)$)', re.IGNORECASE|re.UNICODE) 11 | 12 | 13 | @staticmethod 14 | def word_class(): 15 | return Noun 16 | -------------------------------------------------------------------------------- /esperanto_analyzer/analyzers/morphological/adjective.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring 2 | import re 3 | 4 | from esperanto_analyzer.speech import Adjective 5 | from esperanto_analyzer.analyzers.morphological import BaseMorphologicalAnalyzer 6 | 7 | class AdjectiveMorphologicalAnalyzer(BaseMorphologicalAnalyzer): 8 | # MATCHES: ["bela", "belaj", "belan", "belajn"] 9 | # DONT MATCHES: ["la"] => Article 10 | MATCH_REGEXP = re.compile('(^[a-zA-Zĉĝĵĥŝŭ]{2,}(a(j?n?)?)$)', re.IGNORECASE|re.UNICODE) 11 | 12 | @staticmethod 13 | def word_class(): 14 | return Adjective 15 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Make 'Parts of Speech' available through the namespace: `esperanto_analyzer.speech` 3 | Eg: `from esperanto_analyzer.speech import Word, Adjective` 4 | """ 5 | 6 | from .word import Word, NotContentError 7 | from .adverb import Adverb 8 | from .adjective import Adjective 9 | from .article import Article, InvalidArticleError 10 | from .conjunction import Conjunction 11 | from .interjection import Interjection 12 | from .noun import Noun 13 | 
from .numeral import Numeral 14 | from .preposition import Preposition 15 | from .pronoun import Pronoun 16 | from .verb import Verb 17 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/preposition.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class represents one word belonging to the grammar class classified as 'Preposition' 3 | 4 | What's a Preposition? 5 | === 6 | A word governing, and usually preceding, a noun or pronoun and expressing a relation 7 | to another word or element in the clause. 8 | Prepositions are often used to express spatial or temporal relations (in, under, towards, before) 9 | """ 10 | 11 | from .word import Word 12 | 13 | # pylint: disable=too-few-public-methods,missing-docstring,no-self-use 14 | class Preposition(Word): 15 | 16 | def has_plural(self): 17 | return False 18 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/adjective.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class represents one word belonging to the grammar class classified as 'Adjective' 3 | 4 | What's an Adjective? 5 | === 6 | A word belonging to one of the major form classes in any of numerous languages and typically serving 7 | as a modifier of a noun to denote a quality of the thing named, to indicate its quantity or extent, 8 | or to specify a thing as distinct from something else. The word red in "the red car" is an adjective.
9 | """ 10 | 11 | from .word import Word 12 | 13 | # pylint: disable=too-few-public-methods,missing-docstring 14 | class Adjective(Word): 15 | pass 16 | -------------------------------------------------------------------------------- /esperanto_analyzer/analyzers/morphological/article.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring 2 | import re 3 | 4 | from esperanto_analyzer.speech import Article 5 | from esperanto_analyzer.analyzers.morphological import BaseMorphologicalAnalyzer 6 | 7 | class ArticleMorphologicalAnalyzer(BaseMorphologicalAnalyzer): 8 | ARTICLES_LIST = ['la'] 9 | 10 | ARTICLES_MATCH_REGEXP = re.compile('|'.join(ARTICLES_LIST), re.IGNORECASE|re.UNICODE) 11 | 12 | # MATCHES: ["la"] 13 | MATCH_REGEXP = re.compile('^(%s)$' % (ARTICLES_MATCH_REGEXP.pattern), re.IGNORECASE|re.UNICODE) 14 | 15 | @staticmethod 16 | def word_class(): 17 | return Article 18 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | import pytest 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer.web.api.server import create_app 9 | 10 | @pytest.fixture 11 | def app(): 12 | # create the app with common test config 13 | app = create_app().app 14 | app.config['TESTING'] = True 15 | 16 | yield app 17 | 18 | return app 19 | 20 | @pytest.fixture 21 | def client(app): 22 | """A test client for the app.""" 23 | return app.test_client() 24 | 25 | @pytest.fixture 26 | def runner(app): 27 | """A test runner for the app's Click commands.""" 28 | return app.test_cli_runner() 29 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/adverb.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class represent one word 
beloging to grammar class classified as 'Adverb' 3 | 4 | What's an Adverb? 5 | === 6 | A word or phrase that modifies or qualifies an adjective, verb, or other adverb or 7 | a word group, expressing a relation of place, time, circumstance, manner, cause, degree, etc. 8 | (e.g., now, yesterday, today, gently, quite, then, there). 9 | """ 10 | 11 | from .word import Word 12 | 13 | # pylint: disable=too-few-public-methods,missing-docstring 14 | class Adverb(Word): 15 | 16 | def has_plural(self): 17 | """ 18 | Adverbs are invariable 19 | """ 20 | return False 21 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/README.md: -------------------------------------------------------------------------------- 1 | Esperanto Analyzer WEB API 2 | ================= 3 | 4 | Usage 5 | ----- 6 | 7 | Clone the repo: 8 | (...) 9 | 10 | Create virtualenv: 11 | 12 | virtualenv venv 13 | source venv/bin/activate 14 | pip install -r requirements.txt 15 | python setup.py develop # or install if you prefer 16 | 17 | Run the sample server 18 | 19 | python runserver.py 20 | 21 | Try the endpoints: 22 | 23 | curl -XGET http://localhost:5000/analyze?sentence=Mia%20nomo%20estas%20Rafaelo%20kaj%20mi%20venas%20el%20Brazilo 24 | 25 | 26 | Swagger docs available at `http://localhost:5000/api/spec.html` 27 | 28 | 29 | License 30 | ------- 31 | 32 | MIT, see LICENSE file 33 | 34 | -------------------------------------------------------------------------------- /tests/speech/test_verb.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pytest 4 | 5 | from context import esperanto_analyzer 6 | 7 | from esperanto_analyzer.speech import Word, Verb, NotContentError 8 | 9 | class TestVerbBasic(): 10 | def test_import(self): 11 | assert(Verb) 12 | 13 | def test_init(self): 14 | assert(Verb('esti') != None) 15 | 16 | def test_superclass(self): 17 | assert(issubclass(Verb, Word)) 
18 | 19 | def test_valid_content(self): 20 | assert(Verb('esti')) 21 | 22 | def test_invalid_content(self): 23 | with pytest.raises(NotContentError): 24 | assert(Verb('')) 25 | 26 | def test_content(self): 27 | word = Verb('content') 28 | 29 | assert(word.content == 'content') 30 | 31 | def test_metadata_exists(self): 32 | word = Verb(' ') 33 | 34 | assert(word.metadata == dict()) 35 | -------------------------------------------------------------------------------- /tests/speech/test_numeral.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pytest 4 | 5 | from context import esperanto_analyzer 6 | 7 | from esperanto_analyzer.speech import Word, Numeral, NotContentError 8 | 9 | class TestNumeralBasic(): 10 | def test_import(self): 11 | assert(Numeral) 12 | 13 | def test_init(self): 14 | assert(Numeral('10') != None) 15 | 16 | def test_superclass(self): 17 | assert(issubclass(Numeral, Word)) 18 | 19 | def test_valid_content(self): 20 | assert(Numeral('dek')) 21 | 22 | def test_invalid_content(self): 23 | with pytest.raises(NotContentError): 24 | assert(Numeral('')) 25 | 26 | def test_content(self): 27 | word = Numeral('du dek') 28 | 29 | assert(word.content == 'du dek') 30 | 31 | def test_metadata_exists(self): 32 | word = Numeral(' ') 33 | 34 | assert(word.metadata == dict()) 35 | -------------------------------------------------------------------------------- /tests/analyzers/morphological/test_analyze_result.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,missing-docstring 2 | 3 | import pytest 4 | 5 | from context import esperanto_analyzer 6 | 7 | from esperanto_analyzer.analyzers.morphological import AnalyzeResult 8 | 9 | class TestAdjectiveBasic(): 10 | TEST_WORD = 'kapo' 11 | 12 | def test_import(self): 13 | assert AnalyzeResult 14 | 15 | def test_init(self): 16 | assert AnalyzeResult(result=None, raw_word=None) 
is not None 17 | 18 | def test_result(self): 19 | analyze_result = AnalyzeResult(dict(some='object'), raw_word=self.TEST_WORD) 20 | 21 | assert analyze_result.result == dict(some='object') 22 | 23 | def test_raw_word(self): 24 | analyze_result = AnalyzeResult(dict(some='object'), raw_word=self.TEST_WORD) 25 | 26 | assert analyze_result.raw_word == self.TEST_WORD 27 | -------------------------------------------------------------------------------- /tests/speech/test_adjective.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pytest 4 | 5 | from context import esperanto_analyzer 6 | 7 | from esperanto_analyzer.speech import Word, Adjective, NotContentError 8 | 9 | class TestAdjectiveBasic(): 10 | def test_import(self): 11 | assert(Adjective) 12 | 13 | def test_init(self): 14 | assert(Adjective('bela') != None) 15 | 16 | def test_superclass(self): 17 | assert(issubclass(Adjective, Word)) 18 | 19 | def test_valid_content(self): 20 | assert(Adjective('bela')) 21 | 22 | def test_invalid_content(self): 23 | with pytest.raises(NotContentError): 24 | assert(Adjective('')) 25 | 26 | def test_content(self): 27 | word = Adjective('content') 28 | 29 | assert(word.content == 'content') 30 | 31 | def test_metadata_exists(self): 32 | word = Adjective(' ') 33 | 34 | assert(word.metadata == dict()) 35 | -------------------------------------------------------------------------------- /tests/speech/test_conjunction.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pytest 4 | 5 | from context import esperanto_analyzer 6 | 7 | from esperanto_analyzer.speech import Word, Conjunction, NotContentError 8 | 9 | class TestConjunctionBasic(): 10 | def test_import(self): 11 | assert(Conjunction) 12 | 13 | def test_init(self): 14 | assert(Conjunction('kaj') != None) 15 | 16 | def test_superclass(self): 17 | assert(issubclass(Conjunction, 
Word)) 18 | 19 | def test_valid_content(self): 20 | assert(Conjunction('kaj')) 21 | 22 | def test_invalid_content(self): 23 | with pytest.raises(NotContentError): 24 | assert(Conjunction('')) 25 | 26 | def test_content(self): 27 | word = Conjunction('content') 28 | 29 | assert(word.content == 'content') 30 | 31 | def test_metadata_exists(self): 32 | word = Conjunction(' ') 33 | 34 | assert(word.metadata == dict()) 35 | -------------------------------------------------------------------------------- /tests/speech/test_preposition.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pytest 4 | 5 | from context import esperanto_analyzer 6 | 7 | from esperanto_analyzer.speech import Word, Preposition, NotContentError 8 | 9 | class TestPrepositionBasic(): 10 | def test_import(self): 11 | assert(Preposition) 12 | 13 | def test_init(self): 14 | assert(Preposition('post') != None) 15 | 16 | def test_superclass(self): 17 | assert(issubclass(Preposition, Word)) 18 | 19 | def test_valid_content(self): 20 | assert(Preposition('post')) 21 | 22 | def test_invalid_content(self): 23 | with pytest.raises(NotContentError): 24 | assert(Preposition('')) 25 | 26 | def test_content(self): 27 | word = Preposition('content') 28 | 29 | assert(word.content == 'content') 30 | 31 | def test_metadata_exists(self): 32 | word = Preposition(' ') 33 | 34 | assert(word.metadata == dict()) 35 | -------------------------------------------------------------------------------- /esperanto_analyzer/analyzers/morphological/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Make 'Parts of Speech' available through the namespace: `esperanto_analyzer.analyzers.morphological` 3 | Eg: `from esperanto_analyzer.analyzers.morphological import AdverbMorphologicalAnalyzer` 4 | """ 5 | 6 | from .base import BaseMorphologicalAnalyzer 7 | 8 | from .adverb import AdverbMorphologicalAnalyzer 
# -*- coding: utf-8 -*-

import pytest

from context import esperanto_analyzer

from esperanto_analyzer.speech import Word, Interjection, NotContentError


class TestInterjectionBasic():
    """Construction and validation smoke tests for the `Interjection` word class."""

    def test_import(self):
        assert Interjection

    def test_init(self):
        assert Interjection('ek!') is not None

    def test_superclass(self):
        assert issubclass(Interjection, Word)

    def test_valid_content(self):
        assert Interjection('ek!')

    def test_invalid_content(self):
        # Empty content must be rejected at construction time.
        with pytest.raises(NotContentError):
            assert Interjection('')

    def test_content(self):
        assert Interjection('content').content == 'content'

    def test_metadata_exists(self):
        assert Interjection(' ').metadata == {}
TestNounBasic(): 10 | def test_import(self): 11 | assert(Noun) 12 | 13 | def test_init(self): 14 | assert(Noun('lingvo') != None) 15 | 16 | def test_superclass(self): 17 | assert(issubclass(Noun, Word)) 18 | 19 | def test_valid_content(self): 20 | assert(Noun('lingvo')) 21 | 22 | def test_invalid_content(self): 23 | with pytest.raises(NotContentError): 24 | assert(Noun('')) 25 | 26 | def test_content(self): 27 | word = Noun('content') 28 | 29 | assert(word.content == 'content') 30 | 31 | def test_metadata_exists(self): 32 | word = Noun(' ') 33 | 34 | assert(word.metadata == dict()) 35 | 36 | 37 | class TestNounGender(): 38 | def test_has_gender(self): 39 | assert(Noun('lingvo').has_gender()) 40 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | end_of_line = lf 11 | charset = utf-8 12 | 13 | # Docstrings and comments use max_line_length = 79 14 | [*.py] 15 | max_line_length = 119 16 | 17 | # Use 2 spaces for the HTML files 18 | [*.html] 19 | indent_size = 2 20 | 21 | # The JSON files contain newlines inconsistently 22 | [*.json] 23 | indent_size = 2 24 | insert_final_newline = ignore 25 | 26 | [**/admin/js/vendor/**] 27 | indent_style = ignore 28 | indent_size = ignore 29 | 30 | # Minified JavaScript files shouldn't be changed 31 | [**.min.js] 32 | indent_style = ignore 33 | insert_final_newline = ignore 34 | 35 | # Makefiles always use tabs for indentation 36 | [Makefile] 37 | indent_style = tab 38 | 39 | # Batch files use tabs for indentation 40 | [*.bat] 41 | indent_style = tab 42 | 43 | [docs/**.txt] 44 | max_line_length = 79 45 | -------------------------------------------------------------------------------- /tests/speech/test_pronoun.py: 
# -*- coding: utf-8 -*-

import pytest

from context import esperanto_analyzer

from esperanto_analyzer.speech import Word, Pronoun, NotContentError


class TestPronounBasic():
    """Construction and validation smoke tests for the `Pronoun` word class."""

    def test_import(self):
        assert Pronoun

    def test_init(self):
        assert Pronoun('mi') is not None

    def test_superclass(self):
        assert issubclass(Pronoun, Word)

    def test_valid_content(self):
        assert Pronoun('mi')

    def test_invalid_content(self):
        # Empty content must be rejected at construction time.
        with pytest.raises(NotContentError):
            assert Pronoun('')

    def test_content(self):
        assert Pronoun('content').content == 'content'

    def test_metadata_exists(self):
        assert Pronoun(' ').metadata == {}


class TestPronounGender():
    def test_has_gender(self):
        # NOTE(review): 'he' is English, not an Esperanto pronoun ('li') — the
        # test only exercises `has_gender()`, which ignores content. Verify intent.
        assert Pronoun('he').has_gender()
@low 6 | ☐ Publish this module to PyPi(Python Package Index) @pipy @medium 7 | ☐ Separate web and cli in others modules published at Pypi @pypi @medium 8 | ☐ 100% lint warning solved @lint @standard @low 9 | ☐ Better documentation for classes @doc @low 10 | ☐ Normalize all Regexp matching groups @standard @medium 11 | ☐ More sofisticated way of spliting the sentence in MorphologicalSentenceAnalyzer. (Keeping the pontuaction but really spliting in words, eg: ['Mia nomo, estas Adamo!'] => ['Mia', 'nomo', ',', 'estas', 'Adamo', '!']) 12 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Rafael Fidelis 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
# pylint: disable=missing-docstring
import re

from esperanto_analyzer.speech import Interjection
from esperanto_analyzer.analyzers.morphological import BaseMorphologicalAnalyzer

class InterjectionMorphologicalAnalyzer(BaseMorphologicalAnalyzer):
    """Recognizes words that belong to the closed list of Esperanto interjections."""

    # Closed list of the interjections this analyzer recognizes.
    INTERJECTIONS_LIST = [
        'Aĥ!',
        'Aj!',
        'Ba!',
        'Baf!',
        'Baj!',
        'Be!',
        'Bis!',
        'Diable!',
        'Ek!',
        'Fi!',
        'Fu!',
        'Ĝis!',
        'Ha!',
        'Ha lo!',
        'He!',
        'Hej!',
        'Ho!',
        'Ho ve!',
        'Hoj!',
        'Hola!',
        'Hu!',
        'Hup!',
        'Hura!',
        'Lo!',
        'Lu lu!',
        'Nu!',
        'Uf!',
        'Up!',
        'Ŭa!',
        'Ve!',
        'Volapukaĵo!',
        'Jen'
    ]

    # Shared regexp flags
    RE_FLAGS = re.IGNORECASE | re.UNICODE

    # Plain alternation of every entry, eg: `Aĥ!|Aj!|Ba!|Baf!|Baj!(...)`
    INTERJECTIONS_MATCH_REGEXP = re.compile('|'.join(INTERJECTIONS_LIST), RE_FLAGS)

    # Anchored version: the whole word must be exactly one list entry.
    MATCH_REGEXP = re.compile('^(%s)$' % (INTERJECTIONS_MATCH_REGEXP.pattern), RE_FLAGS)

    @staticmethod
    def word_class():
        """Part-of-speech class instantiated on a successful match."""
        return Interjection
# pylint: disable=missing-docstring
import re

from esperanto_analyzer.speech import Conjunction
from esperanto_analyzer.analyzers.morphological import BaseMorphologicalAnalyzer

class ConjunctionMorphologicalAnalyzer(BaseMorphologicalAnalyzer):
    """Recognizes words that belong to the closed list of Esperanto conjunctions."""

    # Closed list of the conjunctions this analyzer recognizes.
    CONJUNCTIONS_LIST = [
        'antaŭ kiam',
        # BUG FIX: a missing trailing comma here used to trigger Python's implicit
        # string-literal concatenation, fusing the next two entries into the bogus
        # alternative 'antaŭ olau' — so neither 'antaŭ ol' nor 'au' ever matched.
        'antaŭ ol',
        'au',
        'aŭ',
        'ĉar',
        'ĉu',
        'K',
        'k',
        'kaj',
        'kaŭ',
        'ke',
        'kial',
        'kiam',
        'kie',
        'kiel',
        'kune kun',
        'kvankam',
        'kvazau',
        'kvazaŭ',
        'minus',
        'nek',
        'ol',
        'plus',
        'se',
        'sed',
        'tial'
    ]

    # Shared regexp flags
    RE_FLAGS = re.IGNORECASE|re.UNICODE

    # NOTE: the name is misspelled ("CONJUCTIONS"); kept as-is because it is a
    # public class attribute other code may reference.
    CONJUCTIONS_MATCH_REGEXP = re.compile('|'.join(CONJUNCTIONS_LIST), RE_FLAGS)

    # MATCHES only elements in `CONJUNCTIONS_LIST`
    MATCH_REGEXP = re.compile('^(%s)$' % (CONJUCTIONS_MATCH_REGEXP.pattern), RE_FLAGS)

    @staticmethod
    def word_class():
        """Part-of-speech class instantiated on a successful match."""
        return Conjunction
# pylint: disable=missing-docstring
class BaseMorphologicalAnalyzer:
    """Template base class for the single-word morphological analyzers.

    Subclasses provide a compiled `MATCH_REGEXP` and a `word_class()`; calling
    `analyze()` once records whether `raw_word` matched and, on success, wraps
    it in the matching part-of-speech class.
    """

    # OVERWRITING THIS PROPERTY IS REQUIRED FOR ALL SUBCLASSES
    MATCH_REGEXP = None

    def __init__(self, raw_word, options=None):
        # A mutable default (`options=dict()`) would be shared between calls,
        # so a fresh dict is created here instead.
        self.options = dict() if options is None else options
        self.raw_word = raw_word
        self.word = None
        self.matches = None
        self.processed = False

    def match(self):
        """Return the regexp match object for `raw_word`, or None."""
        return self.MATCH_REGEXP.match(self.raw_word)

    def analyze(self):
        """Run the analysis once.

        Returns True/False for match/no-match on the first call and
        None on every subsequent call.
        """
        if self.processed:
            return None

        found = self.match()

        # Remember that this instance has already been analyzed.
        self.processed = True

        if not found:
            return False

        self.word = self.word_class()(self.raw_word)
        self.matches = found

        return True

    @staticmethod
    def word_class():
        """Subclasses must return the part-of-speech class to instantiate."""
        raise NotImplementedError
9 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 10 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/api/server.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request 2 | from flask_cors import CORS 3 | 4 | from flask_restful import Resource, Api, marshal_with, fields, abort 5 | 6 | from flask_restful_swagger import swagger 7 | from esperanto_analyzer.web.api import MorphologicalAnalyzeEndpoint 8 | 9 | API_VERSION_NUMBER = '1.0' 10 | API_VERSION_LABEL = 'v1' 11 | 12 | class MorphologicalAnalyzesAPI(object): 13 | 14 | def __init__(self): 15 | self.create_app() 16 | 17 | 18 | def create_app(self): 19 | self.app = Flask(__name__) 20 | CORS(self.app) 21 | 22 | custom_errors = { 23 | 'SentenceInvalidError': { 24 | 'status': 500, 25 | 'message': 'Sentence format not valid' 26 | }, 27 | 'SentenceRequiredError': { 28 | 'status': 400, 29 | 'message': 'Sentence not provided' 30 | } 31 | } 32 | 33 | self.api = swagger.docs(Api(self.app, errors=custom_errors), apiVersion=API_VERSION_NUMBER) 34 | 35 | self.api.add_resource(MorphologicalAnalyzeEndpoint, '/analyze', endpoint='analyze') 36 | 37 | return self.app 38 | 39 | def run(self, *args, **kwargs): # 
# pylint: disable=missing-docstring
import re

from esperanto_analyzer.speech import Preposition
from esperanto_analyzer.analyzers.morphological import BaseMorphologicalAnalyzer

class PrepositionMorphologicalAnalyzer(BaseMorphologicalAnalyzer):
    """Recognizes words that belong to the closed list of Esperanto prepositions."""

    # Closed list of the prepositions this analyzer recognizes.
    # NOTE(review): entries such as 'du vortoj', 'tri vortoj', 'graŭ', 'sekva'
    # look suspicious for a preposition list — confirm against a grammar source.
    PREPOSITIONS_LIST = ['K', 'al', 'anstataŭ', 'antaŭ', 'antaŭ ol', 'apud', 'da', 'de', 'disde',
                         'du vortoj', 'dum', 'ekde', 'ekster', 'eksteren', 'el', 'en', 'ene',
                         'estiel', 'far', 'fare de', 'flanke de', 'for de', 'graŭ', 'inter', 'je',
                         'kaj ankaŭ', 'kiel', 'kontraŭ', 'kontraŭe de', 'krom', 'kun', 'laŭ',
                         'mala', 'malantaŭ', 'malgraŭ', 'malkiel', 'malsupre de', 'malsupren',
                         'meze de', 'na', 'nome de', 'ol', 'per', 'pere de', 'plus', 'po', 'por',
                         'post', 'preter', 'pri', 'pro', 'proksime de', 'samkiel', 'sed', 'sekva',
                         'sen', 'sub', 'suben', 'super', 'supren', 'sur', 'tiu', 'tiuj', 'tra',
                         'trans', 'tri vortoj', 'tuj post', 'tutĉirkaŭ',
                         'ĉe', 'ĉi tiu', 'ĉi tiuj', 'ĉirkaŭ', 'ĝis']

    # Shared regexp flags — hoisted into a constant for consistency with the
    # sibling analyzers (interjection, conjunction, numeral).
    RE_FLAGS = re.IGNORECASE|re.UNICODE

    # NOTE: the name is misspelled ("PROPOSITIONS"); kept as-is because it is a
    # public class attribute other code may reference.
    PROPOSITIONS_MATCH_REGEXP = re.compile('|'.join(PREPOSITIONS_LIST), RE_FLAGS)

    # MATCHES only elements in `PREPOSITIONS_LIST`
    MATCH_REGEXP = re.compile('^(%s)$' % (PROPOSITIONS_MATCH_REGEXP.pattern), RE_FLAGS)

    @staticmethod
    def word_class():
        """Part-of-speech class instantiated on a successful match."""
        return Preposition
# -*- coding: utf-8 -*-

import pytest

from context import esperanto_analyzer

from esperanto_analyzer.speech import Word, NotContentError

class TestWordBasic():
    """Construction and validation smoke tests for the base `Word` class."""

    def test_import(self):
        assert(Word)

    def test_init(self):
        assert(Word(' ') != None)

    def test_valid_content(self):
        assert(Word('a'))

    def test_invalid_content(self):
        with pytest.raises(NotContentError):
            assert(Word(''))

    def test_content(self):
        word = Word('content')

        assert(word.content == 'content')

    def test_metadata_exists(self):
        word = Word(' ')

        assert(word.metadata == dict())


class TestValidWordPlural():
    """Plural detection: words ending in 'j'/'jn' (min. 4 letters) are plural."""

    def test_esperanto_words(self):
        for word in ['kaj', 'ajn']:
            assert(Word(word).plural == False)

    def test_plural_without_acusative(self):
        word = Word('domoj')

        assert(word.plural == True)

    def test_plural_with_acusative(self):
        word = Word('domojn')

        assert(word.plural == True)

class TestInvalidWordPlural():
    def test_plural_without_acusative(self):
        word = Word('domo')

        assert(word.plural == False)

    def test_plural_with_acusative(self):
        word = Word('domon')

        assert(word.plural == False)

# BUG FIX: this helper used to be named `TestPluralWord`. pytest tries to collect
# any `Test*`-prefixed class and rejects this one (it inherits Word's `__init__`),
# emitting a PytestCollectionWarning on every run. Helpers must not match `Test*`.
class PluralLessWord(Word):
    def has_plural(self):
        return False

class TestValidWordDontHasPlural:
    def test_valid_word_plural(self):
        word = PluralLessWord('multe')

        assert(word.plural == False)
# pylint: disable=missing-docstring
import re

from esperanto_analyzer.speech import Numeral
from esperanto_analyzer.analyzers.morphological import BaseMorphologicalAnalyzer

class NumeralMorphologicalAnalyzer(BaseMorphologicalAnalyzer):
    """Recognizes Esperanto numerals: digit literals, basic number names and compounds."""

    # These are the basics numbers names in Esperanto
    BASIC_NUMBERS_LIST = [
        'nul',  # 0
        'unu',  # 1
        'du',   # 2
        'tri',  # 3
        'kvar', # 4
        'kvin', # 5
        'ses',  # 6
        'sep',  # 7
        'ok',   # 8
        'naŭ',  # 9
        'dek'   # 10
    ]

    # Shared regexp flags
    RE_FLAGS = re.IGNORECASE|re.UNICODE

    # TODO: Should this be dynamic?
    # Basically: `re.compile('(nul|unu|du|tri|kvar|kvin|ses|sep|ok|naŭ|dek)')`
    BASIC_NUMBERS_REGEXP = re.compile('|'.join(BASIC_NUMBERS_LIST), RE_FLAGS)

    # TODO: This still matches "unudek", solve it!
    # MATCHES: ["tridek", "okdek", "kvin", "sepcent", "tri miliono"]
    OTHERS_NUMBERS_REGEXP = re.compile('(unu|du|tri|kvar|kvin|ses|sep|ok|naŭ|dek)?(dek|cent|milionoj|miliono|miliardoj|miliardo|bilionoj|biliono|mil)', RE_FLAGS)

    # MATCHES: ["1", "100", "-123", "9009809809", "-90123283232"]
    # BUG FIX: pattern is now a raw string — '\d' inside a plain string literal is
    # an invalid escape sequence (DeprecationWarning since 3.6, SyntaxWarning in 3.12).
    NUMBERS_DIGIT_REGEXP = re.compile(r'-?\d+', re.UNICODE)

    # Join regexps to create the final pattern utilized for this analyzer
    FINAL_REGEXP = '^(%s|%s|%s)$' % (NUMBERS_DIGIT_REGEXP.pattern, BASIC_NUMBERS_REGEXP.pattern, OTHERS_NUMBERS_REGEXP.pattern)

    # The final regexp utilized internally in `match()`
    MATCH_REGEXP = re.compile(FINAL_REGEXP, RE_FLAGS)

    @staticmethod
    def word_class():
        """Part-of-speech class instantiated on a successful match."""
        return Numeral
["multe", "flanke", "rapide"] 9 | BASE_MATCH_REGEXP = re.compile('([a-zA-Zĉĝĵĥŝŭ]{2,}(e))', re.IGNORECASE|re.UNICODE) 10 | 11 | # Some specials Esperanto adverbs and time related adverbs(now, yesterday, etc) list 12 | # @see https://www.wikiwand.com/en/Special_Esperanto_adverbs 13 | # @see: http://mylanguages.org/esperanto_adverbs.php 14 | SPECIAL_ADVERBS = [ 15 | 'almenaŭ', 16 | 'ambaŭ', 17 | 'antaŭ', 18 | 'ankaŭ', 19 | 'ankoraŭ', 20 | 'apenaŭ', 21 | 'baldaŭ', 22 | 'ĉirkaŭ', 23 | 'hieraŭ', 24 | 'hodiaŭ', 25 | 'kvazaŭ', 26 | 'morgaŭ', 27 | 'preskaŭ', 28 | 'nun', 29 | 'tiam', 30 | 'ĉiam', 31 | 'neniam', 32 | 'tuj', 33 | 'jam', 34 | 'tie', 35 | 'tien', 36 | 'ĉie', 37 | 'nenie', 38 | 'for', 39 | 'eksteren', 40 | 'tre', 41 | ] 42 | 43 | # Create one regexp joining all the special adverbs 44 | SPECIAL_ADVERBS_MATCH_REGEXP = re.compile('|'.join(SPECIAL_ADVERBS), re.IGNORECASE|re.UNICODE) 45 | 46 | # Creates one string representation of the final `MATCH_REGEXP` joining two regexps 47 | FINAL_REGEXP = '^(%s|%s)$' % (BASE_MATCH_REGEXP.pattern, SPECIAL_ADVERBS_MATCH_REGEXP.pattern) 48 | 49 | # Finally create the FINAL regexp joining all the regexp need to match Adverbs 50 | MATCH_REGEXP = re.compile(FINAL_REGEXP, re.IGNORECASE|re.UNICODE) 51 | 52 | @staticmethod 53 | def word_class(): 54 | return Adverb 55 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring,no-self-use,invalid-name 2 | 3 | # Learn more: https://github.com/kennethreitz/setup.py 4 | 5 | from setuptools import setup, find_packages 6 | 7 | 8 | with open('README.md') as f: 9 | readme = f.read() 10 | 11 | with open('LICENSE') as f: 12 | lib_license = f.read() 13 | 14 | with open('requirements.txt') as fd: 15 | requirements = [line.rstrip() for line in fd] 16 | 17 | with open('test_requirements.txt') as fd: 18 | test_requirements = [line.rstrip() for line 
in fd] 19 | 20 | setup( 21 | name='esperanto-analyzer', 22 | version='0.0.3', 23 | description='Morphological and syntactic analysis of Esperanto sentences.', 24 | long_description=readme, 25 | author='Rafael Fidelis', 26 | author_email='rafaelfid3lis@gmail.com', 27 | url='https://github.com/fidelisrafael/esperanto-analyzer', 28 | license=lib_license, 29 | packages=find_packages(exclude=('tests', 'docs')), 30 | install_requires=requirements, 31 | tests_require=test_requirements, 32 | classifiers=[ 33 | 'Programming Language :: Python :: 3.7', 34 | "Programming Language :: Python :: 3", 35 | 'Intended Audience :: Developers', 36 | 'Intended Audience :: Education', 37 | 'Intended Audience :: Information Technology', 38 | 'Intended Audience :: Science/Research', 39 | 'Topic :: Scientific/Engineering', 40 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 41 | 'Topic :: Scientific/Engineering :: Human Machine Interfaces', 42 | 'Topic :: Scientific/Engineering :: Information Analysis', 43 | 'Topic :: Text Processing', 44 | 'Topic :: Text Processing :: Filters', 45 | 'Topic :: Text Processing :: General', 46 | 'Topic :: Text Processing :: Indexing', 47 | 'Topic :: Text Processing :: Linguistic', 48 | "License :: OSI Approved :: MIT License", 49 | "Operating System :: OS Independent", 50 | ] 51 | ) 52 | -------------------------------------------------------------------------------- /tests/speech/test_article.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pytest 4 | 5 | from context import esperanto_analyzer 6 | 7 | from esperanto_analyzer.speech import Word, Article 8 | from esperanto_analyzer.speech import NotContentError, InvalidArticleError 9 | 10 | class TestArticleBasic(): 11 | def test_import(self): 12 | assert(Article) 13 | 14 | def test_init(self): 15 | assert(Article('la') != None) 16 | 17 | def test_superclass(self): 18 | assert(issubclass(Article, Word)) 19 | 20 | def 
class AnalyzerNotProcessedError(Exception):
    """Raised when output is requested from an analyzer before `analyze()` was called.

    BUG FIX: this previously inherited from `BaseException`, which is reserved for
    interpreter-level exits (SystemExit, KeyboardInterrupt) and silently escapes
    `except Exception:` handlers; application errors must derive from `Exception`.
    """


class CLI():
    """Command-line front end: analyzes one sentence and prints a colorized table."""

    # ANSI foreground color code used for each 'Part of Speech' name.
    COLORS = {
        'Adjective': 92,    # Light Green
        'Adverb': 32,       # Green
        'Article': 33,      # Yellow
        'Conjunction': 35,  # Magenta
        'Interjection': 95, # Light Magenta
        'Noun': 34,         # Blue
        'Numeral': 93,      # Light Yellow
        'Preposition': 36,  # Cian
        'Pronoun': 96,      # Light Cian
        'Verb': 31,         # Red
        'Undefined': 30     # Black
    }

    OUTPUT_TABLE_HEADERS = ['Word', 'Part of Speech']

    @staticmethod
    def run(input_sentence=None, output=sys.stdout):
        """Analyze `input_sentence` morphologically and print the results to `output`."""
        analyzer = MorphologicalSentenceAnalyzer(input_sentence)
        analyzer.analyze()

        CLI.display_output_for_analyzer(analyzer, output=output)

    @staticmethod
    def display_output_for_analyzer(analyzer, output=sys.stdout):
        """Print the results table for an already-processed analyzer.

        Raises AnalyzerNotProcessedError if `analyzer.analyze()` was never called.
        """
        if analyzer.processed is False:
            raise AnalyzerNotProcessedError('Analyzer must be processed before output display. You must call `analyze()` in your instance')

        CLI.print_results(analyzer.simple_results(), output=output)

    @staticmethod
    def format_table_data(results, colorize=True):
        """Turn `(word, pos_name)` pairs into table rows, optionally ANSI-colorized."""
        out_data = []

        # Wraps `string` in the ANSI escape sequence for the color mapped to `cname`.
        format_color = lambda string, cname: ('\x1b[%sm%s \x1b[0m') % (CLI.COLORS[cname], string)

        for result in results:
            out_data.append([
                format_color(result[0], result[1]) if colorize else result[0],
                format_color(result[1], result[1]) if colorize else result[1]
            ])

        return out_data

    @staticmethod
    def print_results(results, width=15, output=sys.stdout):
        """Render `results` as a table via `tableprint` and return its value."""
        table_data = CLI.format_table_data(results)

        return tableprint.table(table_data, CLI.OUTPUT_TABLE_HEADERS, width=width, out=output)
from .word import Word

# pylint: disable=too-few-public-methods,missing-docstring
class Article(Word):
    """Esperanto article part of speech.

    Esperanto has exactly one article — the definite 'la' — whose written form
    never changes, although the surrounding context may be singular or plural:
        "La suno brilas"  -> "The sun shines"   [singular]
        "La homoj kuiras" -> "The people cook"  [plural]
    """

    # The single valid Esperanto article.
    VALID_ARTICLES = ['la']

    def has_plural(self):
        """Articles are ALWAYS written 'la', yet they can stand in a plural context."""
        return True


    def _match_plural(self, context=None):
        """Infer plurality from the neighbouring `context` word, if one is given.

        > Article('la')._match_plural(None)     # => False
        > Article('la')._match_plural('domo')   # => False
        > Article('la')._match_plural('domoj')  # => True
        """
        return Word(context).plural if context else False

    def _validate_content(self, content):
        # Run the generic non-empty validation from `Word` first.
        super()._validate_content(content)

        # Esperanto has ONLY one article ('la'); reject anything else so this
        # instance really represents a valid Esperanto article.
        if content.lower() not in self.VALID_ARTICLES:
            raise InvalidArticleError
62 | Eg: Article('lo') # raise InvalidArticleError 63 | """ 64 | pass 65 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/api/morphological_endpoint.py: -------------------------------------------------------------------------------- 1 | from flask import request, Response 2 | from flask_restful import Resource, Api, marshal_with, fields, abort 3 | from flask_restful_swagger import swagger 4 | from .results import MorphologicalAnalyzeResult 5 | from .errors import SentenceRequiredError 6 | from .errors import SentenceInvalidError 7 | 8 | from esperanto_analyzer import MorphologicalSentenceAnalyzer 9 | from esperanto_analyzer.cli import CLI 10 | 11 | class MorphologicalAnalyzeEndpoint(Resource): 12 | @swagger.operation( 13 | responseClass=MorphologicalAnalyzeResult.__name__, 14 | nickname='analyzes', 15 | responseMessages=[ 16 | {"code": 400, "message": "Input required"}, 17 | {"code": 500, "message": "JSON format not valid"}, 18 | ], 19 | parameters=[ 20 | { 21 | "name": "sentence", 22 | "description": "The esperanto sentence to be analyzed morphologically", 23 | "required": True, 24 | "allowMultiple": False, 25 | "dataType": "string", 26 | "paramType": "query" 27 | }, 28 | ]) 29 | @marshal_with(MorphologicalAnalyzeResult.resource_fields) 30 | def get(self): 31 | """Return a MorphologicalAnalyzeResult object""" 32 | sentence = request.args['sentence'] 33 | 34 | if not sentence: 35 | raise SentenceRequiredError() 36 | 37 | analyzer = MorphologicalSentenceAnalyzer(sentence=sentence) 38 | analyzer.analyze() 39 | 40 | return self._format_results(analyzer.results()) 41 | 42 | def options(self): 43 | response = Response('{}') 44 | response.headers['Content-Type'] = 'application/json' 45 | response.headers['Access-Control-Allow-Origin'] = '*' 46 | response.headers['Access-Control-Allow-Headers'] = '*' 47 | response.headers['Access-Control-Allow-Method'] = 'POST, GET, OPTIONS' 48 | 49 | return response 50 | 51 | def 
# pylint: disable=too-few-public-methods,missing-docstring
class Word:
    """The smallest unit of language carrying practical meaning.

    A word is one or more spoken sounds (or their written representation)
    functioning as a principal carrier of meaning; technically, one set of
    letters, usually separated by spaces in writing.
    """

    # Plural detection: only words of at least four characters (which rules
    # out short words such as "ajn" and "kaj") that end in "j" or "jn"
    # count as plural.
    PLURAL_DETECT_REGEXP = re.compile('.{2,}([^n]j|jn)$', re.IGNORECASE|re.UNICODE)

    def __init__(self, content, context=None):
        self._validate_content(content)

        self.content = content
        self.context = context
        self.metadata = {}
        self.plural = (self._match_plural(context) not in (False, None))

    def _match_plural(self, _context=None):
        """Try to match the plural suffix of this word.

        Returns the regexp match object when the word looks plural, and
        ``None`` both for singular words and for word classes that never
        inflect for number. Extra context may be passed to help the decision.
        """
        if self.has_plural():
            return self.PLURAL_DETECT_REGEXP.match(self.content)

        # Some word classes (e.g. Adverb) have no plural form at all.
        return None

    def has_plural(self):  # pylint: disable=no-self-use
        """Whether this kind of word is capable of being pluralized.

        Subclasses without a plural form (e.g. Adverb) override this.
        """
        return True

    def _validate_content(self, content):  # pylint: disable=no-self-use
        """Reject empty or falsy content by raising ``NotContentError``."""
        if not content:
            raise NotContentError

class NotContentError(Exception):
    """Raised when a Word is created with empty content.

    Eg: Word('') # raise NotContentError
    """
class MorphologicalSentenceAnalyzer:
    """Splits a sentence into words and analyzes each one morphologically.

    Punctuation is stripped first; each remaining word is run through a
    `MorphologicalAnalyzer`. Results are computed once and cached.
    """

    # The same set of characters as `string.punctuation`
    SENTENCE_CLEAN_REGEXP = re.compile('[!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~]')

    def __init__(self, sentence):
        self.sentence = sentence
        self.sentence_words = self._split_sentence(sentence)
        self.processed = False
        self.internal_results = None

    def analyze(self):
        """Analyze every word once.

        Returns True on the first run, None on subsequent calls (the cached
        results from the first run are kept).
        """
        # Avoid running the same thing many times returning the previous cached results
        if self.processed is True:
            return None

        # Cache the results
        self.internal_results = self._process_words(self.sentence_words)
        self.processed = True

        return True

    def analyzes_results(self):
        """Return the list of AnalyzeResult objects, or None before analyze()."""
        if not self.processed:
            return None

        return [result.results for result in self.internal_results]

    def simple_results(self):
        """Return `[raw_word, part_of_speech_name]` pairs for display (e.g. CLI)."""
        return self._format_simple_results(self.results())

    def results(self):
        """Return `[raw_word, AnalyzeResult]` pairs, or None before analyze()."""
        if not self.processed:
            return None

        results = []

        for analyze in self.analyzes_results():
            results.append([analyze.raw_word, analyze])

        return results

    def _split_sentence(self, sentence):
        """Strip punctuation and split on whitespace."""
        clean_sentence = self._clean_sentence(sentence)

        return clean_sentence.split()

    def _clean_sentence(self, sentence):
        """Remove all ASCII punctuation characters from `sentence`."""
        # Call `.sub` on the precompiled pattern directly instead of routing
        # it back through `re.sub` (equivalent, but idiomatic).
        return self.SENTENCE_CLEAN_REGEXP.sub('', sentence)

    def _process_words(self, words):
        """Run each word through its own MorphologicalAnalyzer instance."""
        results = []

        for word in words:
            analyzer = MorphologicalAnalyzer(word)
            analyzer.analyze()

            results.append(analyzer)

        return results

    def _format_simple_results(self, results):
        """Reduce full results into `[raw_word, part_of_speech_name]` pairs."""
        out_data = []

        for data in results:
            try:
                # Get the current 'Part of Speech' name, such as: 'Adverb', 'Noun'
                pos_name = data[1].result.word.__class__.__name__
            except (AttributeError, IndexError, TypeError):
                # BUG FIX: was a bare `except:` that swallowed every exception
                # (even KeyboardInterrupt). Only the expected failures of the
                # attribute/index chain above (e.g. data[1] is None when no
                # analyzer matched) are handled.
                pos_name = 'Undefined'

            out_data.append([
                data[0],
                pos_name
            ])

        return out_data
*.egg-info/ 81 | .installed.cfg 82 | *.egg 83 | 84 | # PyInstaller 85 | # Usually these files are written by a python script from a template 86 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 87 | *.manifest 88 | *.spec 89 | 90 | # Installer logs 91 | pip-log.txt 92 | pip-delete-this-directory.txt 93 | 94 | # Unit test / coverage reports 95 | htmlcov/ 96 | .tox/ 97 | .coverage 98 | .coverage.* 99 | .cache 100 | nosetests.xml 101 | coverage.xml 102 | *,cover 103 | .hypothesis/ 104 | 105 | # Translations 106 | *.mo 107 | *.pot 108 | 109 | # Django stuff: 110 | *.log 111 | local_settings.py 112 | 113 | # Flask stuff: 114 | instance/ 115 | .webassets-cache 116 | 117 | # Scrapy stuff: 118 | .scrapy 119 | 120 | # Sphinx documentation 121 | docs/_build/ 122 | 123 | # PyBuilder 124 | target/ 125 | 126 | # IPython Notebook 127 | .ipynb_checkpoints 128 | 129 | # pyenv 130 | .python-version 131 | 132 | # celery beat schedule file 133 | celerybeat-schedule 134 | 135 | # dotenv 136 | .env 137 | 138 | # virtualenv 139 | .venv/ 140 | venv/ 141 | ENV/ 142 | 143 | # Spyder project settings 144 | .spyderproject 145 | 146 | # Rope project settings 147 | .ropeproject 148 | -------------------------------------------------------------------------------- /esperanto_analyzer/analyzers/morphological/verb.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring 2 | import re 3 | 4 | from esperanto_analyzer.speech import Verb 5 | from esperanto_analyzer.analyzers.morphological import AdverbMorphologicalAnalyzer 6 | from esperanto_analyzer.analyzers.morphological import ConjunctionMorphologicalAnalyzer 7 | from esperanto_analyzer.analyzers.morphological import NumeralMorphologicalAnalyzer 8 | from esperanto_analyzer.analyzers.morphological import PrepositionMorphologicalAnalyzer 9 | from esperanto_analyzer.analyzers.morphological import PronounMorphologicalAnalyzer 10 | 11 | from 
class VerbMorphologicalAnalyzer(BaseMorphologicalAnalyzer):
    """Recognizes Esperanto verbs by their grammatical endings.

    A word matches when it is a stem of at least two letters (including the
    accented Esperanto letters) followed by one of the verb endings below;
    participle/nominal endings additionally accept the plural `j` and
    accusative `n` suffixes.
    """

    # https://en.wiktionary.org/wiki/Appendix:Esperanto_verbs
    # Note: Not completed
    VERBS_ENDINGS = [
        'i', # Infinitive
        'u', # Volative/Jussive
        'as', # Present indicative
        'os', # Future indicative
        'is', # Past indicative
        'us', # Conditional
        'ite', # Past passive adverbial
        'ate', # Present passive adverbial
        'ote', # Future passive adverbial
        'inte', # Past active adverbial
        'ante', # Present active adverbial
        'onte', # Future active adverbial
    ]

    # Tenses that receives acusative (n) and plural (j) suffix
    VERBS_ENDINGS_ACUSATIVE_PLURAL = [
        'inta', # Past active participle,
        'anta', # Present active participle
        'onta', # Future active participle
        'into', # Past active nominal
        'anto', # Present active nominal
        'onto', # Future active nominal
        'ita', # Past passive participle
        'ata', # Present passive participle
        'ota', # Future passive participle
        'ito', # Past passive nominal
        'ato', # Past passive nominal
        'oto', # Future passive nominal
    ]

    # Case-insensitive matching over the full Unicode range (accented letters).
    RE_FLAGS = re.IGNORECASE|re.UNICODE

    # Plain alternation of the simple endings; embedded via `.pattern` below,
    # where MATCH_REGEXP's `^...$` anchors make the alternation order irrelevant.
    VERBS_ENDINGS_REGEXP = re.compile('|'.join(VERBS_ENDINGS), RE_FLAGS)

    # Alternation of the participle/nominal endings that can also take j/n.
    VERBS_ENDINGS_ACUSATIVE_PLURAL_REGEXP = re.compile('|'.join(VERBS_ENDINGS_ACUSATIVE_PLURAL), RE_FLAGS)

    # MATCHES: ["ŝatis", "ŝatas", "ŝatu", "ŝatus", "ŝati"] and so on
    # Shape: ^ stem(>=2 letters) + (simple ending | participle ending + optional j/n) $
    MATCH_REGEXP = re.compile('^([a-zA-Zĉĝĵĥŝŭ]{2,}(%s|(%s)(j?n?)?))$' % (VERBS_ENDINGS_REGEXP.pattern, VERBS_ENDINGS_ACUSATIVE_PLURAL_REGEXP.pattern), RE_FLAGS)

    @staticmethod
    def word_class():
        """The speech class this analyzer produces for matched words."""
        return Verb
class MorphologicalAnalyzer:
    """Runs one word through every part-of-speech analyzer until one matches.

    The matching analyzer (or None) is wrapped in an `AnalyzeResult` and
    cached; `analyze()` is idempotent after the first call.
    """

    # TODO: Reorganize this order for better performance
    DEFAULT_ANALYZERS = [
        AdverbMorphologicalAnalyzer,
        ArticleMorphologicalAnalyzer,
        ConjunctionMorphologicalAnalyzer,
        InterjectionMorphologicalAnalyzer,
        NumeralMorphologicalAnalyzer,
        PrepositionMorphologicalAnalyzer,
        PronounMorphologicalAnalyzer,
        AdjectiveMorphologicalAnalyzer,
        NounMorphologicalAnalyzer,
        VerbMorphologicalAnalyzer,
    ]

    def __init__(self, raw_word):
        self.raw_word = raw_word
        self.processed = False
        self.results = None

    def analyze(self):
        """Analyze the word once.

        Returns True on the first run (populating `self.results`), and
        None on subsequent calls.
        """
        if self.processed:
            return None

        analyzer = self.__apply_analyzers(self.raw_word, self.DEFAULT_ANALYZERS)

        self.results = self.__finish_result(result=analyzer, raw_word=self.raw_word)
        self.processed = True

        return True

    def __finish_result(self, result, raw_word):
        """Wrap the (possibly None) matching analyzer into an AnalyzeResult."""
        return AnalyzeResult(result=result, raw_word=raw_word)

    def __apply_analyzers(self, word, analyzers=None):
        """Return the first analyzer instance that matches `word`, else None."""
        # BUG FIX: was `len(analyzers) is 0` -- identity comparison against an
        # int literal is implementation-dependent and raises a SyntaxWarning
        # on CPython 3.8+. A plain truthiness test covers both None and empty.
        if not analyzers:
            return None

        for analyzer_class in analyzers:
            analyzer_instance = analyzer_class(word)

            if analyzer_instance.analyze() is True:
                return analyzer_instance

        # Explicit: no analyzer recognized the word.
        return None
PERSONAL_PRONOUNS_LIST_REGEXP = re.compile('|'.join(PERSONAL_PRONOUNS_LIST), RE_FLAGS) 57 | 58 | # /kiu|kio|kies|tiu|ĉi tiu|tia/ 59 | OTHERS_PRONOUNS_LIST_REGEXP = re.compile('|'.join(OTHERS_PRONOUNS_LIST), RE_FLAGS) 60 | 61 | # /nenio|neniu|ĉio|ĉiu|io|iu|io ajn|iu ajn|io ajn|ĉio ajn|iu ajn|ĉiu ajn/ 62 | INDEFINITE_PRONOUNS_LIST_REGEXP = re.compile('|'.join(INDEFINITE_PRONOUNS_LIST), RE_FLAGS) 63 | 64 | # ["mia", "via", "lia", (...)] 65 | PERSONAL_POSSESSIVE_PRONOUNS_LIST = [pronoun + "a" for pronoun in PERSONAL_PRONOUNS_LIST] 66 | 67 | # /mia|via|lia|ŝia|gia|(...)/ 68 | PERSONAL_POSSESSIVE_PRONOUNS_LIST_REGEXP = re.compile('|'.join(PERSONAL_POSSESSIVE_PRONOUNS_LIST), RE_FLAGS) 69 | 70 | # /mi|vi|li|ŝi|gi|(...)|mia|via|lia|ŝia|gia|(...)/ 71 | ALL_PERSONAL_PRONOUNS_REGEXP = re.compile("(%s|%s|%s|%s)" % (PERSONAL_POSSESSIVE_PRONOUNS_LIST_REGEXP.pattern, PERSONAL_PRONOUNS_LIST_REGEXP.pattern, OTHERS_PRONOUNS_LIST_REGEXP.pattern, INDEFINITE_PRONOUNS_LIST_REGEXP.pattern)) 72 | 73 | # MATCHES: ["mi", "via", "viajn", "viaj", "liajn"] 74 | MATCH_REGEXP = re.compile('(^(%s((j?n?)?))$)' % ALL_PERSONAL_PRONOUNS_REGEXP.pattern, RE_FLAGS) 75 | 76 | # breakpoint() 77 | 78 | @staticmethod 79 | def word_class(): 80 | return Pronoun 81 | -------------------------------------------------------------------------------- /tests/cli/test_cly.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring,no-self-use 2 | 3 | import pytest 4 | from io import StringIO 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer import MorphologicalSentenceAnalyzer 9 | from esperanto_analyzer.cli.cli import CLI, AnalyzerNotProcessedError 10 | 11 | class TestCLIBasic(): 12 | TEST_SENTENCE = 'Mi loĝas en Brazilo' 13 | EXPECT_OUTPUT_TEST_SENTENCE = '╭─────────────────┬─────────────────╮\n│ Word │ Part of Speech │\n├─────────────────┼─────────────────┤\n│ \x1b[96mMi \x1b[0m │ \x1b[96mPronoun \x1b[0m │\n│ 
\x1b[31mloĝas \x1b[0m │ \x1b[31mVerb \x1b[0m │\n│ \x1b[36men \x1b[0m │ \x1b[36mPreposition \x1b[0m │\n│ \x1b[34mBrazilo \x1b[0m │ \x1b[34mNoun \x1b[0m │\n╰─────────────────┴─────────────────╯\n' 14 | 15 | COLORS = { 16 | 'Adjective': 92, # Light Green 17 | 'Adverb': 32, # Green 18 | 'Article': 33, # Yellow 19 | 'Conjunction': 35, # Magenta 20 | 'Interjection': 95, # Light Magenta 21 | 'Noun': 34, # Blue 22 | 'Numeral': 93, # Light Yellow 23 | 'Preposition': 36, # Cian 24 | 'Pronoun': 96, # Light Cian 25 | 'Verb': 31, # Red 26 | 'Undefined': 30 # Black 27 | } 28 | 29 | def test_import(self): 30 | assert CLI 31 | 32 | def test_colors(self): 33 | assert CLI.COLORS == self.COLORS 34 | 35 | def test_output_table_headers(self): 36 | assert CLI.OUTPUT_TABLE_HEADERS == ['Word', 'Part of Speech'] 37 | 38 | def test_display_output_for_analyzer_without_executing(self): 39 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 40 | 41 | with pytest.raises(AnalyzerNotProcessedError): 42 | CLI.display_output_for_analyzer(analyzer) 43 | 44 | def test_print_results(self): 45 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 46 | analyzer.analyze() 47 | 48 | output = StringIO() 49 | 50 | # Execute the method that will write to `output` 51 | CLI.print_results(analyzer.simple_results(), output=output) 52 | 53 | assert output.getvalue() == self.EXPECT_OUTPUT_TEST_SENTENCE 54 | 55 | def test_run(self): 56 | output = StringIO() 57 | CLI.run(self.TEST_SENTENCE, output) 58 | 59 | assert output.getvalue() == self.EXPECT_OUTPUT_TEST_SENTENCE 60 | 61 | def test_format_table_data_with_formating(self): 62 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 63 | analyzer.analyze() 64 | 65 | expected = [ 66 | ['\x1b[96mMi \x1b[0m', '\x1b[96mPronoun \x1b[0m'], 67 | ['\x1b[31mloĝas \x1b[0m', '\x1b[31mVerb \x1b[0m'], 68 | ['\x1b[36men \x1b[0m', '\x1b[36mPreposition \x1b[0m'], 69 | ['\x1b[34mBrazilo \x1b[0m', '\x1b[34mNoun \x1b[0m'] 70 | ] 71 | 72 | assert 
CLI.format_table_data(analyzer.simple_results()) == expected 73 | 74 | def test_format_table_data_without_formating(self): 75 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 76 | analyzer.analyze() 77 | 78 | expected = [ 79 | ['Mi', 'Pronoun'], 80 | ['loĝas', 'Verb'], 81 | ['en', 'Preposition'], 82 | ['Brazilo', 'Noun'] 83 | ] 84 | 85 | assert CLI.format_table_data(analyzer.simple_results(), colorize=False) == expected 86 | 87 | -------------------------------------------------------------------------------- /tests/web/test_web_api_endpoints.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | import pytest 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer.web.api.server import create_app, run_app 9 | from esperanto_analyzer.web.api.errors import SentenceRequiredError, SentenceInvalidError 10 | from esperanto_analyzer.web.api.morphological_endpoint import MorphologicalAnalyzeEndpoint 11 | from esperanto_analyzer.web.api.results import MorphologicalAnalyzeResult 12 | 13 | class TestWebRoot: 14 | def test_get_404(self, client): 15 | response = client.get('/') 16 | 17 | assert(response.status_code == 404) 18 | 19 | class TestSentenceAnalyze: 20 | def test_bad_request_without_sentence(self, client): 21 | response = client.get('/analyze') 22 | 23 | assert(response.status_code == 400) 24 | 25 | def test_ok_with_sentence(self, client): 26 | response = client.get('/analyze?sentence=Mia%nomo') 27 | 28 | assert(response.status_code == 200) 29 | 30 | def test_response_with_sentence(self, client): 31 | response = client.get('/analyze?sentence=Mia%nomo') 32 | 33 | assert response.get_json() == [{'word': 'Mianomo', 'value': 'Noun'}] 34 | 35 | def test_response_with_sentence_but_invalid(self, client): 36 | response = client.get('/analyze?sentence=```') 37 | 38 | assert response.get_json() == [] 39 | 40 | def 
test_response_status_code_with_sentence_but_invalid(self, client): 41 | response = client.get('/analyze?sentence=```') 42 | 43 | assert response.status_code == 200 44 | 45 | def test_bad_request_without_sentence(self, client): 46 | response = client.get('/analyze') 47 | 48 | assert(response.status_code == 400) 49 | 50 | def test_exception_with_empty_sentence(self, client): 51 | with pytest.raises(SentenceRequiredError): 52 | assert client.get('/analyze?sentence=') 53 | 54 | def test_options_request(self, client): 55 | response = client.options('/analyze') 56 | 57 | assert response.status_code == 200 58 | 59 | def test_options_response_CORS_origin_header(self, client): 60 | response = client.options('/analyze') 61 | 62 | assert response.headers['Access-Control-Allow-Origin'] == '*' 63 | 64 | def test_options_response_CORS_headers_header(self, client): 65 | response = client.options('/analyze') 66 | assert response.headers['Access-Control-Allow-Headers'] == '*' 67 | 68 | def test_options_response_CORS_origin_header(self, client): 69 | response = client.options('/analyze') 70 | 71 | assert response.headers['Access-Control-Allow-Method'] == 'POST, GET, OPTIONS' 72 | 73 | def test_unicode_encoded_response(self, client): 74 | response = client.get('analyze?sentence=👍👍👍') 75 | 76 | assert response.get_json() == [{'word': '👍', 'value': 'Undefined'}, {'word': '👍', 'value': 'Undefined'}, {'word': '👍', 'value': 'Undefined'}] 77 | 78 | def test_unicode_encoded_response(self, client): 79 | response = client.get('analyze?sentence=%F0%9F%91%8D%20%F0%9F%91%8E%20%F0%9F%91%8E') 80 | 81 | assert response.get_json() == [{'word': '👍', 'value': 'Undefined'}, {'word': '👎', 'value': 'Undefined'}, {'word': '👎', 'value': 'Undefined'}] 82 | 83 | class TestMorphologicalAnalyzeEndpoint(): 84 | def test__format_results_none(self): 85 | instance = MorphologicalAnalyzeEndpoint() 86 | 87 | assert instance._format_results(None) == [] 88 | 89 | def test__format_results_error(self): 90 | instance = 
MorphologicalAnalyzeEndpoint() 91 | results = [[dict(), None]] 92 | 93 | assert instance._format_results(results) == [{'word': {}, 'value': 'Undefined', 'extra': {}}] 94 | 95 | 96 | class TestMorphologicalAnalyzeResult(): 97 | def test_results(self): 98 | result = MorphologicalAnalyzeResult(dict(test=1, works=2)) 99 | 100 | assert result.results == dict(test=1, works=2) 101 | -------------------------------------------------------------------------------- /tests/analyzers/morphological/test_verb_morphological_analyzer.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring,no-self-use 2 | 3 | import re 4 | import pytest 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer.speech import Verb 9 | from esperanto_analyzer.analyzers.morphological import VerbMorphologicalAnalyzer 10 | 11 | class TestVerbMorphologicalAnalyzerBasic(): 12 | TEST_WORD = 'ŝatas' 13 | 14 | def test_import(self): 15 | assert VerbMorphologicalAnalyzer 16 | 17 | def test_initialize_default_options(self): 18 | analyzer = VerbMorphologicalAnalyzer(self.TEST_WORD) 19 | 20 | assert analyzer.options == dict() 21 | 22 | def test_initialize_overwrite_options(self): 23 | analyzer = VerbMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok')) 24 | 25 | assert analyzer.options == dict(option='ok') 26 | 27 | def test_initialize_raw_word(self): 28 | analyzer = VerbMorphologicalAnalyzer(self.TEST_WORD) 29 | 30 | assert analyzer.raw_word == self.TEST_WORD 31 | 32 | def test_initialize_word(self): 33 | analyzer = VerbMorphologicalAnalyzer(self.TEST_WORD) 34 | 35 | # analyzer.word is only populated after calling `analyze()` method 36 | assert analyzer.word is None 37 | 38 | def test_initialize_matches(self): 39 | analyzer = VerbMorphologicalAnalyzer(self.TEST_WORD) 40 | 41 | # analyzer.matches is only populated after calling `analyze()` method 42 | assert analyzer.matches is None 43 | 44 | def 
test_initialize_processed(self): 45 | analyzer = VerbMorphologicalAnalyzer(self.TEST_WORD) 46 | 47 | # analyzer.matches is only populated after calling `analyze()` method 48 | assert analyzer.processed is False 49 | 50 | def test_match_regexp(self): 51 | assert VerbMorphologicalAnalyzer.MATCH_REGEXP is not None 52 | 53 | def test_word_class(self): 54 | assert isinstance(VerbMorphologicalAnalyzer.word_class()(self.TEST_WORD), Verb) 55 | 56 | class TestVerbMorphologicalAnalyzerMatchMethod(): 57 | VALID_WORDS = ['ŝatis', 'ŝatas', 'ŝatu', 'ŝatus', 'ŝati'] 58 | INVALID_WORDS = ['multe', 'domo', 'hundoj', 'vi', 'bela', 'belajn', 'tio'] 59 | 60 | def test_match(self): 61 | for word in self.VALID_WORDS: 62 | analyzer = VerbMorphologicalAnalyzer(word) 63 | matches = analyzer.match() 64 | 65 | assert matches is not None 66 | 67 | def test_match_empty(self): 68 | for word in self.INVALID_WORDS: 69 | analyzer = VerbMorphologicalAnalyzer(word) 70 | matches = analyzer.match() 71 | 72 | assert matches is None 73 | 74 | 75 | class TestVerbMorphologicalAnalyzerAnalyzeMethod(): 76 | INVALID_WORDS = ['multe', 'domo', 'hundoj', 'vi', 'bela', 'belajn', 'tio'] #, 'kiu'] 77 | VALID_WORDS = ['ŝatis', 'ŝatas', 'ŝatu', 'ŝatus', 'ŝati', 'amas'] 78 | 79 | def test_invalid_analyze(self): 80 | for word in self.INVALID_WORDS: 81 | analyzer = VerbMorphologicalAnalyzer(word) 82 | result = analyzer.analyze() 83 | 84 | assert not result 85 | 86 | def test_invalid_analyze_word(self): 87 | for word in self.INVALID_WORDS: 88 | analyzer = VerbMorphologicalAnalyzer(word) 89 | analyzer.analyze() 90 | 91 | # if(analyzer.word): breakpoint() 92 | 93 | assert analyzer.word is None 94 | 95 | def test_invalid_analyze_match(self): 96 | for word in self.INVALID_WORDS: 97 | analyzer = VerbMorphologicalAnalyzer(word) 98 | analyzer.analyze() 99 | 100 | assert analyzer.matches is None 101 | 102 | def test_analyze(self): 103 | for word in self.VALID_WORDS: 104 | analyzer = VerbMorphologicalAnalyzer(word) 105 | 106 | 
assert analyzer.analyze() 107 | 108 | def test_analyze_word(self): 109 | for word in self.VALID_WORDS: 110 | analyzer = VerbMorphologicalAnalyzer(word) 111 | analyzer.analyze() 112 | 113 | assert isinstance(analyzer.word, Verb) 114 | assert analyzer.word.content == word 115 | 116 | def test_analyze_match(self): 117 | for word in self.VALID_WORDS: 118 | analyzer = VerbMorphologicalAnalyzer(word) 119 | analyzer.analyze() 120 | 121 | assert analyzer.matches is not None 122 | 123 | def test_analyze_return_false(self): 124 | for word in self.INVALID_WORDS: 125 | analyzer = VerbMorphologicalAnalyzer(word) 126 | 127 | assert analyzer.analyze() is False 128 | 129 | def test_analyze_return_true(self): 130 | for word in self.VALID_WORDS: 131 | analyzer = VerbMorphologicalAnalyzer(word) 132 | 133 | assert analyzer.analyze() 134 | 135 | 136 | def test_analyze_processed(self): 137 | for word in self.VALID_WORDS: 138 | analyzer = VerbMorphologicalAnalyzer(word) 139 | 140 | assert analyzer.processed is False 141 | 142 | analyzer.analyze() 143 | 144 | assert analyzer.processed is True 145 | 146 | def test_analyze_processed_response(self): 147 | for word in self.VALID_WORDS: 148 | analyzer = VerbMorphologicalAnalyzer(word) 149 | analyzer.analyze() 150 | 151 | assert analyzer.analyze() is None 152 | assert analyzer.analyze() is None 153 | -------------------------------------------------------------------------------- /tests/analyzers/morphological/test_noun_morphological_analyzer.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring,no-self-use 2 | 3 | import re 4 | import pytest 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer.speech import Noun 9 | from esperanto_analyzer.analyzers.morphological import NounMorphologicalAnalyzer 10 | 11 | class TestNounMorphologicalAnalyzerBasic(): 12 | TEST_WORD = 'kaj' 13 | 14 | def test_import(self): 15 | assert NounMorphologicalAnalyzer 16 | 17 
| def test_initialize_default_options(self): 18 | analyzer = NounMorphologicalAnalyzer(self.TEST_WORD) 19 | 20 | assert analyzer.options == dict() 21 | 22 | def test_initialize_overwrite_options(self): 23 | analyzer = NounMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok')) 24 | 25 | assert analyzer.options == dict(option='ok') 26 | 27 | def test_initialize_raw_word(self): 28 | analyzer = NounMorphologicalAnalyzer(self.TEST_WORD) 29 | 30 | assert analyzer.raw_word == self.TEST_WORD 31 | 32 | def test_initialize_word(self): 33 | analyzer = NounMorphologicalAnalyzer(self.TEST_WORD) 34 | 35 | # analyzer.word is only populated after calling `analyze()` method 36 | assert analyzer.word is None 37 | 38 | def test_initialize_matches(self): 39 | analyzer = NounMorphologicalAnalyzer(self.TEST_WORD) 40 | 41 | # analyzer.matches is only populated after calling `analyze()` method 42 | assert analyzer.matches is None 43 | 44 | def test_initialize_processed(self): 45 | analyzer = NounMorphologicalAnalyzer(self.TEST_WORD) 46 | 47 | # analyzer.matches is only populated after calling `analyze()` method 48 | assert analyzer.processed is False 49 | 50 | def test_match_regexp(self): 51 | assert NounMorphologicalAnalyzer.MATCH_REGEXP is not None 52 | 53 | def test_word_class(self): 54 | assert isinstance(NounMorphologicalAnalyzer.word_class()(self.TEST_WORD), Noun) 55 | 56 | def test_regexp_value(self): 57 | assert NounMorphologicalAnalyzer.MATCH_REGEXP == re.compile('(^[a-zA-Zĉĝĵĥŝŭ]{2,}(o(j?n?)?)$)', re.IGNORECASE|re.UNICODE) 58 | 59 | class TestNounMorphologicalAnalyzerMatchMethod(): 60 | VALID_WORDS = [ 61 | 'domo', 'domoj', 'homon', 'homojn', 'ĉambro' 62 | ] 63 | 64 | INVALID_WORDS = ['io', 'lo', 'bela', 'la', 'kiu', 'vi', 'kun', 'multe', 'ankoraŭ'] 65 | 66 | def test_match(self): 67 | for word in self.VALID_WORDS: 68 | analyzer = NounMorphologicalAnalyzer(word) 69 | matches = analyzer.match() 70 | 71 | assert matches is not None 72 | assert len(matches.span()) == 2 73 | 74 | 
def test_match_empty(self): 75 | for word in self.INVALID_WORDS: 76 | analyzer = NounMorphologicalAnalyzer(word) 77 | matches = analyzer.match() 78 | 79 | assert matches is None 80 | 81 | class TestNounMorphologicalAnalyzerAnalyzeMethod(): 82 | VALID_WORDS = [ 83 | 'domo', 'domoj', 'homon', 'homojn', 'ĉambro' 84 | ] 85 | 86 | INVALID_WORDS = ['io', 'lo', 'bela', 'la', 'kiu', 'vi', 'kun', 'multe', 'ankoraŭ'] 87 | 88 | def test_invalid_analyze(self): 89 | for word in self.INVALID_WORDS: 90 | analyzer = NounMorphologicalAnalyzer(word) 91 | result = analyzer.analyze() 92 | 93 | assert not result 94 | 95 | def test_invalid_analyze_word(self): 96 | for word in self.INVALID_WORDS: 97 | analyzer = NounMorphologicalAnalyzer(word) 98 | analyzer.analyze() 99 | 100 | assert analyzer.word is None 101 | 102 | def test_invalid_analyze_match(self): 103 | for word in self.INVALID_WORDS: 104 | analyzer = NounMorphologicalAnalyzer(word) 105 | analyzer.analyze() 106 | 107 | assert analyzer.matches is None 108 | 109 | def test_analyze(self): 110 | for word in self.VALID_WORDS: 111 | analyzer = NounMorphologicalAnalyzer(word) 112 | 113 | assert analyzer.analyze() 114 | 115 | def test_analyze_word(self): 116 | for word in self.VALID_WORDS: 117 | analyzer = NounMorphologicalAnalyzer(word) 118 | analyzer.analyze() 119 | 120 | assert isinstance(analyzer.word, Noun) 121 | assert analyzer.word.content == word 122 | 123 | def test_analyze_match(self): 124 | for word in self.VALID_WORDS: 125 | analyzer = NounMorphologicalAnalyzer(word) 126 | analyzer.analyze() 127 | 128 | assert analyzer.matches is not None 129 | 130 | def test_analyze_return_false(self): 131 | for word in self.INVALID_WORDS: 132 | analyzer = NounMorphologicalAnalyzer(word) 133 | 134 | assert analyzer.analyze() is False 135 | 136 | def test_analyze_return_true(self): 137 | for word in self.VALID_WORDS: 138 | analyzer = NounMorphologicalAnalyzer(word) 139 | 140 | assert analyzer.analyze() 141 | 142 | 143 | def 
test_analyze_processed(self): 144 | for word in self.VALID_WORDS: 145 | analyzer = NounMorphologicalAnalyzer(word) 146 | 147 | assert analyzer.processed is False 148 | 149 | analyzer.analyze() 150 | 151 | assert analyzer.processed is True 152 | 153 | def test_analyze_processed_response(self): 154 | for word in self.VALID_WORDS: 155 | analyzer = NounMorphologicalAnalyzer(word) 156 | analyzer.analyze() 157 | 158 | assert analyzer.analyze() is None 159 | assert analyzer.analyze() is None 160 | -------------------------------------------------------------------------------- /tests/analyzers/morphological/test_base_morphological_analyzer.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring,no-self-use 2 | 3 | import re 4 | import pytest 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer.speech import Word 9 | from esperanto_analyzer.analyzers.morphological import BaseMorphologicalAnalyzer 10 | 11 | class TestBaseMorphologicalAnalyzerBasic(): 12 | TEST_WORD = 'komputilo' 13 | 14 | def test_import(self): 15 | assert BaseMorphologicalAnalyzer 16 | 17 | def test_initialize_default_options(self): 18 | analyzer = BaseMorphologicalAnalyzer(self.TEST_WORD) 19 | 20 | assert analyzer.options == dict() 21 | 22 | def test_initialize_overwrite_options(self): 23 | analyzer = BaseMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok')) 24 | 25 | assert analyzer.options == dict(option='ok') 26 | 27 | def test_initialize_raw_word(self): 28 | analyzer = BaseMorphologicalAnalyzer(self.TEST_WORD) 29 | 30 | assert analyzer.raw_word == self.TEST_WORD 31 | 32 | def test_initialize_word(self): 33 | analyzer = BaseMorphologicalAnalyzer(self.TEST_WORD) 34 | 35 | # analyzer.word is only populated after calling `analyze()` method 36 | assert analyzer.word is None 37 | 38 | def test_initialize_matches(self): 39 | analyzer = BaseMorphologicalAnalyzer(self.TEST_WORD) 40 | 41 | # analyzer.matches is 
only populated after calling `analyze()` method 42 | assert analyzer.matches is None 43 | 44 | def test_initialize_processed(self): 45 | analyzer = BaseMorphologicalAnalyzer(self.TEST_WORD) 46 | 47 | # analyzer.matches is only populated after calling `analyze()` method 48 | assert analyzer.processed is False 49 | 50 | def test_match_regexp(self): 51 | assert BaseMorphologicalAnalyzer.MATCH_REGEXP is None 52 | 53 | def test_word_class(self): 54 | with pytest.raises(NotImplementedError): 55 | BaseMorphologicalAnalyzer.word_class() 56 | 57 | class TestBaseMorphologicalAnalyzerMatchMethod(): 58 | TEST_WORD = 'komputilo' 59 | 60 | def test_match(self): 61 | analyzer = BaseMorphologicalAnalyzer(self.TEST_WORD) 62 | 63 | with pytest.raises(AttributeError, match="'NoneType' object has no attribute 'match'"): 64 | analyzer.match() 65 | 66 | class TestAnalyzer(BaseMorphologicalAnalyzer): 67 | # Only words with MINIMUM 9 letters 68 | MATCH_REGEXP = re.compile('.{9,}') 69 | 70 | @staticmethod 71 | def word_class(): 72 | return Word 73 | 74 | class TestChildMorphologicalAnalyzerMatchMethod(): 75 | TEST_WORD = 'komputilo' 76 | 77 | def test_match(self): 78 | analyzer = TestAnalyzer(self.TEST_WORD) 79 | matches = analyzer.match() 80 | 81 | assert matches is not None 82 | 83 | def test_match_empty(self): 84 | analyzer = TestAnalyzer('vorto') 85 | matches = analyzer.match() 86 | 87 | assert matches is None 88 | 89 | 90 | class TestBaseMorphologicalAnalyzerAnalyzeMethod(): 91 | TEST_WORD = 'komputilo' 92 | 93 | def test_analyze(self): 94 | analyzer = TestAnalyzer(self.TEST_WORD) 95 | result = analyzer.analyze() 96 | 97 | assert result 98 | assert isinstance(analyzer.word, Word) 99 | assert analyzer.matches is not None 100 | 101 | def test_analyze_word(self): 102 | analyzer = TestAnalyzer(self.TEST_WORD) 103 | result = analyzer.analyze() 104 | 105 | assert result 106 | assert isinstance(analyzer.word, Word) 107 | assert analyzer.raw_word == self.TEST_WORD 108 | assert 
analyzer.word.content == self.TEST_WORD 109 | assert analyzer.raw_word == analyzer.word.content 110 | 111 | def test_analyze_word_invalid(self): 112 | analyzer = TestAnalyzer('io') 113 | result = analyzer.analyze() 114 | 115 | assert result is False 116 | assert analyzer.word is None 117 | assert analyzer.raw_word == 'io' 118 | 119 | def test_analyze_matches(self): 120 | analyzer = TestAnalyzer(self.TEST_WORD) 121 | result = analyzer.analyze() 122 | 123 | assert result 124 | assert isinstance(analyzer.matches, re.Match) 125 | assert analyzer.matches 126 | 127 | def test_analyze_matches_span(self): 128 | analyzer = TestAnalyzer(self.TEST_WORD) 129 | result = analyzer.analyze() 130 | 131 | assert result 132 | assert len(analyzer.matches.span()) == 2 133 | assert analyzer.matches.span() == (0, 9) 134 | 135 | def test_analyze_matches_invalid(self): 136 | analyzer = TestAnalyzer('io') 137 | result = analyzer.analyze() 138 | 139 | assert result is False 140 | assert analyzer.matches is None 141 | 142 | def test_analyze_matches_span_invalid(self): 143 | analyzer = TestAnalyzer('io') 144 | result = analyzer.analyze() 145 | 146 | assert result is False 147 | assert analyzer.matches is None 148 | assert not analyzer.matches 149 | 150 | def test_analyze_return_true(self): 151 | analyzer = TestAnalyzer(self.TEST_WORD) 152 | 153 | assert analyzer.analyze() 154 | 155 | def test_analyze_return_false(self): 156 | analyzer = TestAnalyzer('io') 157 | 158 | assert analyzer.analyze() is False 159 | 160 | def test_analyze_processed(self): 161 | analyzer = TestAnalyzer(self.TEST_WORD) 162 | 163 | assert analyzer.processed is False 164 | 165 | analyzer.analyze() 166 | 167 | assert analyzer.processed is True 168 | 169 | def test_analyze_processed_response(self): 170 | analyzer = TestAnalyzer(self.TEST_WORD) 171 | analyzer.analyze() 172 | 173 | assert analyzer.analyze() is None 174 | assert analyzer.analyze() is None 175 | 176 | 
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
@ECHO OFF

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set BUILDDIR=_build
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
set I18NSPHINXOPTS=%SPHINXOPTS% .
if NOT "%PAPER%" == "" (
	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
)

if "%1" == "" goto help

if "%1" == "help" (
	:help
	REM NOTE(review): the dump stripped the ^<target^> escapes here; restored.
	echo.Please use `make ^<target^>` where ^<target^> is one of
	echo.  html       to make standalone HTML files
	echo.  dirhtml    to make HTML files named index.html in directories
	echo.  singlehtml to make a single large HTML file
	echo.  pickle     to make pickle files
	echo.  json       to make JSON files
	echo.  htmlhelp   to make HTML files and a HTML help project
	echo.  qthelp     to make HTML files and a qthelp project
	echo.  devhelp    to make HTML files and a Devhelp project
	echo.  epub       to make an epub
	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
	echo.  text       to make text files
	echo.  man        to make manual pages
	echo.  texinfo    to make Texinfo files
	echo.  gettext    to make PO message catalogs
	echo.  changes    to make an overview over all changed/added/deprecated items
	echo.  linkcheck  to check all external links for integrity
	echo.  doctest    to run all doctests embedded in the documentation if enabled
	goto end
)

if "%1" == "clean" (
	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
	del /q /s %BUILDDIR%\*
	goto end
)

if "%1" == "html" (
	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
	goto end
)

if "%1" == "dirhtml" (
	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
	goto end
)

if "%1" == "singlehtml" (
	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
	goto end
)

if "%1" == "pickle" (
	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can process the pickle files.
	goto end
)

if "%1" == "json" (
	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can process the JSON files.
	goto end
)

if "%1" == "htmlhelp" (
	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
	goto end
)

if "%1" == "qthelp" (
	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\sample.qhcp
	echo.To view the help file:
	REM Fixed: was "sample.ghc" — qcollectiongenerator emits a .qhc file
	REM (the companion docs/Makefile uses sample.qhc for the same step).
	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\sample.qhc
	goto end
)

if "%1" == "devhelp" (
	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished.
	goto end
)

if "%1" == "epub" (
	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The epub file is in %BUILDDIR%/epub.
	goto end
)

if "%1" == "latex" (
	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
	goto end
)

if "%1" == "text" (
	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The text files are in %BUILDDIR%/text.
	goto end
)

if "%1" == "man" (
	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The manual pages are in %BUILDDIR%/man.
	goto end
)

if "%1" == "texinfo" (
	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
	goto end
)

if "%1" == "gettext" (
	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
	goto end
)

if "%1" == "changes" (
	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
	if errorlevel 1 exit /b 1
	echo.
	echo.The overview file is in %BUILDDIR%/changes.
	goto end
)

if "%1" == "linkcheck" (
	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
	if errorlevel 1 exit /b 1
	echo.
	echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
	goto end
)

if "%1" == "doctest" (
	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
	if errorlevel 1 exit /b 1
	echo.
	echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
	goto end
)

:end
--------------------------------------------------------------------------------
/tests/analyzers/morphological/test_adjective_morphological_analyzer.py:
--------------------------------------------------------------------------------
# pylint: disable=missing-docstring,no-self-use

import re
import pytest

from context import esperanto_analyzer

from esperanto_analyzer.speech import Adjective
from esperanto_analyzer.analyzers.morphological import AdjectiveMorphologicalAnalyzer

class TestAdjectiveMorphologicalAnalyzerBasic():
    TEST_WORD = 'bela'

    def test_import(self):
        assert AdjectiveMorphologicalAnalyzer

    def test_initialize_default_options(self):
        analyzer = AdjectiveMorphologicalAnalyzer(self.TEST_WORD)

        assert analyzer.options == dict()

    def test_initialize_overwrite_options(self):
        analyzer = AdjectiveMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok'))

        assert analyzer.options == dict(option='ok')

    def test_initialize_raw_word(self):
        analyzer = AdjectiveMorphologicalAnalyzer(self.TEST_WORD)

        assert analyzer.raw_word == self.TEST_WORD

    def test_initialize_word(self):
        analyzer = AdjectiveMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.word is only populated after calling `analyze()` method
        assert analyzer.word is None

    def test_initialize_matches(self):
        analyzer = AdjectiveMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.matches is only populated after calling `analyze()` method
        assert analyzer.matches is None
test_initialize_processed(self): 45 | analyzer = AdjectiveMorphologicalAnalyzer(self.TEST_WORD) 46 | 47 | # analyzer.matches is only populated after calling `analyze()` method 48 | assert analyzer.processed is False 49 | 50 | def test_match_regexp_value(self): 51 | assert AdjectiveMorphologicalAnalyzer.MATCH_REGEXP == re.compile('(^[a-zA-Zĉĝĵĥŝŭ]{2,}(a(j?n?)?)$)', re.IGNORECASE) 52 | 53 | def test_match_regexp(self): 54 | assert AdjectiveMorphologicalAnalyzer.MATCH_REGEXP is not None 55 | 56 | def test_word_class(self): 57 | assert isinstance(AdjectiveMorphologicalAnalyzer.word_class()(self.TEST_WORD), Adjective) 58 | 59 | class TestAdjectiveMorphologicalAnalyzerMatchMethod(): 60 | VALID_WORDS = ['bela', 'belan', 'belaj', 'belajn'] 61 | INVALID_WORDS = ['domo', 'la', '?', '!'] 62 | 63 | def test_match(self): 64 | for word in self.VALID_WORDS: 65 | analyzer = AdjectiveMorphologicalAnalyzer(word) 66 | matches = analyzer.match() 67 | 68 | assert matches is not None 69 | 70 | def test_match_empty(self): 71 | for word in self.INVALID_WORDS: 72 | analyzer = AdjectiveMorphologicalAnalyzer(word) 73 | matches = analyzer.match() 74 | 75 | assert matches is None 76 | 77 | class TestAdjectiveMorphologicalAnalyzerAnalyzeMethod(): 78 | INVALID_WORDS = [ 79 | 'io', 'multe', 'domo', 'hundoj', 'kiu', 'vi', 80 | '[', ']', '{', '}', '|', '\\', '(', ')', '=', '+', '*', 81 | '&', '^', '%', '$', '#', '@', '`', '~', ';', ':', ',', '.', 82 | '<', '>', '/', 83 | '.!', '!', 'n!', 'jn!', 'j!', 84 | '..!', '..!', '..n!', '..jn!', 85 | '..aj!', '..ajn!', '..aj', '..ajn', 'ajn', 86 | '.!', '?', 'n?', 'jn?', 'j?', 87 | '90a', '000an', '999ajn', '000aj', '__ajn', '__an', '__a', 88 | 'bel0an', 'bel9ajn', '9belajn', '9bela', 89 | ] 90 | 91 | VALID_WORDS = [ 92 | 'ĝusta', 'bela', 'belan', 'belaj', 'belajn', 'bongusta' 93 | ] 94 | 95 | def test_invalid_analyze(self): 96 | for word in self.INVALID_WORDS: 97 | analyzer = AdjectiveMorphologicalAnalyzer(word) 98 | result = analyzer.analyze() 99 | 100 | 
assert not result 101 | 102 | def test_invalid_analyze_word(self): 103 | for word in self.INVALID_WORDS: 104 | analyzer = AdjectiveMorphologicalAnalyzer(word) 105 | analyzer.analyze() 106 | 107 | assert analyzer.word is None 108 | 109 | def test_invalid_analyze_match(self): 110 | for word in self.INVALID_WORDS: 111 | analyzer = AdjectiveMorphologicalAnalyzer(word) 112 | analyzer.analyze() 113 | 114 | assert analyzer.matches is None 115 | 116 | def test_analyze(self): 117 | for word in self.VALID_WORDS: 118 | analyzer = AdjectiveMorphologicalAnalyzer(word) 119 | 120 | assert analyzer.analyze() 121 | 122 | def test_analyze_word(self): 123 | for word in self.VALID_WORDS: 124 | analyzer = AdjectiveMorphologicalAnalyzer(word) 125 | analyzer.analyze() 126 | 127 | assert isinstance(analyzer.word, Adjective) 128 | assert analyzer.word.content == word 129 | 130 | def test_analyze_match(self): 131 | for word in self.VALID_WORDS: 132 | analyzer = AdjectiveMorphologicalAnalyzer(word) 133 | analyzer.analyze() 134 | 135 | assert analyzer.matches is not None 136 | 137 | def test_analyze_return_false(self): 138 | for word in self.INVALID_WORDS: 139 | analyzer = AdjectiveMorphologicalAnalyzer(word) 140 | 141 | assert analyzer.analyze() is False 142 | 143 | def test_analyze_return_true(self): 144 | for word in self.VALID_WORDS: 145 | analyzer = AdjectiveMorphologicalAnalyzer(word) 146 | 147 | assert analyzer.analyze() 148 | 149 | 150 | def test_analyze_processed(self): 151 | for word in self.VALID_WORDS: 152 | analyzer = AdjectiveMorphologicalAnalyzer(word) 153 | 154 | assert analyzer.processed is False 155 | 156 | analyzer.analyze() 157 | 158 | assert analyzer.processed is True 159 | 160 | def test_analyze_processed_response(self): 161 | for word in self.VALID_WORDS: 162 | analyzer = AdjectiveMorphologicalAnalyzer(word) 163 | analyzer.analyze() 164 | 165 | assert analyzer.analyze() is None 166 | assert analyzer.analyze() is None 167 | 
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
# Makefile for Sphinx documentation
#
# NOTE(review): recipe lines below are TAB-indented as make requires; the dump
# this file was recovered from had flattened all indentation. The dump had also
# stripped the literal "<target>" from the help echo; restored.

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
PAPER         =
BUILDDIR      = _build

# Internal variables.
PAPEROPT_a4     = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .

.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext

help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"

clean:
	-rm -rf $(BUILDDIR)/*

html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/sample.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/sample.qhc"

devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/sample"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/sample"
	@echo "# devhelp"

epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	make -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."
--------------------------------------------------------------------------------
/tests/analyzers/morphological/test_article_morphological_analyzer.py:
--------------------------------------------------------------------------------
# pylint: disable=missing-docstring,no-self-use

import re
import pytest

from context import esperanto_analyzer

from esperanto_analyzer.speech import Article
from esperanto_analyzer.analyzers.morphological import ArticleMorphologicalAnalyzer

class TestArticleMorphologicalAnalyzerBasic():
    TEST_WORD = 'la'

    def test_import(self):
        assert ArticleMorphologicalAnalyzer

    def test_initialize_default_options(self):
        analyzer = ArticleMorphologicalAnalyzer(self.TEST_WORD)

        assert analyzer.options == dict()

    def test_initialize_overwrite_options(self):
        analyzer = ArticleMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok'))

        assert analyzer.options == dict(option='ok')

    def test_initialize_raw_word(self):
        analyzer = ArticleMorphologicalAnalyzer(self.TEST_WORD)

        assert analyzer.raw_word == self.TEST_WORD

    def test_initialize_word(self):
        analyzer = ArticleMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.word is only populated after calling `analyze()` method
        assert analyzer.word is None

    def test_initialize_matches(self):
        analyzer = ArticleMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.matches is only populated after calling `analyze()` method
        assert analyzer.matches is None
calling `analyze()` method 42 | assert analyzer.matches is None 43 | 44 | def test_initialize_processed(self): 45 | analyzer = ArticleMorphologicalAnalyzer(self.TEST_WORD) 46 | 47 | # analyzer.matches is only populated after calling `analyze()` method 48 | assert analyzer.processed is False 49 | 50 | def test_match_regexp(self): 51 | assert ArticleMorphologicalAnalyzer.MATCH_REGEXP is not None 52 | 53 | def test_word_class(self): 54 | assert isinstance(ArticleMorphologicalAnalyzer.word_class()(self.TEST_WORD), Article) 55 | 56 | class TestArticleMorphologicalAnalyzerMatchMethod(): 57 | VALID_WORDS = ['la'] 58 | INVALID_WORDS = ['io', 'lo', 'bela', 'domo', 'hundoj', 'kiu', 'vi', 'kun', 'multe', 'ankoraŭ', '?', '!'] 59 | 60 | def test_match(self): 61 | for word in self.VALID_WORDS: 62 | analyzer = ArticleMorphologicalAnalyzer(word) 63 | matches = analyzer.match() 64 | 65 | assert matches is not None 66 | assert len(matches.span()) == 2 67 | 68 | def test_match_empty(self): 69 | for word in self.INVALID_WORDS: 70 | analyzer = ArticleMorphologicalAnalyzer(word) 71 | matches = analyzer.match() 72 | 73 | assert matches is None 74 | 75 | class TestArticleMorphologicalAnalyzerAnalyzeMethod(): 76 | VALID_WORDS = ['la'] 77 | INVALID_WORDS = ['io', 'lo', 'bela', 'domo', 'hundoj', 'kiu', 'vi', 'kun', 'multe', 'ankoraŭ', '?', '!'] 78 | 79 | def test_invalid_analyze(self): 80 | for word in self.INVALID_WORDS: 81 | analyzer = ArticleMorphologicalAnalyzer(word) 82 | result = analyzer.analyze() 83 | 84 | assert not result 85 | 86 | def test_invalid_analyze_word(self): 87 | for word in self.INVALID_WORDS: 88 | analyzer = ArticleMorphologicalAnalyzer(word) 89 | analyzer.analyze() 90 | 91 | assert analyzer.word is None 92 | 93 | def test_invalid_analyze_match(self): 94 | for word in self.INVALID_WORDS: 95 | analyzer = ArticleMorphologicalAnalyzer(word) 96 | analyzer.analyze() 97 | 98 | assert analyzer.matches is None 99 | 100 | def test_analyze(self): 101 | for word in 
self.VALID_WORDS: 102 | analyzer = ArticleMorphologicalAnalyzer(word) 103 | 104 | assert analyzer.analyze() 105 | 106 | def test_conjunctions_list(self): 107 | for word in ArticleMorphologicalAnalyzer.ARTICLES_LIST: 108 | analyzer = ArticleMorphologicalAnalyzer(word) 109 | 110 | assert analyzer.analyze() 111 | 112 | def test_analyze_word(self): 113 | for word in self.VALID_WORDS: 114 | analyzer = ArticleMorphologicalAnalyzer(word) 115 | analyzer.analyze() 116 | 117 | assert isinstance(analyzer.word, Article) 118 | assert analyzer.word.content == word 119 | 120 | def test_analyze_match(self): 121 | for word in self.VALID_WORDS: 122 | analyzer = ArticleMorphologicalAnalyzer(word) 123 | analyzer.analyze() 124 | 125 | assert analyzer.matches is not None 126 | 127 | def test_analyze_return_false(self): 128 | for word in self.INVALID_WORDS: 129 | analyzer = ArticleMorphologicalAnalyzer(word) 130 | 131 | assert analyzer.analyze() is False 132 | 133 | def test_analyze_return_true(self): 134 | for word in self.VALID_WORDS: 135 | analyzer = ArticleMorphologicalAnalyzer(word) 136 | 137 | assert analyzer.analyze() 138 | 139 | 140 | def test_analyze_processed(self): 141 | for word in self.VALID_WORDS: 142 | analyzer = ArticleMorphologicalAnalyzer(word) 143 | 144 | assert analyzer.processed is False 145 | 146 | analyzer.analyze() 147 | 148 | assert analyzer.processed is True 149 | 150 | def test_analyze_processed_response(self): 151 | for word in self.VALID_WORDS: 152 | analyzer = ArticleMorphologicalAnalyzer(word) 153 | analyzer.analyze() 154 | 155 | assert analyzer.analyze() is None 156 | assert analyzer.analyze() is None 157 | 158 | class TestArticleMorphologicalAnalyzerConjuctionList: 159 | def test_conjunctions_not_empty(self): 160 | assert ArticleMorphologicalAnalyzer.ARTICLES_LIST is not None 161 | 162 | def test_conjunctions_not_size(self): 163 | assert len(ArticleMorphologicalAnalyzer.ARTICLES_LIST) == 1 164 | 165 | def test_conjunctions_match_list(self): 166 | for word 
in ArticleMorphologicalAnalyzer.ARTICLES_LIST: 167 | assert ArticleMorphologicalAnalyzer.ARTICLES_MATCH_REGEXP.match(word) 168 | 169 | def test_conjunctions_match_final_regexp_list(self): 170 | for word in ArticleMorphologicalAnalyzer.ARTICLES_LIST: 171 | assert ArticleMorphologicalAnalyzer.MATCH_REGEXP.match(word) 172 | -------------------------------------------------------------------------------- /tests/analyzers/morphological/test_conjuction_morphological_analyzer.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring,no-self-use 2 | 3 | import re 4 | import pytest 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer.speech import Conjunction 9 | from esperanto_analyzer.analyzers.morphological import ConjunctionMorphologicalAnalyzer 10 | 11 | class TestConjunctionMorphologicalAnalyzerBasic(): 12 | TEST_WORD = 'kaj' 13 | 14 | def test_import(self): 15 | assert ConjunctionMorphologicalAnalyzer 16 | 17 | def test_initialize_default_options(self): 18 | analyzer = ConjunctionMorphologicalAnalyzer(self.TEST_WORD) 19 | 20 | assert analyzer.options == dict() 21 | 22 | def test_initialize_overwrite_options(self): 23 | analyzer = ConjunctionMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok')) 24 | 25 | assert analyzer.options == dict(option='ok') 26 | 27 | def test_initialize_raw_word(self): 28 | analyzer = ConjunctionMorphologicalAnalyzer(self.TEST_WORD) 29 | 30 | assert analyzer.raw_word == self.TEST_WORD 31 | 32 | def test_initialize_word(self): 33 | analyzer = ConjunctionMorphologicalAnalyzer(self.TEST_WORD) 34 | 35 | # analyzer.word is only populated after calling `analyze()` method 36 | assert analyzer.word is None 37 | 38 | def test_initialize_matches(self): 39 | analyzer = ConjunctionMorphologicalAnalyzer(self.TEST_WORD) 40 | 41 | # analyzer.matches is only populated after calling `analyze()` method 42 | assert analyzer.matches is None 43 | 44 | def 
test_initialize_processed(self): 45 | analyzer = ConjunctionMorphologicalAnalyzer(self.TEST_WORD) 46 | 47 | # analyzer.matches is only populated after calling `analyze()` method 48 | assert analyzer.processed is False 49 | 50 | def test_match_regexp(self): 51 | assert ConjunctionMorphologicalAnalyzer.MATCH_REGEXP is not None 52 | 53 | def test_word_class(self): 54 | assert isinstance(ConjunctionMorphologicalAnalyzer.word_class()(self.TEST_WORD), Conjunction) 55 | 56 | class TestConjunctionMorphologicalAnalyzerMatchMethod(): 57 | VALID_WORDS = [ 58 | 'ĉar', 'aŭ', 'kaj', 'kiel', 'kiam', 'minus', 'nek', 'sed', 'tial', 59 | 60 | ] 61 | 62 | INVALID_WORDS = ['io', 'bela', 'domo', 'hundoj', 'kiu', 'vi', 'kun', 'multe', 'ankoraŭ', '?', '!'] 63 | 64 | def test_match(self): 65 | for word in self.VALID_WORDS: 66 | analyzer = ConjunctionMorphologicalAnalyzer(word) 67 | matches = analyzer.match() 68 | 69 | assert matches is not None 70 | assert len(matches.span()) == 2 71 | 72 | def test_match_empty(self): 73 | for word in self.INVALID_WORDS: 74 | analyzer = ConjunctionMorphologicalAnalyzer(word) 75 | matches = analyzer.match() 76 | 77 | assert matches is None 78 | 79 | class TestConjunctionMorphologicalAnalyzerAnalyzeMethod(): 80 | VALID_WORDS = [ 81 | 'ĉar', 'aŭ', 'kaj', 'kiel', 'kiam', 'minus', 'nek', 'sed', 'tial' 82 | ] 83 | 84 | INVALID_WORDS = [ 85 | 'io', 'bela', 'domo', 'hundoj', 'kiu', 'vi', 'kun', 'multe', 'ankoraŭ', '?', '!' 
86 | ] 87 | 88 | def test_invalid_analyze(self): 89 | for word in self.INVALID_WORDS: 90 | analyzer = ConjunctionMorphologicalAnalyzer(word) 91 | result = analyzer.analyze() 92 | 93 | assert not result 94 | 95 | def test_invalid_analyze_word(self): 96 | for word in self.INVALID_WORDS: 97 | analyzer = ConjunctionMorphologicalAnalyzer(word) 98 | analyzer.analyze() 99 | 100 | assert analyzer.word is None 101 | 102 | def test_invalid_analyze_match(self): 103 | for word in self.INVALID_WORDS: 104 | analyzer = ConjunctionMorphologicalAnalyzer(word) 105 | analyzer.analyze() 106 | 107 | assert analyzer.matches is None 108 | 109 | def test_analyze(self): 110 | for word in self.VALID_WORDS: 111 | analyzer = ConjunctionMorphologicalAnalyzer(word) 112 | 113 | assert analyzer.analyze() 114 | 115 | def test_conjunctions_list(self): 116 | for word in ConjunctionMorphologicalAnalyzer.CONJUNCTIONS_LIST: 117 | analyzer = ConjunctionMorphologicalAnalyzer(word) 118 | 119 | assert analyzer.analyze() 120 | 121 | def test_analyze_word(self): 122 | for word in self.VALID_WORDS: 123 | analyzer = ConjunctionMorphologicalAnalyzer(word) 124 | analyzer.analyze() 125 | 126 | assert isinstance(analyzer.word, Conjunction) 127 | assert analyzer.word.content == word 128 | 129 | def test_analyze_match(self): 130 | for word in self.VALID_WORDS: 131 | analyzer = ConjunctionMorphologicalAnalyzer(word) 132 | analyzer.analyze() 133 | 134 | assert analyzer.matches is not None 135 | 136 | def test_analyze_return_false(self): 137 | for word in self.INVALID_WORDS: 138 | analyzer = ConjunctionMorphologicalAnalyzer(word) 139 | 140 | assert analyzer.analyze() is False 141 | 142 | def test_analyze_return_true(self): 143 | for word in self.VALID_WORDS: 144 | analyzer = ConjunctionMorphologicalAnalyzer(word) 145 | 146 | assert analyzer.analyze() 147 | 148 | 149 | def test_analyze_processed(self): 150 | for word in self.VALID_WORDS: 151 | analyzer = ConjunctionMorphologicalAnalyzer(word) 152 | 153 | assert 
analyzer.processed is False 154 | 155 | analyzer.analyze() 156 | 157 | assert analyzer.processed is True 158 | 159 | def test_analyze_processed_response(self): 160 | for word in self.VALID_WORDS: 161 | analyzer = ConjunctionMorphologicalAnalyzer(word) 162 | analyzer.analyze() 163 | 164 | assert analyzer.analyze() is None 165 | assert analyzer.analyze() is None 166 | 167 | class TestConjunctionMorphologicalAnalyzerConjuctionList: 168 | def test_conjunctions_not_empty(self): 169 | assert ConjunctionMorphologicalAnalyzer.CONJUNCTIONS_LIST is not None 170 | 171 | def test_conjunctions_not_size(self): 172 | assert len(ConjunctionMorphologicalAnalyzer.CONJUNCTIONS_LIST) == 25 173 | 174 | def test_conjunctions_match_list(self): 175 | for word in ConjunctionMorphologicalAnalyzer.CONJUNCTIONS_LIST: 176 | assert ConjunctionMorphologicalAnalyzer.CONJUCTIONS_MATCH_REGEXP.match(word) 177 | 178 | def test_conjunctions_match_final_regexp_list(self): 179 | for word in ConjunctionMorphologicalAnalyzer.CONJUNCTIONS_LIST: 180 | assert ConjunctionMorphologicalAnalyzer.MATCH_REGEXP.match(word) 181 | -------------------------------------------------------------------------------- /tests/test_morphological_sentence_analyzer.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring,no-self-use 2 | 3 | import re 4 | import pytest 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer.speech import Word 9 | from esperanto_analyzer.speech import Adverb 10 | from esperanto_analyzer.speech import Adjective 11 | from esperanto_analyzer.speech import Article, InvalidArticleError 12 | from esperanto_analyzer.speech import Conjunction 13 | from esperanto_analyzer.speech import Interjection 14 | from esperanto_analyzer.speech import Noun 15 | from esperanto_analyzer.speech import Numeral 16 | from esperanto_analyzer.speech import Preposition 17 | from esperanto_analyzer.speech import Pronoun 18 | from 
esperanto_analyzer.speech import Verb 19 | 20 | from esperanto_analyzer import MorphologicalSentenceAnalyzer 21 | 22 | 23 | class TestMorphologicalSentenceAnalyzerBasic(): 24 | TEST_SENTENCE = 'Mi loĝas en Brazilo' 25 | 26 | def test_import(self): 27 | assert MorphologicalSentenceAnalyzer 28 | 29 | def test_initialize(self): 30 | assert MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 31 | 32 | def test_initialize_sentence(self): 33 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 34 | 35 | assert analyzer.sentence is self.TEST_SENTENCE 36 | 37 | def test_initialize_sentence_words(self): 38 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 39 | 40 | assert analyzer.sentence_words == ['Mi', 'loĝas', 'en', 'Brazilo'] 41 | 42 | def test_initialize_results(self): 43 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 44 | 45 | assert analyzer.results() is None 46 | 47 | def test_initialize_processed(self): 48 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 49 | 50 | assert analyzer.processed is False 51 | 52 | def test_analyze(self): 53 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 54 | 55 | assert analyzer.analyze() 56 | 57 | def test_analyze_results(self): 58 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 59 | 60 | assert analyzer.analyze() 61 | assert analyzer.results() is not None 62 | 63 | def test_analyze_results_size(self): 64 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 65 | 66 | assert analyzer.analyze() 67 | assert len(analyzer.results()) == 4 68 | assert len(analyzer.results()[1]) == 2 69 | 70 | def test_analyze_processed(self): 71 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 72 | 73 | assert analyzer.processed is False 74 | assert analyzer.analyze() 75 | assert analyzer.processed is True 76 | 77 | def test_analyze_processed_multiples_times(self): 78 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 79 | 80 | assert 
analyzer.processed is False 81 | assert analyzer.analyze() # First analyze 82 | assert analyzer.processed is True 83 | assert analyzer.analyze() is None 84 | assert analyzer.analyze() is None 85 | 86 | def test_analyze_internal_results_class(self): 87 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 88 | analyzer.analyze() 89 | 90 | classes_names = [an.__class__.__name__ for an in analyzer.internal_results] 91 | 92 | assert classes_names == ['MorphologicalAnalyzer', 'MorphologicalAnalyzer', 'MorphologicalAnalyzer', 'MorphologicalAnalyzer'] 93 | 94 | def test_analyzes_results_not_processed(self): 95 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 96 | 97 | assert analyzer.analyzes_results() is None 98 | 99 | def test_analyzes_internals_results_processed(self): 100 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 101 | analyzer.analyze() 102 | 103 | assert analyzer.analyzes_results() == [result.results for result in analyzer.internal_results] 104 | 105 | def test_analyzes_results_class(self): 106 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 107 | analyzer.analyze() 108 | 109 | classes_names = [analyze.__class__.__name__ for analyze in analyzer.analyzes_results()] 110 | 111 | assert classes_names == ['AnalyzeResult', 'AnalyzeResult', 'AnalyzeResult', 'AnalyzeResult'] 112 | 113 | def test_analyzes_results_class_result(self): 114 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 115 | analyzer.analyze() 116 | 117 | result_classes = [analyze.result.__class__.__name__ for analyze in analyzer.analyzes_results()] 118 | 119 | assert result_classes == ['PronounMorphologicalAnalyzer', 'VerbMorphologicalAnalyzer', 'PrepositionMorphologicalAnalyzer', 'NounMorphologicalAnalyzer'] 120 | 121 | def test_analyzes_results_word_classnames(self): 122 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 123 | analyzer.analyze() 124 | 125 | result_classes = [analyze.result.word.__class__.__name__ for 
analyze in analyzer.analyzes_results()] 126 | 127 | assert result_classes == ['Pronoun', 'Verb', 'Preposition', 'Noun'] 128 | 129 | def test_analyzes_results_raw_word(self): 130 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 131 | analyzer.analyze() 132 | 133 | words = [analyze.result.raw_word for analyze in analyzer.analyzes_results()] 134 | 135 | assert words == ['Mi', 'loĝas', 'en', 'Brazilo'] 136 | 137 | def test_analyzes_results_processed(self): 138 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 139 | analyzer.analyze() 140 | 141 | processed_status = [an.result.processed for an in analyzer.analyzes_results()] 142 | 143 | assert processed_status == [True, True, True, True] 144 | 145 | def test_analyzes_results_word_class(self): 146 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 147 | analyzer.analyze() 148 | 149 | words_classes = [an.result.word_class() for an in analyzer.analyzes_results()] 150 | 151 | assert words_classes == [Pronoun, Verb, Preposition, Noun] 152 | 153 | 154 | def test_sentence_clean_regexp(self): 155 | sentence = '(Mia) [nomo] estas, Esperanto. Hodiau estas la jaro 2018. 
jes' 156 | new_sentence = re.sub(MorphologicalSentenceAnalyzer.SENTENCE_CLEAN_REGEXP, '', sentence) 157 | 158 | assert new_sentence == 'Mia nomo estas Esperanto Hodiau estas la jaro 2018 jes' 159 | 160 | def test_undefined_token(self): 161 | analyzer = MorphologicalSentenceAnalyzer('Mia asdiosdsds') 162 | analyzer.analyze() 163 | 164 | assert analyzer.simple_results() == [['Mia', 'Pronoun'], ['asdiosdsds', 'Undefined']] 165 | assert analyzer.simple_results()[1][1] == 'Undefined' 166 | -------------------------------------------------------------------------------- /tests/analyzers/morphological/test_adverb_morphological_analyzer.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring,no-self-use 2 | 3 | import re 4 | import pytest 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer.speech import Adverb 9 | from esperanto_analyzer.analyzers.morphological import AdverbMorphologicalAnalyzer 10 | 11 | class TestAdverbMorphologicalAnalyzerBasic(): 12 | TEST_WORD = 'bonege' 13 | 14 | def test_import(self): 15 | assert AdverbMorphologicalAnalyzer 16 | 17 | def test_initialize_default_options(self): 18 | analyzer = AdverbMorphologicalAnalyzer(self.TEST_WORD) 19 | 20 | assert analyzer.options == dict() 21 | 22 | def test_initialize_overwrite_options(self): 23 | analyzer = AdverbMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok')) 24 | 25 | assert analyzer.options == dict(option='ok') 26 | 27 | def test_initialize_raw_word(self): 28 | analyzer = AdverbMorphologicalAnalyzer(self.TEST_WORD) 29 | 30 | assert analyzer.raw_word == self.TEST_WORD 31 | 32 | def test_initialize_word(self): 33 | analyzer = AdverbMorphologicalAnalyzer(self.TEST_WORD) 34 | 35 | # analyzer.word is only populated after calling `analyze()` method 36 | assert analyzer.word is None 37 | 38 | def test_initialize_matches(self): 39 | analyzer = AdverbMorphologicalAnalyzer(self.TEST_WORD) 40 | 41 | # 
analyzer.matches is only populated after calling `analyze()` method 42 | assert analyzer.matches is None 43 | 44 | def test_initialize_processed(self): 45 | analyzer = AdverbMorphologicalAnalyzer(self.TEST_WORD) 46 | 47 | # analyzer.matches is only populated after calling `analyze()` method 48 | assert analyzer.processed is False 49 | 50 | def test_match_regexp(self): 51 | assert AdverbMorphologicalAnalyzer.MATCH_REGEXP is not None 52 | 53 | def test_word_class(self): 54 | assert isinstance(AdverbMorphologicalAnalyzer.word_class()(self.TEST_WORD), Adverb) 55 | 56 | class TestAdverbMorphologicalAnalyzerMatchMethod(): 57 | VALID_WORDS = [ 58 | 'multe', 'bone', 'rapide', 'almenaŭ', 'ankoraŭ', 'ĝuste' 59 | ] 60 | 61 | INVALID_WORDS = [ 62 | 'io', 'bela', 'domo', 'hundoj', 'kiu', 'vi', '?', '!', 63 | '[', ']', '{', '}', '|', '\\', '(', ')', '=', '+', '*', 64 | '&', '^', '%', '$', '#', '@', '`', '~', ';', ':', ',', '.', 65 | '<', '>', '/', 66 | '.!', '!', 'n!', 'jn!', 'j!', 67 | '..!', '..!', '..n!', '..jn!', 68 | '..ej!', '..ejn!', '..ej', '..ejn', 'ejn', 69 | '.!', '?', 'n?', 'jn?', 'j?', 70 | '90e', '000en', '999ejn', '000ej', '__ejn', '__en', '__e', 71 | 'bel0en', 'bel9ejn', '9belejn', '9bele', 'almen9ŭ', '.lmenaŭ', 72 | ] 73 | 74 | def test_match(self): 75 | for word in self.VALID_WORDS: 76 | analyzer = AdverbMorphologicalAnalyzer(word) 77 | matches = analyzer.match() 78 | 79 | assert matches is not None 80 | assert len(matches.span()) == 2 81 | 82 | def test_match_empty(self): 83 | for word in self.INVALID_WORDS: 84 | analyzer = AdverbMorphologicalAnalyzer(word) 85 | matches = analyzer.match() 86 | 87 | assert matches is None 88 | 89 | def test_match_regexp_value(self): 90 | assert AdverbMorphologicalAnalyzer.MATCH_REGEXP == re.compile('^(([a-zA-Zĉĝĵĥŝŭ]{2,}(e))|almenaŭ|ambaŭ|antaŭ|ankaŭ|ankoraŭ|apenaŭ|baldaŭ|ĉirkaŭ|hieraŭ|hodiaŭ|kvazaŭ|morgaŭ|preskaŭ|nun|tiam|ĉiam|neniam|tuj|jam|tie|tien|ĉie|nenie|for|eksteren|tre)$', re.IGNORECASE) 91 | 92 | class 
TestAdverbMorphologicalAnalyzerAnalyzeMethod(): 93 | INVALID_WORDS = ['io', 'bela', 'domo', 'hundoj', 'kiu', 'vi'] 94 | 95 | VALID_WORDS = [ 96 | 'multe', 'bone', 'rapide', 'almenaŭ', 'ankoraŭ', 'ĝuste' 97 | ] 98 | 99 | def test_invalid_analyze(self): 100 | for word in self.INVALID_WORDS: 101 | analyzer = AdverbMorphologicalAnalyzer(word) 102 | result = analyzer.analyze() 103 | 104 | assert not result 105 | 106 | def test_invalid_analyze_word(self): 107 | for word in self.INVALID_WORDS: 108 | analyzer = AdverbMorphologicalAnalyzer(word) 109 | analyzer.analyze() 110 | 111 | assert analyzer.word is None 112 | 113 | def test_invalid_analyze_match(self): 114 | for word in self.INVALID_WORDS: 115 | analyzer = AdverbMorphologicalAnalyzer(word) 116 | analyzer.analyze() 117 | 118 | assert analyzer.matches is None 119 | 120 | def test_analyze(self): 121 | for word in self.VALID_WORDS: 122 | analyzer = AdverbMorphologicalAnalyzer(word) 123 | 124 | assert analyzer.analyze() 125 | 126 | def test_adverbs_list(self): 127 | for word in AdverbMorphologicalAnalyzer.SPECIAL_ADVERBS: 128 | analyzer = AdverbMorphologicalAnalyzer(word) 129 | 130 | assert analyzer.analyze() 131 | 132 | def test_analyze_word(self): 133 | for word in self.VALID_WORDS: 134 | analyzer = AdverbMorphologicalAnalyzer(word) 135 | analyzer.analyze() 136 | 137 | assert isinstance(analyzer.word, Adverb) 138 | assert analyzer.word.content == word 139 | 140 | def test_analyze_match(self): 141 | for word in self.VALID_WORDS: 142 | analyzer = AdverbMorphologicalAnalyzer(word) 143 | analyzer.analyze() 144 | 145 | assert analyzer.matches is not None 146 | 147 | def test_analyze_return_false(self): 148 | for word in self.INVALID_WORDS: 149 | analyzer = AdverbMorphologicalAnalyzer(word) 150 | 151 | assert analyzer.analyze() is False 152 | 153 | def test_analyze_return_true(self): 154 | for word in self.VALID_WORDS: 155 | analyzer = AdverbMorphologicalAnalyzer(word) 156 | 157 | assert analyzer.analyze() 158 | 159 | 160 | 
def test_analyze_processed(self): 161 | for word in self.VALID_WORDS: 162 | analyzer = AdverbMorphologicalAnalyzer(word) 163 | 164 | assert analyzer.processed is False 165 | 166 | analyzer.analyze() 167 | 168 | assert analyzer.processed is True 169 | 170 | def test_analyze_processed_response(self): 171 | for word in self.VALID_WORDS: 172 | analyzer = AdverbMorphologicalAnalyzer(word) 173 | analyzer.analyze() 174 | 175 | assert analyzer.analyze() is None 176 | assert analyzer.analyze() is None 177 | 178 | class TestAdverbMorphologicalAnalyzerAdversList: 179 | def test_adverbs_not_empty(self): 180 | assert AdverbMorphologicalAnalyzer.SPECIAL_ADVERBS is not None 181 | 182 | def test_adverbs_not_size(self): 183 | assert len(AdverbMorphologicalAnalyzer.SPECIAL_ADVERBS) == 26 184 | 185 | def test_adverbs_match_list(self): 186 | for word in AdverbMorphologicalAnalyzer.SPECIAL_ADVERBS: 187 | assert AdverbMorphologicalAnalyzer.SPECIAL_ADVERBS_MATCH_REGEXP.match(word) 188 | 189 | def test_adverbs_match_final_regexp_list(self): 190 | for word in AdverbMorphologicalAnalyzer.SPECIAL_ADVERBS: 191 | assert AdverbMorphologicalAnalyzer.MATCH_REGEXP.match(word) 192 | -------------------------------------------------------------------------------- /tests/analyzers/morphological/test_interjection_morphological_analyzer.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring,no-self-use 2 | 3 | import re 4 | import pytest 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer.speech import Interjection 9 | from esperanto_analyzer.analyzers.morphological import InterjectionMorphologicalAnalyzer 10 | 11 | class TestInterjectionMorphologicalAnalyzerBasic(): 12 | TEST_WORD = 'volapukaĵo!' 
13 | 14 | def test_import(self): 15 | assert InterjectionMorphologicalAnalyzer 16 | 17 | def test_initialize_default_options(self): 18 | analyzer = InterjectionMorphologicalAnalyzer(self.TEST_WORD) 19 | 20 | assert analyzer.options == dict() 21 | 22 | def test_initialize_overwrite_options(self): 23 | analyzer = InterjectionMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok')) 24 | 25 | assert analyzer.options == dict(option='ok') 26 | 27 | def test_initialize_raw_word(self): 28 | analyzer = InterjectionMorphologicalAnalyzer(self.TEST_WORD) 29 | 30 | assert analyzer.raw_word == self.TEST_WORD 31 | 32 | def test_initialize_word(self): 33 | analyzer = InterjectionMorphologicalAnalyzer(self.TEST_WORD) 34 | 35 | # analyzer.word is only populated after calling `analyze()` method 36 | assert analyzer.word is None 37 | 38 | def test_initialize_matches(self): 39 | analyzer = InterjectionMorphologicalAnalyzer(self.TEST_WORD) 40 | 41 | # analyzer.matches is only populated after calling `analyze()` method 42 | assert analyzer.matches is None 43 | 44 | def test_initialize_processed(self): 45 | analyzer = InterjectionMorphologicalAnalyzer(self.TEST_WORD) 46 | 47 | # analyzer.matches is only populated after calling `analyze()` method 48 | assert analyzer.processed is False 49 | 50 | def test_match_regexp(self): 51 | assert InterjectionMorphologicalAnalyzer.MATCH_REGEXP is not None 52 | 53 | def test_word_class(self): 54 | assert isinstance(InterjectionMorphologicalAnalyzer.word_class()(self.TEST_WORD), Interjection) 55 | 56 | class TestInterjectionMorphologicalAnalyzerMatchMethod(): 57 | VALID_WORDS = ['Aĥ!', 'Aj!', 'Ba!', 'Baf!', 'Baj!', 'Be!', 'Bis!', 'Diable!', 'Ek!', 58 | 'Fi!', 'Fu!', 'Ĝis!', 'Ha!', 'Ha lo!', 'He!', 'Hej!', 'Ho!', 'Ho ve!', 59 | 'Hoj!', 'Hola!', 'Hu!', 'Hup!', 'Hura!', 'Lo!', 'Lu lu!', 'Nu!', 'Uf!', 60 | 'Up!', 'Ŭa!', 'Ve!', 'Volapukaĵo!', 'Jen' 61 | ] 62 | 63 | INVALID_WORDS = ['io', 'bela', 'domo', 'hundoj', 'kiu', 'vi', 'multe', 'ankoraŭ', 'dek', 64 | 
'du', 'ĉar', 'aŭ', 'kontraŭ', 'kontraŭe de', 'krom', 'kun', 'laŭ'] 65 | 66 | def test_match(self): 67 | for word in self.VALID_WORDS: 68 | analyzer = InterjectionMorphologicalAnalyzer(word) 69 | matches = analyzer.match() 70 | 71 | assert matches is not None 72 | assert len(matches.span()) == 2 73 | 74 | def test_match_empty(self): 75 | for word in self.INVALID_WORDS: 76 | analyzer = InterjectionMorphologicalAnalyzer(word) 77 | matches = analyzer.match() 78 | 79 | assert matches is None 80 | 81 | class TestInterjectionMorphologicalAnalyzerAnalyzeMethod(): 82 | VALID_WORDS = ['Aĥ!', 'Aj!', 'Ba!', 'Baf!', 'Baj!', 'Be!', 'Bis!', 'Diable!', 'Ek!', 83 | 'Fi!', 'Fu!', 'Ĝis!', 'Ha!', 'Ha lo!', 'He!', 'Hej!', 'Ho!', 'Ho ve!', 84 | 'Hoj!', 'Hola!', 'Hu!', 'Hup!', 'Hura!', 'Lo!', 'Lu lu!', 'Nu!', 'Uf!', 85 | 'Up!', 'Ŭa!', 'Ve!', 'Volapukaĵo!', 'Jen' 86 | ] 87 | 88 | INVALID_WORDS = ['io', 'bela', 'domo', 'hundoj', 'kiu', 'vi', 'multe', 'ankoraŭ', 'dek', 89 | 'du', 'ĉar', 'aŭ', 'kontraŭ', 'kontraŭe de', 'krom', 'kun', 'laŭ'] 90 | 91 | def test_invalid_analyze(self): 92 | for word in self.INVALID_WORDS: 93 | analyzer = InterjectionMorphologicalAnalyzer(word) 94 | result = analyzer.analyze() 95 | 96 | assert not result 97 | 98 | def test_invalid_analyze_word(self): 99 | for word in self.INVALID_WORDS: 100 | analyzer = InterjectionMorphologicalAnalyzer(word) 101 | analyzer.analyze() 102 | 103 | assert analyzer.word is None 104 | 105 | def test_invalid_analyze_match(self): 106 | for word in self.INVALID_WORDS: 107 | analyzer = InterjectionMorphologicalAnalyzer(word) 108 | analyzer.analyze() 109 | 110 | assert analyzer.matches is None 111 | 112 | def test_analyze(self): 113 | for word in self.VALID_WORDS: 114 | analyzer = InterjectionMorphologicalAnalyzer(word) 115 | 116 | assert analyzer.analyze() 117 | 118 | def test_prepositions_list(self): 119 | for word in InterjectionMorphologicalAnalyzer.INTERJECTIONS_LIST: 120 | analyzer = InterjectionMorphologicalAnalyzer(word) 121 | 122 
| assert analyzer.analyze() 123 | 124 | def test_analyze_word(self): 125 | for word in self.VALID_WORDS: 126 | analyzer = InterjectionMorphologicalAnalyzer(word) 127 | analyzer.analyze() 128 | 129 | assert isinstance(analyzer.word, Interjection) 130 | assert analyzer.word.content == word 131 | 132 | def test_analyze_match(self): 133 | for word in self.VALID_WORDS: 134 | analyzer = InterjectionMorphologicalAnalyzer(word) 135 | analyzer.analyze() 136 | 137 | assert analyzer.matches is not None 138 | 139 | def test_analyze_return_false(self): 140 | for word in self.INVALID_WORDS: 141 | analyzer = InterjectionMorphologicalAnalyzer(word) 142 | 143 | assert analyzer.analyze() is False 144 | 145 | def test_analyze_return_true(self): 146 | for word in self.VALID_WORDS: 147 | analyzer = InterjectionMorphologicalAnalyzer(word) 148 | 149 | assert analyzer.analyze() 150 | 151 | 152 | def test_analyze_processed(self): 153 | for word in self.VALID_WORDS: 154 | analyzer = InterjectionMorphologicalAnalyzer(word) 155 | 156 | assert analyzer.processed is False 157 | 158 | analyzer.analyze() 159 | 160 | assert analyzer.processed is True 161 | 162 | def test_analyze_processed_response(self): 163 | for word in self.VALID_WORDS: 164 | analyzer = InterjectionMorphologicalAnalyzer(word) 165 | analyzer.analyze() 166 | 167 | assert analyzer.analyze() is None 168 | assert analyzer.analyze() is None 169 | 170 | class TestInterjectionMorphologicalAnalyzerPrepositionsList: 171 | def test_prepositions_not_empty(self): 172 | assert InterjectionMorphologicalAnalyzer.INTERJECTIONS_LIST is not None 173 | 174 | def test_prepositions_not_size(self): 175 | assert len(InterjectionMorphologicalAnalyzer.INTERJECTIONS_LIST) == 32 176 | 177 | def test_prepositions_match_list(self): 178 | for word in InterjectionMorphologicalAnalyzer.INTERJECTIONS_LIST: 179 | assert InterjectionMorphologicalAnalyzer.INTERJECTIONS_MATCH_REGEXP.match(word) 180 | 181 | def test_prepositions_match_final_regexp_list(self): 182 | 
for word in InterjectionMorphologicalAnalyzer.INTERJECTIONS_LIST: 183 | assert InterjectionMorphologicalAnalyzer.MATCH_REGEXP.match(word) 184 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # sample documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Apr 16 21:22:43 2012. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | #sys.path.insert(0, os.path.abspath('.')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | #needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 28 | extensions = [] 29 | 30 | # Add any paths that contain templates here, relative to this directory. 31 | templates_path = ['_templates'] 32 | 33 | # The suffix of source filenames. 34 | source_suffix = '.rst' 35 | 36 | # The encoding of source files. 37 | #source_encoding = 'utf-8-sig' 38 | 39 | # The master toctree document. 40 | master_doc = 'index' 41 | 42 | # General information about the project. 
43 | project = u'sample' 44 | copyright = u'2012, Kenneth Reitz' 45 | 46 | # The version info for the project you're documenting, acts as replacement for 47 | # |version| and |release|, also used in various other places throughout the 48 | # built documents. 49 | # 50 | # The short X.Y version. 51 | version = 'v0.0.1' 52 | # The full version, including alpha/beta/rc tags. 53 | release = 'v0.0.1' 54 | 55 | # The language for content autogenerated by Sphinx. Refer to documentation 56 | # for a list of supported languages. 57 | #language = None 58 | 59 | # There are two options for replacing |today|: either, you set today to some 60 | # non-false value, then it is used: 61 | #today = '' 62 | # Else, today_fmt is used as the format for a strftime call. 63 | #today_fmt = '%B %d, %Y' 64 | 65 | # List of patterns, relative to source directory, that match files and 66 | # directories to ignore when looking for source files. 67 | exclude_patterns = ['_build'] 68 | 69 | # The reST default role (used for this markup: `text`) to use for all documents. 70 | #default_role = None 71 | 72 | # If true, '()' will be appended to :func: etc. cross-reference text. 73 | #add_function_parentheses = True 74 | 75 | # If true, the current module name will be prepended to all description 76 | # unit titles (such as .. function::). 77 | #add_module_names = True 78 | 79 | # If true, sectionauthor and moduleauthor directives will be shown in the 80 | # output. They are ignored by default. 81 | #show_authors = False 82 | 83 | # The name of the Pygments (syntax highlighting) style to use. 84 | pygments_style = 'sphinx' 85 | 86 | # A list of ignored prefixes for module index sorting. 87 | #modindex_common_prefix = [] 88 | 89 | 90 | # -- Options for HTML output --------------------------------------------------- 91 | 92 | # The theme to use for HTML and HTML Help pages. See the documentation for 93 | # a list of builtin themes. 
# NOTE(review): tail of the Sphinx build configuration (docs/conf.py).
# Everything below is standard sphinx-quickstart boilerplate: HTML, LaTeX,
# man-page and Texinfo output options. Commented-out settings are the
# defaults, kept for discoverability.
html_theme = 'default'

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []

# The name for this set of Sphinx documents. If None, it defaults to
# " v documentation".
#html_title = None

# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}

# If false, no module index is generated.
#html_domain_indices = True

# If false, no index is generated.
#html_use_index = True

# If true, the index is split into individual pages for each letter.
#html_split_index = False

# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Output file base name for HTML help builder.
htmlhelp_basename = 'sampledoc'


# -- Options for LaTeX output --------------------------------------------------

latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
  ('index', 'sample.tex', u'sample Documentation',
   u'Kenneth Reitz', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False

# If true, show page references after internal links.
#latex_show_pagerefs = False

# If true, show URL addresses after external links.
#latex_show_urls = False

# Documents to append as an appendix to all manuals.
#latex_appendices = []

# If false, no module index is generated.
#latex_domain_indices = True


# -- Options for manual page output --------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    ('index', 'sample', u'sample Documentation',
     [u'Kenneth Reitz'], 1)
]

# If true, show URL addresses after external links.
#man_show_urls = False


# -- Options for Texinfo output ------------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
  ('index', 'sample', u'sample Documentation',
   u'Kenneth Reitz', 'sample', 'One line description of project.',
   'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
#texinfo_appendices = []

# If false, no module index is generated.
#texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
# Tests for NumeralMorphologicalAnalyzer: construction defaults, the `match()`
# regexp behaviour over numeral words and digit strings, the `analyze()`
# life-cycle (word / matches / processed state), and the class-level
# BASIC_NUMBERS_LIST / regexp constants.
# pylint: disable=missing-docstring,no-self-use

import re
import pytest

from context import esperanto_analyzer

from esperanto_analyzer.speech import Numeral
from esperanto_analyzer.analyzers.morphological import NumeralMorphologicalAnalyzer

class TestNumeralMorphologicalAnalyzerBasic():
    TEST_WORD = 'dek'

    def test_import(self):
        assert NumeralMorphologicalAnalyzer

    def test_initialize_default_options(self):
        analyzer = NumeralMorphologicalAnalyzer(self.TEST_WORD)

        assert analyzer.options == dict()

    def test_initialize_overwrite_options(self):
        analyzer = NumeralMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok'))

        assert analyzer.options == dict(option='ok')

    def test_initialize_raw_word(self):
        analyzer = NumeralMorphologicalAnalyzer(self.TEST_WORD)

        assert analyzer.raw_word == self.TEST_WORD

    def test_initialize_word(self):
        analyzer = NumeralMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.word is only populated after calling `analyze()` method
        assert analyzer.word is None

    def test_initialize_matches(self):
        analyzer = NumeralMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.matches is only populated after calling `analyze()` method
        assert analyzer.matches is None

    def test_initialize_processed(self):
        analyzer = NumeralMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.processed is only flipped to True after calling `analyze()` method
        assert analyzer.processed is False

    def test_match_regexp(self):
        assert NumeralMorphologicalAnalyzer.MATCH_REGEXP is not None

    def test_regexp_value(self):
        assert NumeralMorphologicalAnalyzer.MATCH_REGEXP == re.compile('^(-?\\d+|nul|unu|du|tri|kvar|kvin|ses|sep|ok|naŭ|dek|(unu|du|tri|kvar|kvin|ses|sep|ok|naŭ|dek)?(dek|cent|milionoj|miliono|miliardoj|miliardo|bilionoj|biliono|mil))$', re.IGNORECASE)

    def test_word_class(self):
        # BUG FIX: the original called `isinstance(...)` without asserting on the
        # result, so this test could never fail.
        assert isinstance(NumeralMorphologicalAnalyzer.word_class()(self.TEST_WORD), Numeral)

class TestNumeralMorphologicalAnalyzerMatchMethod():
    VALID_WORDS = [
        'unu', 'du', 'tri', 'kvar', 'kvin', 'ses', 'sep', 'ok', 'naŭ', 'dek',
        'dudek', 'tridek', 'kvardek', 'kvindek', 'sesdek', 'sepdek', 'okdek', 'naŭdek',
        'cent', 'ducent', 'tricent', 'kvarcent', 'kvincent', 'sescent', 'sepcent', 'okcent', 'naŭcent',
        'mil', 'dumil', 'miliardo', 'miliono', 'miliardoj', 'milionoj'
    ]

    VALID_DIGITS = ['10', '20', '-1', '0', '102041', '9992232213']

    INVALID_DIGITS = ['a10', '2a0', '-1x', '01#', '102041@', '!9992232213']

    INVALID_WORDS = ['io', 'lo', 'bela', 'la', 'kiu', 'vi', 'kun', 'multe', 'ankoraŭ', '?', '!']

    def test_match(self):
        for word in self.VALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)
            matches = analyzer.match()

            assert matches is not None
            assert len(matches.span()) == 2

    def test_match_digits(self):
        for word in self.VALID_DIGITS:
            analyzer = NumeralMorphologicalAnalyzer(word)
            matches = analyzer.match()

            assert matches is not None
            assert len(matches.span()) == 2

    def test_match_invalid_digits(self):
        for word in self.INVALID_DIGITS:
            analyzer = NumeralMorphologicalAnalyzer(word)
            matches = analyzer.match()

            assert matches is None

    def test_match_empty(self):
        for word in self.INVALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)
            matches = analyzer.match()

            assert matches is None

class TestNumeralMorphologicalAnalyzerAnalyzeMethod():
    VALID_WORDS = [
        'unu', 'du', 'tri', 'kvar', 'kvin', 'ses', 'sep', 'ok', 'naŭ', 'dek',
        'dudek', 'tridek', 'kvardek', 'kvindek', 'sesdek', 'sepdek', 'okdek', 'naŭdek',
        'cent', 'ducent', 'tricent', 'kvarcent', 'kvincent', 'sescent', 'sepcent', 'okcent', 'naŭcent',
        'mil', 'dumil', 'miliardo', 'miliono', 'miliardoj', 'milionoj',
        '1', '2', '3', '4', '5', '6', '7', '8', '9', '10',
        '11', '20', '-1', '0', '102041', '9992232213'
    ]

    INVALID_WORDS = ['io', 'lo', 'bela', 'la', 'kiu', 'vi', 'kun', 'multe', 'ankoraŭ',
                     'a10', '2a0', '-1x', '01#', '102041@', '!9992232213'
                    ]

    def test_invalid_analyze(self):
        for word in self.INVALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)
            result = analyzer.analyze()

            assert not result

    def test_invalid_analyze_word(self):
        for word in self.INVALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert analyzer.word is None

    def test_invalid_analyze_match(self):
        for word in self.INVALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert analyzer.matches is None

    def test_analyze(self):
        for word in self.VALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)

            assert analyzer.analyze()

    def test_analyze_word(self):
        for word in self.VALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert isinstance(analyzer.word, Numeral)
            assert analyzer.word.content == word

    def test_analyze_match(self):
        for word in self.VALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert analyzer.matches is not None

    def test_analyze_return_false(self):
        for word in self.INVALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)

            assert analyzer.analyze() is False

    def test_analyze_return_true(self):
        for word in self.VALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)

            assert analyzer.analyze()


    def test_analyze_processed(self):
        for word in self.VALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)

            assert analyzer.processed is False

            analyzer.analyze()

            assert analyzer.processed is True

    def test_analyze_processed_response(self):
        for word in self.VALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)
            analyzer.analyze()

            # repeated calls are no-ops once processed: they return None
            assert analyzer.analyze() is None
            assert analyzer.analyze() is None

class TestNumeralMorphologicalAnalyzerBasicNumbersList:
    def test_numbers_not_empty(self):
        assert NumeralMorphologicalAnalyzer.BASIC_NUMBERS_LIST is not None

    def test_basic_numbers_included(self):
        for number in ['nul', 'unu', 'du', 'tri', 'kvar', 'kvin', 'ses', 'sep', 'ok', 'naŭ', 'dek']:
            assert number in NumeralMorphologicalAnalyzer.BASIC_NUMBERS_LIST

    def test_numbers_not_size(self):
        assert len(NumeralMorphologicalAnalyzer.BASIC_NUMBERS_LIST) == 11

    def test_numbers_match_list(self):
        for word in NumeralMorphologicalAnalyzer.BASIC_NUMBERS_LIST:
            assert NumeralMorphologicalAnalyzer.BASIC_NUMBERS_REGEXP.match(word)

    def test_numbers_match_final_regexp_list(self):
        for word in NumeralMorphologicalAnalyzer.BASIC_NUMBERS_LIST:
            assert NumeralMorphologicalAnalyzer.MATCH_REGEXP.match(word)

    def test_others_numbers_regexp(self):
        for word in ['dudek', 'tridek', 'ducent', 'dumil', 'trimil', 'mil', 'miliono', 'milionoj', 'cent', 'dek', 'miliardo']:
            assert NumeralMorphologicalAnalyzer.OTHERS_NUMBERS_REGEXP.match(word)

    def test_numbers_digit_regexp(self):
        for word in ['1', '20', '300', '999999', '-10']:
            assert NumeralMorphologicalAnalyzer.NUMBERS_DIGIT_REGEXP.match(word)

    def test_invalid_others_numbers_regexp(self):
        for word in ['domo', 'la', 'multe', 'bela', 'belajn', 'a0x']:
            assert NumeralMorphologicalAnalyzer.OTHERS_NUMBERS_REGEXP.match(word) is None

    def test_invalid_numbers_digit_regexp(self):
        for word in ['@', '!10', '*10*']:
            assert NumeralMorphologicalAnalyzer.NUMBERS_DIGIT_REGEXP.match(word) is None
# Tests for PrepositionMorphologicalAnalyzer: construction defaults, the
# `match()` regexp over the fixed preposition vocabulary, the `analyze()`
# life-cycle, and the class-level PREPOSITIONS_LIST constant.
# pylint: disable=missing-docstring,no-self-use

import re
import pytest

from context import esperanto_analyzer

from esperanto_analyzer.speech import Preposition
from esperanto_analyzer.analyzers.morphological import PrepositionMorphologicalAnalyzer

class TestPrepositionMorphologicalAnalyzerBasic():
    TEST_WORD = 'anstataŭ'

    def test_import(self):
        assert PrepositionMorphologicalAnalyzer

    def test_initialize_default_options(self):
        analyzer = PrepositionMorphologicalAnalyzer(self.TEST_WORD)

        assert analyzer.options == dict()

    def test_initialize_overwrite_options(self):
        analyzer = PrepositionMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok'))

        assert analyzer.options == dict(option='ok')

    def test_initialize_raw_word(self):
        analyzer = PrepositionMorphologicalAnalyzer(self.TEST_WORD)

        assert analyzer.raw_word == self.TEST_WORD

    def test_initialize_word(self):
        analyzer = PrepositionMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.word is only populated after calling `analyze()` method
        assert analyzer.word is None

    def test_initialize_matches(self):
        analyzer = PrepositionMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.matches is only populated after calling `analyze()` method
        assert analyzer.matches is None

    def test_initialize_processed(self):
        analyzer = PrepositionMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.processed is only flipped to True after calling `analyze()` method
        assert analyzer.processed is False

    def test_match_regexp(self):
        assert PrepositionMorphologicalAnalyzer.MATCH_REGEXP is not None

    def test_word_class(self):
        assert isinstance(PrepositionMorphologicalAnalyzer.word_class()(self.TEST_WORD), Preposition)

class TestPrepositionMorphologicalAnalyzerMatchMethod():
    VALID_WORDS = ['K', 'al', 'anstataŭ', 'antaŭ', 'antaŭ ol', 'apud', 'da', 'de', 'disde',
                   'du vortoj', 'dum', 'ekde', 'ekster', 'eksteren', 'el', 'en', 'ene',
                   'estiel', 'far', 'fare de', 'flanke de', 'for de', 'graŭ', 'inter', 'je',
                   'kaj ankaŭ', 'kiel', 'kontraŭ', 'kontraŭe de', 'krom', 'kun', 'laŭ',
                   'mala', 'malantaŭ', 'malgraŭ', 'malkiel', 'malsupre de', 'malsupren',
                   'meze de', 'na', 'nome de', 'ol', 'per', 'pere de', 'plus', 'po', 'por',
                   'post', 'preter', 'pri', 'pro', 'proksime de', 'samkiel', 'sed', 'sekva',
                   'sen', 'sub', 'suben', 'super', 'supren', 'sur', 'tiu', 'tiuj', 'tra',
                   'trans', 'tri vortoj', 'tuj post', 'tutĉirkaŭ',
                   'ĉe', 'ĉi tiu', 'ĉi tiuj', 'ĉirkaŭ', 'ĝis'
                  ]

    INVALID_WORDS = ['io', 'bela', 'domo', 'hundoj', 'kiu', 'vi', 'multe', 'ankoraŭ', 'dek',
                     'du', 'ĉar', 'aŭ', '?', '!']

    def test_match(self):
        for word in self.VALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)
            matches = analyzer.match()

            assert matches is not None
            assert len(matches.span()) == 2

    def test_match_empty(self):
        for word in self.INVALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)
            matches = analyzer.match()

            assert matches is None

class TestPrepositionMorphologicalAnalyzerAnalyzeMethod():
    VALID_WORDS = ['K', 'al', 'anstataŭ', 'antaŭ', 'antaŭ ol', 'apud', 'da', 'de', 'disde',
                   'du vortoj', 'dum', 'ekde', 'ekster', 'eksteren', 'el', 'en', 'ene',
                   'estiel', 'far', 'fare de', 'flanke de', 'for de', 'graŭ', 'inter', 'je',
                   'kaj ankaŭ', 'kiel', 'kontraŭ', 'kontraŭe de', 'krom', 'kun', 'laŭ',
                   'mala', 'malantaŭ', 'malgraŭ', 'malkiel', 'malsupre de', 'malsupren',
                   'meze de', 'na', 'nome de', 'ol', 'per', 'pere de', 'plus', 'po', 'por',
                   'post', 'preter', 'pri', 'pro', 'proksime de', 'samkiel', 'sed', 'sekva',
                   'sen', 'sub', 'suben', 'super', 'supren', 'sur', 'tiu', 'tiuj', 'tra',
                   'trans', 'tri vortoj', 'tuj post', 'tutĉirkaŭ',
                   'ĉe', 'ĉi tiu', 'ĉi tiuj', 'ĉirkaŭ', 'ĝis',
                  ]

    INVALID_WORDS = ['io', 'bela', 'domo', 'hundoj', 'kiu', 'vi', 'multe', 'ankoraŭ', 'dek',
                     'du', 'ĉar', 'aŭ', '?', '!']

    def test_invalid_analyze(self):
        for word in self.INVALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)
            result = analyzer.analyze()

            assert not result

    def test_invalid_analyze_word(self):
        for word in self.INVALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert analyzer.word is None

    def test_invalid_analyze_match(self):
        for word in self.INVALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert analyzer.matches is None

    def test_analyze(self):
        for word in self.VALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)

            assert analyzer.analyze()

    def test_prepositions_list(self):
        for word in PrepositionMorphologicalAnalyzer.PREPOSITIONS_LIST:
            analyzer = PrepositionMorphologicalAnalyzer(word)

            assert analyzer.analyze()

    def test_analyze_word(self):
        for word in self.VALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert isinstance(analyzer.word, Preposition)
            assert analyzer.word.content == word

    def test_analyze_match(self):
        for word in self.VALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert analyzer.matches is not None

    def test_analyze_return_false(self):
        for word in self.INVALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)

            assert analyzer.analyze() is False

    def test_analyze_return_true(self):
        for word in self.VALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)

            assert analyzer.analyze()


    def test_analyze_processed(self):
        for word in self.VALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)

            assert analyzer.processed is False

            analyzer.analyze()

            assert analyzer.processed is True

    def test_analyze_processed_response(self):
        for word in self.VALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)
            analyzer.analyze()

            # repeated calls are no-ops once processed: they return None
            assert analyzer.analyze() is None
            assert analyzer.analyze() is None

class TestPrepositionMorphologicalAnalyzerPrepositionsList:
    # Hardcoded copy of the expected vocabulary, to detect accidental edits
    # to PrepositionMorphologicalAnalyzer.PREPOSITIONS_LIST.
    PREPOSITIONS_LIST = ['K', 'al', 'anstataŭ', 'antaŭ', 'antaŭ ol', 'apud', 'da', 'de', 'disde',
                         'du vortoj', 'dum', 'ekde', 'ekster', 'eksteren', 'el', 'en', 'ene',
                         'estiel', 'far', 'fare de', 'flanke de', 'for de', 'graŭ', 'inter', 'je',
                         'kaj ankaŭ', 'kiel', 'kontraŭ', 'kontraŭe de', 'krom', 'kun', 'laŭ',
                         'mala', 'malantaŭ', 'malgraŭ', 'malkiel', 'malsupre de', 'malsupren',
                         'meze de', 'na', 'nome de', 'ol', 'per', 'pere de', 'plus', 'po', 'por',
                         'post', 'preter', 'pri', 'pro', 'proksime de', 'samkiel', 'sed', 'sekva',
                         'sen', 'sub', 'suben', 'super', 'supren', 'sur', 'tiu', 'tiuj', 'tra',
                         'trans', 'tri vortoj', 'tuj post', 'tutĉirkaŭ',
                         'ĉe', 'ĉi tiu', 'ĉi tiuj', 'ĉirkaŭ', 'ĝis']

    def test_preposition_list_not_changed(self):
        assert PrepositionMorphologicalAnalyzer.PREPOSITIONS_LIST == self.PREPOSITIONS_LIST

    def test_prepositions_not_empty(self):
        assert PrepositionMorphologicalAnalyzer.PREPOSITIONS_LIST is not None

    def test_prepositions_not_size(self):
        assert len(PrepositionMorphologicalAnalyzer.PREPOSITIONS_LIST) == 73

    def test_prepositions_match_list(self):
        for word in PrepositionMorphologicalAnalyzer.PREPOSITIONS_LIST:
            assert PrepositionMorphologicalAnalyzer.PROPOSITIONS_MATCH_REGEXP.match(word)

    def test_prepositions_match_final_regexp_list(self):
        for word in PrepositionMorphologicalAnalyzer.PREPOSITIONS_LIST:
            assert PrepositionMorphologicalAnalyzer.MATCH_REGEXP.match(word)
# Tests for PronounMorphologicalAnalyzer: construction defaults, the `match()`
# regexp over personal/possessive/correlative pronouns (including accusative
# `-n` and plural `-j` forms), the `analyze()` life-cycle, and the class-level
# pronoun lists / regexp constants.
# pylint: disable=missing-docstring,no-self-use

import re
import pytest

from context import esperanto_analyzer

from esperanto_analyzer.speech import Pronoun
from esperanto_analyzer.analyzers.morphological import PronounMorphologicalAnalyzer

class TestPronounMorphologicalAnalyzerBasic():
    TEST_WORD = 'mi'

    def test_import(self):
        assert PronounMorphologicalAnalyzer

    def test_initialize_default_options(self):
        analyzer = PronounMorphologicalAnalyzer(self.TEST_WORD)

        assert analyzer.options == dict()

    def test_initialize_overwrite_options(self):
        analyzer = PronounMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok'))

        assert analyzer.options == dict(option='ok')

    def test_initialize_raw_word(self):
        analyzer = PronounMorphologicalAnalyzer(self.TEST_WORD)

        assert analyzer.raw_word == self.TEST_WORD

    def test_initialize_word(self):
        analyzer = PronounMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.word is only populated after calling `analyze()` method
        assert analyzer.word is None

    def test_initialize_matches(self):
        analyzer = PronounMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.matches is only populated after calling `analyze()` method
        assert analyzer.matches is None

    def test_initialize_processed(self):
        analyzer = PronounMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.processed is only flipped to True after calling `analyze()` method
        assert analyzer.processed is False

    def test_match_regexp(self):
        assert PronounMorphologicalAnalyzer.MATCH_REGEXP is not None

    def test_word_class(self):
        # BUG FIX: the original called `isinstance(...)` without asserting on the
        # result, so this test could never fail.
        assert isinstance(PronounMorphologicalAnalyzer.word_class()(self.TEST_WORD), Pronoun)

class TestPronounMorphologicalAnalyzerMatchMethod():
    VALID_WORDS = [
        'mi', 'vi', 'li', 'ŝi', 'ĝi', 'oni', 'ili', 'ni',
        'min', 'vin', 'lin', 'ŝin', 'ĝin', 'onin', 'ilin', 'nin',
        'mia', 'via', 'lia', 'ŝia', 'ĝia', 'onia', 'ilia', 'nia',
        'miaj', 'viaj', 'liaj', 'ŝiaj', 'ĝiaj', 'oniaj', 'iliaj', 'niaj',
        'mian', 'vian', 'lian', 'ŝian', 'ĝian', 'onian', 'ilian', 'nian',
        'miajn', 'viajn', 'liajn', 'ŝiajn', 'ĝiajn', 'oniajn', 'iliajn', 'niajn',
        'kiu', 'kio', 'kies', 'tiu', 'ĉi tiu', 'tia',
        'nenio', 'neniu', 'ĉio', 'ĉiu', 'io', 'iu', 'io ajn', 'iu ajn',
        'nenion', 'neniun', 'ĉion', 'ĉiun', 'ion', 'iun', 'io ajn', 'iu ajn',
        'io ajn', 'ĉio ajn', 'iu ajn', 'ĉiu ajn'
    ]

    INVALID_WORDS = ['lo', 'bela', 'la', 'kun', 'multe', 'ankoraŭ',
                     'a10', '2a0', '-1x', '01#', '102041@', '!9992232213', 'ilianj',
                     'ilimia', 'miaan', 'miani', 'vianj'
                    ]

    def test_match(self):
        for word in self.VALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)
            matches = analyzer.match()

            assert matches is not None
            assert len(matches.span()) == 2

    def test_match_empty(self):
        for word in self.INVALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)
            matches = analyzer.match()
            assert matches is None

class TestPronounMorphologicalAnalyzerAnalyzeMethod():
    VALID_WORDS = [
        'mi', 'vi', 'li', 'ŝi', 'ĝi', 'oni', 'ili', 'ni',
        'min', 'vin', 'lin', 'ŝin', 'ĝin', 'onin', 'ilin', 'nin',
        'mia', 'via', 'lia', 'ŝia', 'ĝia', 'onia', 'ilia', 'nia',
        'miaj', 'viaj', 'liaj', 'ŝiaj', 'ĝiaj', 'oniaj', 'iliaj', 'niaj',
        'mian', 'vian', 'lian', 'ŝian', 'ĝian', 'onian', 'ilian', 'nian',
        'miajn', 'viajn', 'liajn', 'ŝiajn', 'ĝiajn', 'oniajn', 'iliajn', 'niajn',
        'kiu', 'kio', 'kies', 'tiu', 'ĉi tiu', 'tia',
        'nenio', 'neniu', 'ĉio', 'ĉiu', 'io', 'iu', 'io ajn', 'iu ajn',
        'nenion', 'neniun', 'ĉion', 'ĉiun', 'ion', 'iun', 'io ajn', 'iu ajn',
        'io ajn', 'ĉio ajn', 'iu ajn', 'ĉiu ajn'
    ]

    INVALID_WORDS = ['lo', 'bela', 'la', 'kun', 'multe', 'ankoraŭ',
                     'a10', '2a0', '-1x', '01#', '102041@', '!9992232213', 'ilianj',
                     'ilimia', 'miaan', 'miani', 'vianj'
                    ]

    def test_invalid_analyze(self):
        for word in self.INVALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)
            result = analyzer.analyze()

            assert not result

    def test_invalid_analyze_word(self):
        for word in self.INVALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert analyzer.word is None

    def test_invalid_analyze_match(self):
        for word in self.INVALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert analyzer.matches is None

    def test_analyze(self):
        for word in self.VALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)

            assert analyzer.analyze()

    def test_analyze_word(self):
        for word in self.VALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert isinstance(analyzer.word, Pronoun)
            assert analyzer.word.content == word

    def test_analyze_match(self):
        for word in self.VALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert analyzer.matches is not None

    def test_analyze_return_false(self):
        for word in self.INVALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)

            assert analyzer.analyze() is False

    def test_analyze_return_true(self):
        for word in self.VALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)

            assert analyzer.analyze()


    def test_analyze_processed(self):
        for word in self.VALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)

            assert analyzer.processed is False

            analyzer.analyze()

            assert analyzer.processed is True

    def test_analyze_processed_response(self):
        for word in self.VALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)
            analyzer.analyze()

            # repeated calls are no-ops once processed: they return None
            assert analyzer.analyze() is None
            assert analyzer.analyze() is None

class TestPronounMorphologicalAnalyzerPersonalPronounsList:
    BASIC_PERSONAL_PRONOUNS = ['mi', 'vi','li', 'ŝi', 'ĝi', 'oni', 'ili']

    def test_pronouns_not_empty(self):
        assert PronounMorphologicalAnalyzer.PERSONAL_PRONOUNS_LIST is not None

    def test_basic_pronouns_included(self):
        for number in self.BASIC_PERSONAL_PRONOUNS:
            assert number in PronounMorphologicalAnalyzer.PERSONAL_PRONOUNS_LIST

    def test_pronouns_not_size(self):
        assert len(PronounMorphologicalAnalyzer.PERSONAL_PRONOUNS_LIST) == 8

    def test_pronouns_list_match_regexp(self):
        for word in PronounMorphologicalAnalyzer.PERSONAL_PRONOUNS_LIST:
            assert PronounMorphologicalAnalyzer.PERSONAL_PRONOUNS_LIST_REGEXP.match(word)

    def test_pronouns_match_hardcoded_list(self):
        for word in self.BASIC_PERSONAL_PRONOUNS:
            assert PronounMorphologicalAnalyzer.PERSONAL_PRONOUNS_LIST_REGEXP.match(word)

    def test_pronouns_match_final_regexp_list(self):
        for word in PronounMorphologicalAnalyzer.PERSONAL_PRONOUNS_LIST:
            assert PronounMorphologicalAnalyzer.MATCH_REGEXP.match(word)

    def test_pronouns_acusative_match_final_regexp(self):
        for word in self.BASIC_PERSONAL_PRONOUNS:
            assert PronounMorphologicalAnalyzer.MATCH_REGEXP.match(word)

    def test_pronouns_acusative_match_final_regexp_list(self):
        for word in self.BASIC_PERSONAL_PRONOUNS:
            assert PronounMorphologicalAnalyzer.MATCH_REGEXP.match(word + 'n')

class TestPronounMorphologicalAnalyzerPossessivePronounsList:
    BASIC_POSSESSIVE_PRONOUNS = [
        'mia', 'via', 'lia', 'ŝia', 'ĝia', 'onia', 'nia', 'ilia'
    ]

    def test_pronouns_not_empty(self):
        assert PronounMorphologicalAnalyzer.PERSONAL_POSSESSIVE_PRONOUNS_LIST is not None

    def test_basic_pronouns_included(self):
        for number in self.BASIC_POSSESSIVE_PRONOUNS:
            assert number in PronounMorphologicalAnalyzer.PERSONAL_POSSESSIVE_PRONOUNS_LIST

    def test_pronouns_not_size(self):
        assert len(PronounMorphologicalAnalyzer.PERSONAL_POSSESSIVE_PRONOUNS_LIST) == 8

    def test_pronouns_list_match_regexp(self):
        for word in PronounMorphologicalAnalyzer.PERSONAL_POSSESSIVE_PRONOUNS_LIST:
            assert PronounMorphologicalAnalyzer.PERSONAL_POSSESSIVE_PRONOUNS_LIST_REGEXP.match(word)

    def test_pronouns_match_hardcoded_list(self):
        for word in self.BASIC_POSSESSIVE_PRONOUNS:
            assert PronounMorphologicalAnalyzer.PERSONAL_POSSESSIVE_PRONOUNS_LIST_REGEXP.match(word)

    def test_pronouns_match_final_regexp_list(self):
        for word in PronounMorphologicalAnalyzer.PERSONAL_POSSESSIVE_PRONOUNS_LIST:
            assert PronounMorphologicalAnalyzer.MATCH_REGEXP.match(word)

    def test_pronouns_plural_match_final_regexp_list(self):
        for word in self.BASIC_POSSESSIVE_PRONOUNS:
            assert PronounMorphologicalAnalyzer.MATCH_REGEXP.match(word + 'j')

    def test_pronouns_plural_acusative_match_final_regexp_list(self):
        for word in self.BASIC_POSSESSIVE_PRONOUNS:
            assert PronounMorphologicalAnalyzer.MATCH_REGEXP.match(word + 'jn')

    def test_pronouns_acusative_match_final_regexp_list(self):
        for word in self.BASIC_POSSESSIVE_PRONOUNS:
            assert PronounMorphologicalAnalyzer.MATCH_REGEXP.match(word + 'n')


class TestPronounMorphologicalAnalyzerAllBasicPersonalPronounsList:
    ALL_BASIC_PRONOUNS = [
        'mi', 'vi', 'li', 'ŝi', 'ĝi', 'oni', 'ni', 'ili',
        'mia', 'via', 'lia', 'ŝia', 'ĝia', 'onia', 'nia', 'ilia',
        'miaj', 'viaj', 'liaj', 'ŝiaj', 'ĝiaj', 'oniaj', 'niaj', 'iliaj'
    ]

    def test_pronouns_not_empty(self):
        assert PronounMorphologicalAnalyzer.ALL_PERSONAL_PRONOUNS_REGEXP is not None

    def test_pronouns_list_match_regexp(self):
        for word in PronounMorphologicalAnalyzer.PERSONAL_POSSESSIVE_PRONOUNS_LIST:
            assert PronounMorphologicalAnalyzer.ALL_PERSONAL_PRONOUNS_REGEXP.match(word)

    def test_pronouns_match_hardcoded_list(self):
        for word in self.ALL_BASIC_PRONOUNS:
            assert PronounMorphologicalAnalyzer.ALL_PERSONAL_PRONOUNS_REGEXP.match(word)

    def test_pronouns_match_final_regexp_list(self):
        for word in self.ALL_BASIC_PRONOUNS:
            assert PronounMorphologicalAnalyzer.MATCH_REGEXP.match(word)

    def test_pronouns_acusative_match_final_regexp_list(self):
        for word in self.ALL_BASIC_PRONOUNS:
            assert PronounMorphologicalAnalyzer.MATCH_REGEXP.match(word + 'n')
self.ALL_BASIC_PRONOUNS: 278 | assert PronounMorphologicalAnalyzer.MATCH_REGEXP.match(word + 'n') 279 | 280 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Esperanto Analyzer 2 | 3 | ---- 4 | 5 | ![Esperanto Flag](https://upload.wikimedia.org/wikipedia/commons/thumb/f/f5/Flag_of_Esperanto.svg/640px-Flag_of_Esperanto.svg.png?1535986891157) 6 | 7 | ## Build Status: 8 | 9 | ### Development: 10 | 11 | [![Build Status](https://travis-ci.com/fidelisrafael/esperanto-analyzer.svg?token=k5uMpn3U564QqWar8oA1&branch=development)](https://travis-ci.com/fidelisrafael/esperanto-analyzer) 12 | 13 | [![codecov](https://codecov.io/gh/fidelisrafael/esperanto-analyzer/branch/development/graph/badge.svg)](https://codecov.io/gh/fidelisrafael/esperanto-analyzer) 14 | 15 | ### Master: 16 | 17 | [![Build Status](https://travis-ci.com/fidelisrafael/esperanto-analyzer.svg?token=k5uMpn3U564QqWar8oA1&branch=master)](https://travis-ci.com/fidelisrafael/esperanto-analyzer) 18 | 19 | [![codecov](https://codecov.io/gh/fidelisrafael/esperanto-analyzer/branch/master/graph/badge.svg)](https://codecov.io/gh/fidelisrafael/esperanto-analyzer) 20 | 21 | --- 22 | 23 | ### Atendu! Kio estas Esperanto? (_Wait! What is Esperanto?_) 24 | 25 | That is a fair question! Esperanto is the most widely spoken constructed international auxiliary language 26 | (_conlang_) in the world. It was created back in **1887** by a polish-jewish guy named "Ludwik Lejzer Zamenhof"_(often refered as L.L Zamenhof)_. Zamenhof's goal was to **create an easy and flexible language** that would serve as a universal second language to foster peace and international understanding of people from all around the world. 27 | 28 | The phonology, grammar, vocabulary, and semantics are based on the **Indo-European**(_Italian_,_Spanish_,_French_, _Catalan_, _Russian_, _German_...) languages spoken in Europe. 
The sound inventory is essentially **Slavic**, as is much of the semantics, whereas the vocabulary derives primarily from the **Romance languages**, with a lesser contribution from **Germanic languages** and minor contributions from **Slavic languages** and **Greek**.

The language has more than **130 years of history** and culture now, and a very active community as well.

Esperanto is a SUPER regular language: this means that the language does not have **irregular verbs** or **gender distinction for articles**; besides this, Esperanto has only **16 grammar rules**.
For example, one of the rules: ALL **Nouns** MUST end with the vowel `o`, e.g.:

- `domo`
- `homo`
- `komputilo`
- `komputilisto`

Or **Adjectives** MUST end with the letter `a`, e.g.:

- `bela`
- `granda`
- `varma`
- `malvarma`

If you want to know (or learn) more about Esperanto, you should read the following links:

- [Esperanto at Wikipedia](https://www.wikiwand.com/en/Esperanto)
- [Kio estas Esperanto? (in Esperanto)](https://lernu.net/eo/esperanto) or in [English](https://lernu.net/es/esperanto)
- Esperanto course at Duolingo for: [[English speakers]](https://www.duolingo.com/course/eo/en/Learn-Esperanto-Online), [[Portuguese speakers]](https://www.duolingo.com/course/eo/pt/Learn-Esperanto-Online), [[Spanish speakers]](https://www.duolingo.com/course/eo/es/Learn-Esperanto-Online)
- [Esperanto course at Lernu.net](http://lernu.net/kurso)
- [YouTube series: Esperanto estas...](https://www.youtube.com/watch?v=RlftmTm8I18&list=PL83728C14BFC5822F)

---

## About this project

The aim of this project is to create a tool that can read and grammatically classify Esperanto sentences.

The first part of the project consists of the **Morphological Analysis** of Esperanto words; the next step is to create a **Syntactical Analyzer** for the language as well.
62 | 63 | --- 64 | 65 | ## How to use it? 66 | 67 | ### Demo 68 | 69 | You can check out the demo application built with React: [Online Demo](https://fidelisrafael.github.io/esperanto-analyzer-react/) or [Github Repository](https://github.com/fidelisrafael/esperanto-analyzer-react/) 70 | 71 | [![Frontend application](./docs/esperanto_analyzer_screenshot.png)](https://fidelisrafael.github.io/esperanto-analyzer-react/) 72 | 73 | 74 | Or you can try the demo API hosted on Heroku: 75 | 76 | [https://esperanto-analyzer-api.herokuapp.com/analyze?sentence=Esperanto%20estas%20tre%20facila%20lingvo%20al%20lerni]( 77 | https://esperanto-analyzer-api.herokuapp.com/analyze?sentence=Esperanto%20estas%20tre%20facila%20lingvo%20al%20lerni) 78 | 79 | --- 80 | 81 | ## Installation 82 | 83 | First, install it: 84 | 85 | ```bash 86 | $ pip install esperanto-analyzer 87 | ``` 88 | 89 | ## CLI usage: 90 | 91 | [TODO] (Skip it for now) 92 | 93 | Now you will have the library's source-code files in your system, and also the executable `binary` through the CLI; test it: 94 | 95 | ```bash 96 | $ eo-analyzer --version 97 | > Version: 0.0.1 98 | ``` 99 | 100 | 101 | ```sh 102 | $ eo-analyzer "Jen la alfabeto de Esperanto. Ĉiu litero ĉiam sonas same kaj literumado estas perfekte regula. Klaku la ekzemplojn por aŭdi la elparolon!" 103 | ``` 104 | 105 | ![eo-analyzer response](https://i.imgur.com/4hWUcWY.png) 106 | 107 | Pretty cool, huh? 108 | 109 | ## Python library usage 110 | 111 | Ok, so now you want to import this library in your project, right?
That's super simple, just drop these lines in your project: 112 | 113 | ### Morphological analysis of sentences 114 | 115 | ```py 116 | from esperanto_analyzer import MorphologicalSentenceAnalyzer 117 | 118 | # Creates one instance to morphologically analyze one sentence 119 | analyzer = MorphologicalSentenceAnalyzer("Esperanto estas tre facila lingvo al lerni.") 120 | analyzer.analyze() # => Returns True/False 121 | 122 | # This is the simplest human-readable response of the morphological analysis results 123 | print(analyzer.simple_results()) 124 | # => [['Esperanto', 'Noun'], ['estas', 'Verb'], ['tre', 'Adverb'], ['facila', 'Adjective'], ['lingvo', 'Noun'], ['al', 'Preposition'], ['lerni', 'Verb']] 125 | 126 | ``` 127 | 128 | But you can always deal with a more complex results set if you (or better, your software) want/need to: 129 | 130 | ```py 131 | # The `#results()` method returns an Array object with a more complex structure than the `#simple_results()` method 132 | results = analyzer.analyzes_results() 133 | first_analyze = results[0] 134 | 135 | # Returns an Array object with `AnalyzeResult` objects 136 | print(results) 137 | # => [, ,(...)] 138 | 139 | print(first_analyze) 140 | # => 141 | 142 | # Rich and detailed results using `AnalyzeResult` 143 | print(first_analyze.result) 144 | # => 145 | 146 | # Get any information that you might need using the response objects API 147 | print((first_analyze.result.raw_word, first_analyze.result.matches, first_analyze.result.word_class() )) 148 | # => ('Esperanto', , ) 149 | 150 | ``` 151 | --- 152 | 153 | ### Morphological analysis of a single WORD 154 | 155 | You can also use the internal analyzers of **words** if you want to, e.g.: 156 | 157 | ```py 158 | from esperanto_analyzer.analyzers.morphological import AdjectiveMorphologicalAnalyzer, NumeralMorphologicalAnalyzer 159 | 160 | # There's a total of `10` morphological analyzers, such as `VerbMorphologicalAnalyzer`, `NumeralMorphologicalAnalyzer` 161 | analyzer =
AdjectiveMorphologicalAnalyzer('belajn') 162 | # If it returns true, that means that the input word is a valid adjective. False otherwise 163 | analyzer.analyze() # => returns True/False 164 | 165 | print(analyzer.matches) 166 | # => 167 | print(analyzer.raw_word) # => 'belajn' 168 | 169 | # The `word` property is one class object that inherits from the `Word` class. 170 | print(analyzer.word) 171 | # => 172 | 173 | # Get the base class name for the detected 'Part of Speech' class 174 | print(analyzer.word.__class__.__name__) # => 'Adjective' 175 | 176 | numeral_analyzer = NumeralMorphologicalAnalyzer('naŭcent') 177 | numeral_analyzer.analyze() # => True 178 | 179 | print(numeral_analyzer.word) 180 | # => 181 | 182 | print(numeral_analyzer.matches) 183 | # => 184 | 185 | ``` 186 | 187 | --- 188 | 189 | ### Parts of Speech: Word, Article, Adverb, Adjective, Verb... 190 | 191 | You can even use the **Parts of Speech** (such as `Article`, `Adverb`, `Pronoun`, `Conjunction`) of the language: 192 | 193 | ```py 194 | # `esperanto_analyzer.speech` is home for all parts-of-speech classes 195 | from esperanto_analyzer.speech import Article 196 | 197 | # Raises an `InvalidArticleError` Exception, since 'lo' is not an Esperanto article 198 | article = Article('lo') 199 | 200 | # 'La' is the ONLY valid article in Esperanto 201 | valid_article = Article('la') 202 | 203 | 204 | # All `esperanto_analyzer.speech` objects inherit from the `esperanto_analyzer.speech.word.Word` class 205 | print(valid_article.__class__.__bases__) # => (esperanto_analyzer.speech.word.Word,) 206 | 207 | # La is an invariable article; it's the same for plural and singular sentences, e.g.: 208 | # 'La domo' # The house 209 | # 'La domoj' # The houses 210 | print(valid_article.plural) # => False 211 | 212 | # You can provide some `context` when creating the `Part of Speech` so it can determine if the word should be in plural or singular, e.g.: 213 | print(Article('la', 'domoj').plural) # => True 214 | 215 | 216 |
``` 217 | 218 | --- 219 | 220 | ## Development Setup 221 | 222 | Clone this repository: 223 | 224 | ```bash 225 | $ git clone https://github.com/fidelisrafael/esperanto-analyzer.git 226 | $ cd esperanto-analyzer 227 | ``` 228 | 229 | Make sure you have `python` >= `3.7.0` and `virtualenv` >= `16.0.0` installed: 230 | 231 | ```bash 232 | $ python --version 233 | > Python 3.7.0 234 | $ virtualenv --version 235 | > 16.0.0 236 | ``` 237 | 238 | Otherwise, [install it](https://virtualenv.pypa.io/en/stable/installation/). 239 | 240 | Then, create a new `virtualenv` and activate it: 241 | 242 | ```bash 243 | $ virtualenv venv 244 | $ source venv/bin/activate 245 | ``` 246 | 247 | Install the dependencies for the development and test environments: 248 | 249 | ```bash 250 | # If you just want to install the needed dependencies for production, just run: `make init` 251 | $ make init_dev 252 | > pip install -r development_requirements.txt 253 | > pip install -r test_requirements.txt 254 | > pip install -r requirements.txt 255 | ``` 256 | 257 | Run the tests: 258 | 259 | ```bash 260 | $ make test 261 | > pytest tests --cov-config .coveragerc --cov=esperanto_analyzer --cov-report=html 262 | > =============================================================================== test session starts ================================================================================ 263 | > platform darwin -- Python 3.7.0, pytest-3.7.4, py-1.6.0, pluggy-0.7.1 264 | > rootdir: /(...)/esperanto_analyzer, inifile: 265 | > plugins: cov-2.5.1 266 | > collected 492 items 267 | 268 | > (...) 269 | 270 | > ====================================================================== 492 passed, 2 warnings in 2.61 seconds ====================================================================== 271 | ``` 272 | 273 | You can follow the code coverage stats by opening: `coverage/index.html` 274 | 275 | ### OBS: This library has **100%** code coverage at the time of this writing!
276 | 277 | --- 278 | 279 | ### Built-in JSON Web API 280 | 281 | **_Note: This web API will be published as a separate package in the near future._** 282 | 283 | This library comes with a very simple HTTP server built on top of Flask to provide a Web API interface for integration with other systems. You can run the HTTP server by running the following make task in the root folder of the project: 284 | 285 | ```bash 286 | $ make web_api # or simply running: python web/runserver.py 287 | > python esperanto_analyzer/web/runserver.py 288 | > * Serving Flask app "esperanto_analyzer.web.api.server" (lazy loading) 289 | > * Environment: production 290 | > WARNING: Do not use the development server in a production environment. 291 | > Use a production WSGI server instead. 292 | > * Debug mode: on 293 | > * Running on http://0.0.0.0:5000/ (Press CTRL+C to quit) 294 | ``` 295 | 296 | Or you can just run it from inside any python project with: 297 | 298 | ```py 299 | from esperanto_analyzer.web import run_app 300 | 301 | run_app(debug=True, port=9090) 302 | # * Serving Flask app "esperanto_analyzer.web.api.server" (lazy loading) 303 | # * Environment: production 304 | # WARNING: Do not use the development server in a production environment. 305 | # Use a production WSGI server instead. 306 | # * Debug mode: off 307 | # * Running on http://127.0.0.1:9090/ (Press CTRL+C to quit) 308 | 309 | ``` 310 | 311 | This server has auto-reload (or hot-reload) enabled by default, so you don't need to restart the server when you change the source code. 312 | 313 | To test it: 314 | 315 | ```bash 316 | curl http://127.0.0.1:5000/analyze?sentence=Kio%20estas%20Esperanto%3F%20%C4%9Ci%20estas%20lingvo%20tre%20ta%C5%ADga%20por%20internacia%20komunikado.
317 | ``` 318 | 319 | ### HTTP API Deploy 320 | 321 | If you need an API (like [this one](https://esperanto-analyzer-api.herokuapp.com/analyze?sentence=Esperanto%20estas%20tre%20facila%20lingvo%20al%20lerni)) you can just easily deploy this project to `Heroku` since it comes with a `Procfile` file; this will take no more than 4 commands: 322 | 323 | OBS: You will need [Heroku's CLI](https://devcenter.heroku.com/articles/heroku-cli) for this. 324 | 325 | ```bash 326 | $ git clone https://github.com/fidelisrafael/esperanto-analyzer.git 327 | $ cd esperanto-analyzer 328 | $ heroku create my-esperanto-analyzer 329 | > Creating ⬢ my-analyzer-test... done 330 | $ git push heroku master:master 331 | # Open https://my-esperanto-analyzer.herokuapp.com/analyze?sentence=Kiel%20%vi%fartas 332 | $ heroku open '/analyze?sentence=Kiel%20vi%20fartas?' 333 | ``` 334 | 335 | --- 336 | 337 | ## How does it work? 338 | 339 | This library can be used in a myriad of ways to analyze Esperanto sentences and words; for a complete reference of the API and all the possibilities you should check the 'Full API' section. 340 | 341 | [TODO] 342 | 343 | --- 344 | 345 | 346 | ## :calendar: Roadmap 347 | 348 | - :white_medium_small_square: Create syntactical analyzers 349 | - :white_medium_small_square: Update this Roadmap with more plans 350 | - :white_check_mark: Front-end application. (Done, [check it out](https://fidelisrafael.github.io/esperanto-analyzer-react/)) 351 | 352 | 353 | --- 354 | 355 | ## :thumbsup: Contributing 356 | 357 | Bug reports and pull requests are welcome on GitHub at http://github.com/fidelisrafael/esperanto-analyzer. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](contributor-covenant.org) code of conduct. 358 | 359 | --- 360 | 361 | ## :memo: License 362 | 363 | The library is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
364 | --------------------------------------------------------------------------------