├── .pylintrc ├── runtime.txt ├── MANIFEST.in ├── Procfile ├── test_requirements.txt ├── esperanto_analyzer ├── web │ ├── __init__.py │ ├── .gitignore │ ├── api │ │ ├── __init__.py │ │ ├── helpers.py │ │ ├── errors.py │ │ ├── results.py │ │ ├── server.py │ │ └── morphological_endpoint.py │ ├── context.py │ ├── runserver.py │ ├── README.md │ └── LICENSE ├── cli │ ├── __init__.py │ └── cli.py ├── analyzers │ ├── __init__.py │ ├── morphological │ │ ├── analyze_result.py │ │ ├── noun.py │ │ ├── adjective.py │ │ ├── article.py │ │ ├── __init__.py │ │ ├── interjection.py │ │ ├── conjunction.py │ │ ├── base.py │ │ ├── preposition.py │ │ ├── numeral.py │ │ ├── adverb.py │ │ ├── verb.py │ │ └── pronoun.py │ └── morphological_analyzer.py ├── speech │ ├── .DS_Store │ ├── numeral.py │ ├── verb.py │ ├── noun.py │ ├── interjection.py │ ├── conjunction.py │ ├── pronoun.py │ ├── __init__.py │ ├── preposition.py │ ├── adjective.py │ ├── adverb.py │ ├── article.py │ └── word.py ├── __init__.py └── morphological_sentence_analyzer.py ├── pip-selfcheck.json ├── development_requirements.txt ├── docs ├── esperanto_analyzer_screenshot.png ├── index.rst ├── make.bat ├── Makefile └── conf.py ├── .travis.yml ├── requirements.txt ├── .coveragerc ├── tests ├── context.py ├── cli │ ├── context.py │ └── test_cly.py ├── analyzers │ ├── context.py │ └── morphological │ │ ├── context.py │ │ ├── test_analyze_result.py │ │ ├── test_verb_morphological_analyzer.py │ │ ├── test_noun_morphological_analyzer.py │ │ ├── test_base_morphological_analyzer.py │ │ ├── test_adjective_morphological_analyzer.py │ │ ├── test_article_morphological_analyzer.py │ │ ├── test_conjuction_morphological_analyzer.py │ │ ├── test_adverb_morphological_analyzer.py │ │ ├── test_interjection_morphological_analyzer.py │ │ ├── test_numeral_morphological_analyzer.py │ │ ├── test_preposition_morphological_analyzer.py │ │ └── test_pronoun_morphological_analyzer.py ├── speech │ ├── context.py │ ├── test_verb.py │ ├── test_numeral.py │ 
├── test_adjective.py │ ├── test_conjunction.py │ ├── test_preposition.py │ ├── test_interjection.py │ ├── test_noun.py │ ├── test_pronoun.py │ ├── test_adverb.py │ ├── test_word.py │ └── test_article.py ├── test_basic.py ├── conftest.py ├── web │ └── test_web_api_endpoints.py └── test_morphological_sentence_analyzer.py ├── bin ├── console ├── cli └── server ├── Makefile ├── .editorconfig ├── TODO.todo ├── LICENSE ├── setup.py ├── .gitignore └── README.md /.pylintrc: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /runtime.txt: -------------------------------------------------------------------------------- 1 | python-3.7.0 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: python esperanto_analyzer/web/runserver.py 2 | -------------------------------------------------------------------------------- /test_requirements.txt: -------------------------------------------------------------------------------- 1 | # Tests tools 2 | pytest 3 | pytest-cov 4 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/__init__.py: -------------------------------------------------------------------------------- 1 | from .api.server import run_app 2 | -------------------------------------------------------------------------------- /pip-selfcheck.json: -------------------------------------------------------------------------------- 1 | {"last_check":"2018-08-29T14:35:06Z","pypi_version":"18.0"} -------------------------------------------------------------------------------- 
/esperanto_analyzer/web/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.egg-info 3 | venv 4 | .DS_Store 5 | 6 | -------------------------------------------------------------------------------- /esperanto_analyzer/cli/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring 2 | from .cli import CLI 3 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/api/__init__.py: -------------------------------------------------------------------------------- 1 | from .morphological_endpoint import MorphologicalAnalyzeEndpoint 2 | -------------------------------------------------------------------------------- /development_requirements.txt: -------------------------------------------------------------------------------- 1 | ## Development dependencies 2 | pylint 3 | pylint-json2html 4 | sphinx 5 | ipython 6 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/api/helpers.py: -------------------------------------------------------------------------------- 1 | # helpers.py 2 | # This is for helper functions that don't fit in a specific module 3 | -------------------------------------------------------------------------------- /esperanto_analyzer/analyzers/__init__.py: -------------------------------------------------------------------------------- 1 | from esperanto_analyzer.analyzers.morphological_analyzer import MorphologicalAnalyzer 2 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fidelisrafael/esperanto-analyzer/HEAD/esperanto_analyzer/speech/.DS_Store -------------------------------------------------------------------------------- 
/docs/esperanto_analyzer_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fidelisrafael/esperanto-analyzer/HEAD/docs/esperanto_analyzer_screenshot.png -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.7-dev" # 3.7 development branch 4 | install: 5 | - make init_dev 6 | script: 7 | - make test 8 | -------------------------------------------------------------------------------- /esperanto_analyzer/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Entry point to load classes 3 | """ 4 | 5 | from .morphological_sentence_analyzer import MorphologicalSentenceAnalyzer 6 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/api/errors.py: -------------------------------------------------------------------------------- 1 | class SentenceRequiredError(Exception): 2 | pass 3 | 4 | 5 | class SentenceInvalidError(Exception): 6 | pass 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ## CLI 2 | tableprint==0.8.0 3 | 4 | ## Web 5 | Flask==1.0.2 6 | Flask-RESTful==0.3.6 7 | flask-restful-swagger==0.20.1 8 | flask-cors==3.0.6 9 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/context.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))) 5 | 6 | import esperanto_analyzer 7 | -------------------------------------------------------------------------------- /.coveragerc: 
-------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | test/* 4 | setup.py 5 | esperanto_analyzer/web/runserver.py 6 | esperanto_analyzer/web/context.py 7 | 8 | [html] 9 | directory = coverage 10 | -------------------------------------------------------------------------------- /tests/context.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | import os 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 6 | 7 | import esperanto_analyzer 8 | -------------------------------------------------------------------------------- /tests/cli/context.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | import os 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))) 6 | 7 | import esperanto_analyzer 8 | -------------------------------------------------------------------------------- /tests/analyzers/context.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | import os 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))) 6 | 7 | import esperanto_analyzer 8 | -------------------------------------------------------------------------------- /tests/speech/context.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | import os 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))) 6 | 7 | import esperanto_analyzer 8 | -------------------------------------------------------------------------------- /tests/analyzers/morphological/context.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 
4 | import os 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../..'))) 6 | 7 | import esperanto_analyzer 8 | -------------------------------------------------------------------------------- /bin/console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import IPython 4 | import sys 5 | import os 6 | 7 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 8 | 9 | import esperanto_analyzer 10 | 11 | IPython.embed() 12 | -------------------------------------------------------------------------------- /bin/cli: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import os 5 | 6 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 7 | 8 | from esperanto_analyzer.cli import CLI 9 | 10 | CLI.run(input_sentence=sys.argv[1]) 11 | -------------------------------------------------------------------------------- /bin/server: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import os 5 | 6 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 7 | 8 | from esperanto_analyzer.web import run_app 9 | 10 | run_app(debug=True, port=5000) 11 | -------------------------------------------------------------------------------- /tests/test_basic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | import pytest 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer import MorphologicalSentenceAnalyzer 9 | 10 | class TestBasic: 11 | def test_import(self): 12 | assert MorphologicalSentenceAnalyzer 13 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/runserver.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from context import esperanto_analyzer 3 | 4 | from esperanto_analyzer.web.api.server import run_app 5 | 6 | if __name__ == '__main__': 7 | port = int(os.environ.get('PORT', 5000)) 8 | host = str(os.environ.get('HOST', '0.0.0.0')) 9 | 10 | run_app(debug=True, host=host, port=port) 11 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/numeral.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class represents one word belonging to the grammar class classified as 'Numeral' 3 | 4 | What's a Numeral? 5 | === 6 | In linguistics, a numeral is a member of a part of speech(word) characterized by the 7 | designation of numbers; 8 | """ 9 | from .word import Word 10 | 11 | # pylint: disable=too-few-public-methods,missing-docstring,no-self-use 12 | class Numeral(Word): 13 | pass 14 | -------------------------------------------------------------------------------- /esperanto_analyzer/analyzers/morphological/analyze_result.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class represents one analyzed raw word transformed into one `Part of Speech` object 3 | such as `Verb`, `Adverb`. 4 | 5 | Eg: word = AnalyzedWord.
6 | """ 7 | 8 | # pylint: disable=too-few-public-methods,missing-docstring 9 | class AnalyzeResult: 10 | def __init__(self, result, raw_word): 11 | self.result = result 12 | self.raw_word = raw_word 13 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/api/results.py: -------------------------------------------------------------------------------- 1 | from flask_restful import Resource, Api, marshal_with, fields, abort 2 | from flask_restful_swagger import swagger 3 | 4 | @swagger.model 5 | class MorphologicalAnalyzeResult(object): 6 | """The result of a call to /analyze""" 7 | resource_fields = { 8 | 'word': fields.String, 9 | 'value': fields.String, 10 | } 11 | 12 | def __init__(self, results): 13 | self.results = results 14 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/verb.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class represents one word belonging to the grammar class classified as 'Verb' 3 | 4 | What's a Verb? 5 | === 6 | A verb is a word (part of speech) that in syntax conveys an action (bring, read, walk), 7 | an occurrence (happen, become), or a state of being (be, exist, stand) 8 | """ 9 | 10 | from .word import Word 11 | 12 | # pylint: disable=too-few-public-methods,missing-docstring,no-self-use 13 | class Verb(Word): 14 | 15 | def has_plural(self): 16 | return True 17 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. sample documentation master file, created by 2 | sphinx-quickstart on Mon Apr 16 21:22:43 2012. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to sample's documentation! 7 | ================================== 8 | 9 | Contents: 10 | 11 | ..
toctree:: 12 | :maxdepth: 2 13 | 14 | 15 | 16 | Indices and tables 17 | ================== 18 | 19 | * :ref:`genindex` 20 | * :ref:`modindex` 21 | * :ref:`search` 22 | 23 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/noun.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class represents one word belonging to the grammar class classified as 'Noun' 3 | 4 | What's a Noun? 5 | === 6 | A noun is a word(other than a pronoun) that functions as the name of some specific thing 7 | or set of things, such as living creatures, objects, places, actions, feelings... 8 | """ 9 | from .word import Word 10 | 11 | # pylint: disable=too-few-public-methods,missing-docstring,no-self-use 12 | class Noun(Word): 13 | 14 | def has_gender(self): 15 | return True 16 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/interjection.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class represents one word belonging to the grammar class classified as 'Interjection' 3 | 4 | What's an Interjection? 5 | === 6 | In linguistics, an interjection is a word or expression that occurs as an utterance on its 7 | own and expresses a spontaneous feeling or reaction. 8 | """ 9 | 10 | from .word import Word 11 | 12 | # pylint: disable=too-few-public-methods,missing-docstring,no-self-use 13 | class Interjection(Word): 14 | 15 | def has_plural(self): 16 | return False 17 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/conjunction.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class represents one word belonging to the grammar class classified as 'Conjunction' 3 | 4 | What's a Conjunction?
5 | === 6 | In grammar, a conjunction is a part of speech(a word) that connects words, phrases, or clauses 7 | that are called the conjuncts of the conjoining construction. 8 | """ 9 | 10 | from .word import Word 11 | 12 | # pylint: disable=too-few-public-methods,missing-docstring,no-self-use 13 | class Conjunction(Word): 14 | 15 | def has_plural(self): 16 | return False 17 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/pronoun.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class represent one word beloging to grammar class classified as 'Pronoun' 3 | 4 | What's a Pronoun? 5 | === 6 | In linguistics and grammar, a pronoun is a word that substitutes for a noun 7 | or noun phrase. 8 | Pronouns are often used to refer to a noun that has already been mentioned. 9 | """ 10 | 11 | from .word import Word 12 | 13 | # pylint: disable=too-few-public-methods,missing-docstring,no-self-use 14 | class Pronoun(Word): 15 | 16 | def has_gender(self): 17 | return True 18 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | init: 2 | pip install -r requirements.txt 3 | 4 | init_dev: 5 | pip install -r development_requirements.txt 6 | pip install -r test_requirements.txt 7 | pip install -r requirements.txt 8 | 9 | test: 10 | pytest tests --cov-config .coveragerc --cov=esperanto_analyzer --cov-report=html 11 | 12 | lint: 13 | pylint esperanto_analyzer/ --reports=n -f json > .lint_results 14 | 15 | formatted_lint: 16 | pylint esperanto_analyzer/ --reports=n -f json | pylint-json2html -o pylint.html 17 | 18 | web_api: 19 | python esperanto_analyzer/web/runserver.py 20 | -------------------------------------------------------------------------------- /esperanto_analyzer/analyzers/morphological/noun.py: 
-------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring 2 | import re 3 | 4 | from esperanto_analyzer.speech import Noun 5 | from esperanto_analyzer.analyzers.morphological import BaseMorphologicalAnalyzer 6 | 7 | class NounMorphologicalAnalyzer(BaseMorphologicalAnalyzer): 8 | # MATCHES: ["patro", "patroj", "patron", "patrojn"] 9 | # DONT MATCHES: ["io", "lo"] 10 | MATCH_REGEXP = re.compile('(^[a-zA-Zĉĝĵĥŝŭ]{2,}(o(j?n?)?)$)', re.IGNORECASE|re.UNICODE) 11 | 12 | 13 | @staticmethod 14 | def word_class(): 15 | return Noun 16 | -------------------------------------------------------------------------------- /esperanto_analyzer/analyzers/morphological/adjective.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring 2 | import re 3 | 4 | from esperanto_analyzer.speech import Adjective 5 | from esperanto_analyzer.analyzers.morphological import BaseMorphologicalAnalyzer 6 | 7 | class AdjectiveMorphologicalAnalyzer(BaseMorphologicalAnalyzer): 8 | # MATCHES: ["bela", "belaj", "belan", "belajn"] 9 | # DONT MATCHES: ["la"] => Article 10 | MATCH_REGEXP = re.compile('(^[a-zA-Zĉĝĵĥŝŭ]{2,}(a(j?n?)?)$)', re.IGNORECASE|re.UNICODE) 11 | 12 | @staticmethod 13 | def word_class(): 14 | return Adjective 15 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Make 'Parts of Speech' available through the namespace: `esperanto_analyzer.speech` 3 | Eg: `from esperanto_analyzer.speech import Word, Adjective` 4 | """ 5 | 6 | from .word import Word, NotContentError 7 | from .adverb import Adverb 8 | from .adjective import Adjective 9 | from .article import Article, InvalidArticleError 10 | from .conjunction import Conjunction 11 | from .interjection import Interjection 12 | from .noun import Noun 13 | 
from .numeral import Numeral 14 | from .preposition import Preposition 15 | from .pronoun import Pronoun 16 | from .verb import Verb 17 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/preposition.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class represents one word belonging to the grammar class classified as 'Preposition' 3 | 4 | What's a Preposition? 5 | === 6 | A word governing, and usually preceding, a noun or pronoun and expressing a relation 7 | to another word or element in the clause. 8 | Prepositions are often used to express spatial or temporal relations (in, under, towards, before) 9 | """ 10 | 11 | from .word import Word 12 | 13 | # pylint: disable=too-few-public-methods,missing-docstring,no-self-use 14 | class Preposition(Word): 15 | 16 | def has_plural(self): 17 | return False 18 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/adjective.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class represents one word belonging to the grammar class classified as 'Adjective' 3 | 4 | What's an Adjective? 5 | === 6 | A word belonging to one of the major form classes in any of numerous languages and typically serving 7 | as a modifier of a noun to denote a quality of the thing named, to indicate its quantity or extent, 8 | or to specify a thing as distinct from something else. The word red in "the red car" is an adjective.
9 | """ 10 | 11 | from .word import Word 12 | 13 | # pylint: disable=too-few-public-methods,missing-docstring 14 | class Adjective(Word): 15 | pass 16 | -------------------------------------------------------------------------------- /esperanto_analyzer/analyzers/morphological/article.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring 2 | import re 3 | 4 | from esperanto_analyzer.speech import Article 5 | from esperanto_analyzer.analyzers.morphological import BaseMorphologicalAnalyzer 6 | 7 | class ArticleMorphologicalAnalyzer(BaseMorphologicalAnalyzer): 8 | ARTICLES_LIST = ['la'] 9 | 10 | ARTICLES_MATCH_REGEXP = re.compile('|'.join(ARTICLES_LIST), re.IGNORECASE|re.UNICODE) 11 | 12 | # MATCHES: ["la"] 13 | MATCH_REGEXP = re.compile('^(%s)$' % (ARTICLES_MATCH_REGEXP.pattern), re.IGNORECASE|re.UNICODE) 14 | 15 | @staticmethod 16 | def word_class(): 17 | return Article 18 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | import pytest 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer.web.api.server import create_app 9 | 10 | @pytest.fixture 11 | def app(): 12 | # create the app with common test config 13 | app = create_app().app 14 | app.config['TESTING'] = True 15 | 16 | yield app 17 | 18 | return app 19 | 20 | @pytest.fixture 21 | def client(app): 22 | """A test client for the app.""" 23 | return app.test_client() 24 | 25 | @pytest.fixture 26 | def runner(app): 27 | """A test runner for the app's Click commands.""" 28 | return app.test_cli_runner() 29 | -------------------------------------------------------------------------------- /esperanto_analyzer/speech/adverb.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class represent one word 
beloging to grammar class classified as 'Adverb' 3 | 4 | What's an Adverb? 5 | === 6 | A word or phrase that modifies or qualifies an adjective, verb, or other adverb or 7 | a word group, expressing a relation of place, time, circumstance, manner, cause, degree, etc. 8 | (e.g., now, yesterday, today, gently, quite, then, there). 9 | """ 10 | 11 | from .word import Word 12 | 13 | # pylint: disable=too-few-public-methods,missing-docstring 14 | class Adverb(Word): 15 | 16 | def has_plural(self): 17 | """ 18 | Adverbs are invariable 19 | """ 20 | return False 21 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/README.md: -------------------------------------------------------------------------------- 1 | Esperanto Analyzer WEB API 2 | ================= 3 | 4 | Usage 5 | ----- 6 | 7 | Clone the repo: 8 | (...) 9 | 10 | Create virtualenv: 11 | 12 | virtualenv venv 13 | source venv/bin/activate 14 | pip install -r requirements.txt 15 | python setup.py develop # or install if you prefer 16 | 17 | Run the sample server 18 | 19 | python runserver.py 20 | 21 | Try the endpoints: 22 | 23 | curl -XGET http://localhost:5000/analyze?sentence=Mia%20nomo%20estas%20Rafaelo%20kaj%20mi%20venas%20el%20Brazilo 24 | 25 | 26 | Swagger docs available at `http://localhost:5000/api/spec.html` 27 | 28 | 29 | License 30 | ------- 31 | 32 | MIT, see LICENSE file 33 | 34 | -------------------------------------------------------------------------------- /tests/speech/test_verb.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pytest 4 | 5 | from context import esperanto_analyzer 6 | 7 | from esperanto_analyzer.speech import Word, Verb, NotContentError 8 | 9 | class TestVerbBasic(): 10 | def test_import(self): 11 | assert(Verb) 12 | 13 | def test_init(self): 14 | assert(Verb('esti') != None) 15 | 16 | def test_superclass(self): 17 | assert(issubclass(Verb, Word)) 
18 | 19 | def test_valid_content(self): 20 | assert(Verb('esti')) 21 | 22 | def test_invalid_content(self): 23 | with pytest.raises(NotContentError): 24 | assert(Verb('')) 25 | 26 | def test_content(self): 27 | word = Verb('content') 28 | 29 | assert(word.content == 'content') 30 | 31 | def test_metadata_exists(self): 32 | word = Verb(' ') 33 | 34 | assert(word.metadata == dict()) 35 | -------------------------------------------------------------------------------- /tests/speech/test_numeral.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pytest 4 | 5 | from context import esperanto_analyzer 6 | 7 | from esperanto_analyzer.speech import Word, Numeral, NotContentError 8 | 9 | class TestNumeralBasic(): 10 | def test_import(self): 11 | assert(Numeral) 12 | 13 | def test_init(self): 14 | assert(Numeral('10') != None) 15 | 16 | def test_superclass(self): 17 | assert(issubclass(Numeral, Word)) 18 | 19 | def test_valid_content(self): 20 | assert(Numeral('dek')) 21 | 22 | def test_invalid_content(self): 23 | with pytest.raises(NotContentError): 24 | assert(Numeral('')) 25 | 26 | def test_content(self): 27 | word = Numeral('du dek') 28 | 29 | assert(word.content == 'du dek') 30 | 31 | def test_metadata_exists(self): 32 | word = Numeral(' ') 33 | 34 | assert(word.metadata == dict()) 35 | -------------------------------------------------------------------------------- /tests/analyzers/morphological/test_analyze_result.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,missing-docstring 2 | 3 | import pytest 4 | 5 | from context import esperanto_analyzer 6 | 7 | from esperanto_analyzer.analyzers.morphological import AnalyzeResult 8 | 9 | class TestAdjectiveBasic(): 10 | TEST_WORD = 'kapo' 11 | 12 | def test_import(self): 13 | assert AnalyzeResult 14 | 15 | def test_init(self): 16 | assert AnalyzeResult(result=None, raw_word=None) 
is not None 17 | 18 | def test_result(self): 19 | analyze_result = AnalyzeResult(dict(some='object'), raw_word=self.TEST_WORD) 20 | 21 | assert analyze_result.result == dict(some='object') 22 | 23 | def test_raw_word(self): 24 | analyze_result = AnalyzeResult(dict(some='object'), raw_word=self.TEST_WORD) 25 | 26 | assert analyze_result.raw_word == self.TEST_WORD 27 | -------------------------------------------------------------------------------- /tests/speech/test_adjective.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pytest 4 | 5 | from context import esperanto_analyzer 6 | 7 | from esperanto_analyzer.speech import Word, Adjective, NotContentError 8 | 9 | class TestAdjectiveBasic(): 10 | def test_import(self): 11 | assert(Adjective) 12 | 13 | def test_init(self): 14 | assert(Adjective('bela') != None) 15 | 16 | def test_superclass(self): 17 | assert(issubclass(Adjective, Word)) 18 | 19 | def test_valid_content(self): 20 | assert(Adjective('bela')) 21 | 22 | def test_invalid_content(self): 23 | with pytest.raises(NotContentError): 24 | assert(Adjective('')) 25 | 26 | def test_content(self): 27 | word = Adjective('content') 28 | 29 | assert(word.content == 'content') 30 | 31 | def test_metadata_exists(self): 32 | word = Adjective(' ') 33 | 34 | assert(word.metadata == dict()) 35 | -------------------------------------------------------------------------------- /tests/speech/test_conjunction.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pytest 4 | 5 | from context import esperanto_analyzer 6 | 7 | from esperanto_analyzer.speech import Word, Conjunction, NotContentError 8 | 9 | class TestConjunctionBasic(): 10 | def test_import(self): 11 | assert(Conjunction) 12 | 13 | def test_init(self): 14 | assert(Conjunction('kaj') != None) 15 | 16 | def test_superclass(self): 17 | assert(issubclass(Conjunction, 
Word)) 18 | 19 | def test_valid_content(self): 20 | assert(Conjunction('kaj')) 21 | 22 | def test_invalid_content(self): 23 | with pytest.raises(NotContentError): 24 | assert(Conjunction('')) 25 | 26 | def test_content(self): 27 | word = Conjunction('content') 28 | 29 | assert(word.content == 'content') 30 | 31 | def test_metadata_exists(self): 32 | word = Conjunction(' ') 33 | 34 | assert(word.metadata == dict()) 35 | -------------------------------------------------------------------------------- /tests/speech/test_preposition.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pytest 4 | 5 | from context import esperanto_analyzer 6 | 7 | from esperanto_analyzer.speech import Word, Preposition, NotContentError 8 | 9 | class TestPrepositionBasic(): 10 | def test_import(self): 11 | assert(Preposition) 12 | 13 | def test_init(self): 14 | assert(Preposition('post') != None) 15 | 16 | def test_superclass(self): 17 | assert(issubclass(Preposition, Word)) 18 | 19 | def test_valid_content(self): 20 | assert(Preposition('post')) 21 | 22 | def test_invalid_content(self): 23 | with pytest.raises(NotContentError): 24 | assert(Preposition('')) 25 | 26 | def test_content(self): 27 | word = Preposition('content') 28 | 29 | assert(word.content == 'content') 30 | 31 | def test_metadata_exists(self): 32 | word = Preposition(' ') 33 | 34 | assert(word.metadata == dict()) 35 | -------------------------------------------------------------------------------- /esperanto_analyzer/analyzers/morphological/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Make 'Parts of Speech' available through the namespace: `esperanto_analyzer.analyzers.morphological` 3 | Eg: `from esperanto_analyzer.analyzers.morphological import AdverbMorphologicalAnalyzer` 4 | """ 5 | 6 | from .base import BaseMorphologicalAnalyzer 7 | 8 | from .adverb import AdverbMorphologicalAnalyzer 
# -*- coding: utf-8 -*-

import pytest

from context import esperanto_analyzer

from esperanto_analyzer.speech import Word, Interjection, NotContentError


class TestInterjectionBasic():
    """Construction and validation smoke tests for the `Interjection` word class."""

    def test_import(self):
        assert Interjection

    def test_init(self):
        assert Interjection('ek!') is not None

    def test_superclass(self):
        assert issubclass(Interjection, Word)

    def test_valid_content(self):
        assert Interjection('ek!')

    def test_invalid_content(self):
        # Empty content must be rejected at construction time.
        with pytest.raises(NotContentError):
            assert Interjection('')

    def test_content(self):
        assert Interjection('content').content == 'content'

    def test_metadata_exists(self):
        assert Interjection(' ').metadata == {}
TestNounBasic(): 10 | def test_import(self): 11 | assert(Noun) 12 | 13 | def test_init(self): 14 | assert(Noun('lingvo') != None) 15 | 16 | def test_superclass(self): 17 | assert(issubclass(Noun, Word)) 18 | 19 | def test_valid_content(self): 20 | assert(Noun('lingvo')) 21 | 22 | def test_invalid_content(self): 23 | with pytest.raises(NotContentError): 24 | assert(Noun('')) 25 | 26 | def test_content(self): 27 | word = Noun('content') 28 | 29 | assert(word.content == 'content') 30 | 31 | def test_metadata_exists(self): 32 | word = Noun(' ') 33 | 34 | assert(word.metadata == dict()) 35 | 36 | 37 | class TestNounGender(): 38 | def test_has_gender(self): 39 | assert(Noun('lingvo').has_gender()) 40 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | end_of_line = lf 11 | charset = utf-8 12 | 13 | # Docstrings and comments use max_line_length = 79 14 | [*.py] 15 | max_line_length = 119 16 | 17 | # Use 2 spaces for the HTML files 18 | [*.html] 19 | indent_size = 2 20 | 21 | # The JSON files contain newlines inconsistently 22 | [*.json] 23 | indent_size = 2 24 | insert_final_newline = ignore 25 | 26 | [**/admin/js/vendor/**] 27 | indent_style = ignore 28 | indent_size = ignore 29 | 30 | # Minified JavaScript files shouldn't be changed 31 | [**.min.js] 32 | indent_style = ignore 33 | insert_final_newline = ignore 34 | 35 | # Makefiles always use tabs for indentation 36 | [Makefile] 37 | indent_style = tab 38 | 39 | # Batch files use tabs for indentation 40 | [*.bat] 41 | indent_style = tab 42 | 43 | [docs/**.txt] 44 | max_line_length = 79 45 | -------------------------------------------------------------------------------- /tests/speech/test_pronoun.py: 
# -*- coding: utf-8 -*-

import pytest

from context import esperanto_analyzer

from esperanto_analyzer.speech import Word, Pronoun, NotContentError


class TestPronounBasic():
    """Construction and validation smoke tests for the `Pronoun` word class."""

    def test_import(self):
        assert Pronoun

    def test_init(self):
        assert Pronoun('mi') is not None

    def test_superclass(self):
        assert issubclass(Pronoun, Word)

    def test_valid_content(self):
        assert Pronoun('mi')

    def test_invalid_content(self):
        # Empty content must be rejected at construction time.
        with pytest.raises(NotContentError):
            assert Pronoun('')

    def test_content(self):
        assert Pronoun('content').content == 'content'

    def test_metadata_exists(self):
        assert Pronoun(' ').metadata == {}


class TestPronounGender():
    def test_has_gender(self):
        # NOTE(review): 'he' is English, not an Esperanto pronoun ('li') — the
        # test only exercises `has_gender()`, which ignores content. Verify intent.
        assert Pronoun('he').has_gender()
@low 6 | ☐ Publish this module to PyPi(Python Package Index) @pipy @medium 7 | ☐ Separate web and cli in others modules published at Pypi @pypi @medium 8 | ☐ 100% lint warning solved @lint @standard @low 9 | ☐ Better documentation for classes @doc @low 10 | ☐ Normalize all Regexp matching groups @standard @medium 11 | ☐ More sofisticated way of spliting the sentence in MorphologicalSentenceAnalyzer. (Keeping the pontuaction but really spliting in words, eg: ['Mia nomo, estas Adamo!'] => ['Mia', 'nomo', ',', 'estas', 'Adamo', '!']) 12 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Rafael Fidelis 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
# pylint: disable=missing-docstring
import re

from esperanto_analyzer.speech import Interjection
from esperanto_analyzer.analyzers.morphological import BaseMorphologicalAnalyzer

class InterjectionMorphologicalAnalyzer(BaseMorphologicalAnalyzer):
    """Recognizes words that belong to the closed list of Esperanto interjections."""

    # Closed list of the interjections this analyzer recognizes.
    INTERJECTIONS_LIST = [
        'Aĥ!',
        'Aj!',
        'Ba!',
        'Baf!',
        'Baj!',
        'Be!',
        'Bis!',
        'Diable!',
        'Ek!',
        'Fi!',
        'Fu!',
        'Ĝis!',
        'Ha!',
        'Ha lo!',
        'He!',
        'Hej!',
        'Ho!',
        'Ho ve!',
        'Hoj!',
        'Hola!',
        'Hu!',
        'Hup!',
        'Hura!',
        'Lo!',
        'Lu lu!',
        'Nu!',
        'Uf!',
        'Up!',
        'Ŭa!',
        'Ve!',
        'Volapukaĵo!',
        'Jen'
    ]

    # Shared regexp flags
    RE_FLAGS = re.IGNORECASE | re.UNICODE

    # Plain alternation of every entry, eg: `Aĥ!|Aj!|Ba!|Baf!|Baj!(...)`
    INTERJECTIONS_MATCH_REGEXP = re.compile('|'.join(INTERJECTIONS_LIST), RE_FLAGS)

    # Anchored version: the whole word must be exactly one list entry.
    MATCH_REGEXP = re.compile('^(%s)$' % (INTERJECTIONS_MATCH_REGEXP.pattern), RE_FLAGS)

    @staticmethod
    def word_class():
        """Part-of-speech class instantiated on a successful match."""
        return Interjection
# pylint: disable=missing-docstring
import re

from esperanto_analyzer.speech import Conjunction
from esperanto_analyzer.analyzers.morphological import BaseMorphologicalAnalyzer

class ConjunctionMorphologicalAnalyzer(BaseMorphologicalAnalyzer):
    """Recognizes words that belong to the closed list of Esperanto conjunctions."""

    # Closed list of the conjunctions this analyzer recognizes.
    CONJUNCTIONS_LIST = [
        'antaŭ kiam',
        # BUG FIX: a missing trailing comma here used to trigger Python's implicit
        # string-literal concatenation, fusing the next two entries into the bogus
        # alternative 'antaŭ olau' — so neither 'antaŭ ol' nor 'au' ever matched.
        'antaŭ ol',
        'au',
        'aŭ',
        'ĉar',
        'ĉu',
        'K',
        'k',
        'kaj',
        'kaŭ',
        'ke',
        'kial',
        'kiam',
        'kie',
        'kiel',
        'kune kun',
        'kvankam',
        'kvazau',
        'kvazaŭ',
        'minus',
        'nek',
        'ol',
        'plus',
        'se',
        'sed',
        'tial'
    ]

    # Shared regexp flags
    RE_FLAGS = re.IGNORECASE|re.UNICODE

    # NOTE: the name is misspelled ("CONJUCTIONS"); kept as-is because it is a
    # public class attribute other code may reference.
    CONJUCTIONS_MATCH_REGEXP = re.compile('|'.join(CONJUNCTIONS_LIST), RE_FLAGS)

    # MATCHES only elements in `CONJUNCTIONS_LIST`
    MATCH_REGEXP = re.compile('^(%s)$' % (CONJUCTIONS_MATCH_REGEXP.pattern), RE_FLAGS)

    @staticmethod
    def word_class():
        """Part-of-speech class instantiated on a successful match."""
        return Conjunction
# pylint: disable=missing-docstring
class BaseMorphologicalAnalyzer:
    """Template base class for the single-word morphological analyzers.

    Subclasses provide a compiled `MATCH_REGEXP` and a `word_class()`; calling
    `analyze()` once records whether `raw_word` matched and, on success, wraps
    it in the matching part-of-speech class.
    """

    # OVERWRITING THIS PROPERTY IS REQUIRED FOR ALL SUBCLASSES
    MATCH_REGEXP = None

    def __init__(self, raw_word, options=None):
        # A mutable default (`options=dict()`) would be shared between calls,
        # so a fresh dict is created here instead.
        self.options = dict() if options is None else options
        self.raw_word = raw_word
        self.word = None
        self.matches = None
        self.processed = False

    def match(self):
        """Return the regexp match object for `raw_word`, or None."""
        return self.MATCH_REGEXP.match(self.raw_word)

    def analyze(self):
        """Run the analysis once.

        Returns True/False for match/no-match on the first call and
        None on every subsequent call.
        """
        if self.processed:
            return None

        found = self.match()

        # Remember that this instance has already been analyzed.
        self.processed = True

        if not found:
            return False

        self.word = self.word_class()(self.raw_word)
        self.matches = found

        return True

    @staticmethod
    def word_class():
        """Subclasses must return the part-of-speech class to instantiate."""
        raise NotImplementedError
9 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 10 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/api/server.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request 2 | from flask_cors import CORS 3 | 4 | from flask_restful import Resource, Api, marshal_with, fields, abort 5 | 6 | from flask_restful_swagger import swagger 7 | from esperanto_analyzer.web.api import MorphologicalAnalyzeEndpoint 8 | 9 | API_VERSION_NUMBER = '1.0' 10 | API_VERSION_LABEL = 'v1' 11 | 12 | class MorphologicalAnalyzesAPI(object): 13 | 14 | def __init__(self): 15 | self.create_app() 16 | 17 | 18 | def create_app(self): 19 | self.app = Flask(__name__) 20 | CORS(self.app) 21 | 22 | custom_errors = { 23 | 'SentenceInvalidError': { 24 | 'status': 500, 25 | 'message': 'Sentence format not valid' 26 | }, 27 | 'SentenceRequiredError': { 28 | 'status': 400, 29 | 'message': 'Sentence not provided' 30 | } 31 | } 32 | 33 | self.api = swagger.docs(Api(self.app, errors=custom_errors), apiVersion=API_VERSION_NUMBER) 34 | 35 | self.api.add_resource(MorphologicalAnalyzeEndpoint, '/analyze', endpoint='analyze') 36 | 37 | return self.app 38 | 39 | def run(self, *args, **kwargs): # 
# pylint: disable=missing-docstring
import re

from esperanto_analyzer.speech import Preposition
from esperanto_analyzer.analyzers.morphological import BaseMorphologicalAnalyzer

class PrepositionMorphologicalAnalyzer(BaseMorphologicalAnalyzer):
    """Recognizes words that belong to the closed list of Esperanto prepositions."""

    # Closed list of the prepositions this analyzer recognizes.
    # NOTE(review): entries such as 'du vortoj', 'tri vortoj', 'graŭ', 'sekva'
    # look suspicious for a preposition list — confirm against a grammar source.
    PREPOSITIONS_LIST = ['K', 'al', 'anstataŭ', 'antaŭ', 'antaŭ ol', 'apud', 'da', 'de', 'disde',
                         'du vortoj', 'dum', 'ekde', 'ekster', 'eksteren', 'el', 'en', 'ene',
                         'estiel', 'far', 'fare de', 'flanke de', 'for de', 'graŭ', 'inter', 'je',
                         'kaj ankaŭ', 'kiel', 'kontraŭ', 'kontraŭe de', 'krom', 'kun', 'laŭ',
                         'mala', 'malantaŭ', 'malgraŭ', 'malkiel', 'malsupre de', 'malsupren',
                         'meze de', 'na', 'nome de', 'ol', 'per', 'pere de', 'plus', 'po', 'por',
                         'post', 'preter', 'pri', 'pro', 'proksime de', 'samkiel', 'sed', 'sekva',
                         'sen', 'sub', 'suben', 'super', 'supren', 'sur', 'tiu', 'tiuj', 'tra',
                         'trans', 'tri vortoj', 'tuj post', 'tutĉirkaŭ',
                         'ĉe', 'ĉi tiu', 'ĉi tiuj', 'ĉirkaŭ', 'ĝis']

    # Shared regexp flags — hoisted into a constant for consistency with the
    # sibling analyzers (interjection, conjunction, numeral).
    RE_FLAGS = re.IGNORECASE|re.UNICODE

    # NOTE: the name is misspelled ("PROPOSITIONS"); kept as-is because it is a
    # public class attribute other code may reference.
    PROPOSITIONS_MATCH_REGEXP = re.compile('|'.join(PREPOSITIONS_LIST), RE_FLAGS)

    # MATCHES only elements in `PREPOSITIONS_LIST`
    MATCH_REGEXP = re.compile('^(%s)$' % (PROPOSITIONS_MATCH_REGEXP.pattern), RE_FLAGS)

    @staticmethod
    def word_class():
        """Part-of-speech class instantiated on a successful match."""
        return Preposition
# -*- coding: utf-8 -*-

import pytest

from context import esperanto_analyzer

from esperanto_analyzer.speech import Word, NotContentError

class TestWordBasic():
    """Construction and validation smoke tests for the base `Word` class."""

    def test_import(self):
        assert(Word)

    def test_init(self):
        assert(Word(' ') != None)

    def test_valid_content(self):
        assert(Word('a'))

    def test_invalid_content(self):
        with pytest.raises(NotContentError):
            assert(Word(''))

    def test_content(self):
        word = Word('content')

        assert(word.content == 'content')

    def test_metadata_exists(self):
        word = Word(' ')

        assert(word.metadata == dict())


class TestValidWordPlural():
    """Plural detection: words ending in 'j'/'jn' (min. 4 letters) are plural."""

    def test_esperanto_words(self):
        for word in ['kaj', 'ajn']:
            assert(Word(word).plural == False)

    def test_plural_without_acusative(self):
        word = Word('domoj')

        assert(word.plural == True)

    def test_plural_with_acusative(self):
        word = Word('domojn')

        assert(word.plural == True)

class TestInvalidWordPlural():
    def test_plural_without_acusative(self):
        word = Word('domo')

        assert(word.plural == False)

    def test_plural_with_acusative(self):
        word = Word('domon')

        assert(word.plural == False)

# BUG FIX: this helper used to be named `TestPluralWord`. pytest tries to collect
# any `Test*`-prefixed class and rejects this one (it inherits Word's `__init__`),
# emitting a PytestCollectionWarning on every run. Helpers must not match `Test*`.
class PluralLessWord(Word):
    def has_plural(self):
        return False

class TestValidWordDontHasPlural:
    def test_valid_word_plural(self):
        word = PluralLessWord('multe')

        assert(word.plural == False)
# pylint: disable=missing-docstring
import re

from esperanto_analyzer.speech import Numeral
from esperanto_analyzer.analyzers.morphological import BaseMorphologicalAnalyzer

class NumeralMorphologicalAnalyzer(BaseMorphologicalAnalyzer):
    """Recognizes Esperanto numerals: digit literals, basic number names and compounds."""

    # These are the basics numbers names in Esperanto
    BASIC_NUMBERS_LIST = [
        'nul',  # 0
        'unu',  # 1
        'du',   # 2
        'tri',  # 3
        'kvar', # 4
        'kvin', # 5
        'ses',  # 6
        'sep',  # 7
        'ok',   # 8
        'naŭ',  # 9
        'dek'   # 10
    ]

    # Shared regexp flags
    RE_FLAGS = re.IGNORECASE|re.UNICODE

    # TODO: Should this be dynamic?
    # Basically: `re.compile('(nul|unu|du|tri|kvar|kvin|ses|sep|ok|naŭ|dek)')`
    BASIC_NUMBERS_REGEXP = re.compile('|'.join(BASIC_NUMBERS_LIST), RE_FLAGS)

    # TODO: This still matches "unudek", solve it!
    # MATCHES: ["tridek", "okdek", "kvin", "sepcent", "tri miliono"]
    OTHERS_NUMBERS_REGEXP = re.compile('(unu|du|tri|kvar|kvin|ses|sep|ok|naŭ|dek)?(dek|cent|milionoj|miliono|miliardoj|miliardo|bilionoj|biliono|mil)', RE_FLAGS)

    # MATCHES: ["1", "100", "-123", "9009809809", "-90123283232"]
    # BUG FIX: pattern is now a raw string — '\d' inside a plain string literal is
    # an invalid escape sequence (DeprecationWarning since 3.6, SyntaxWarning in 3.12).
    NUMBERS_DIGIT_REGEXP = re.compile(r'-?\d+', re.UNICODE)

    # Join regexps to create the final pattern utilized for this analyzer
    FINAL_REGEXP = '^(%s|%s|%s)$' % (NUMBERS_DIGIT_REGEXP.pattern, BASIC_NUMBERS_REGEXP.pattern, OTHERS_NUMBERS_REGEXP.pattern)

    # The final regexp utilized internally in `match()`
    MATCH_REGEXP = re.compile(FINAL_REGEXP, RE_FLAGS)

    @staticmethod
    def word_class():
        """Part-of-speech class instantiated on a successful match."""
        return Numeral
["multe", "flanke", "rapide"] 9 | BASE_MATCH_REGEXP = re.compile('([a-zA-Zĉĝĵĥŝŭ]{2,}(e))', re.IGNORECASE|re.UNICODE) 10 | 11 | # Some specials Esperanto adverbs and time related adverbs(now, yesterday, etc) list 12 | # @see https://www.wikiwand.com/en/Special_Esperanto_adverbs 13 | # @see: http://mylanguages.org/esperanto_adverbs.php 14 | SPECIAL_ADVERBS = [ 15 | 'almenaŭ', 16 | 'ambaŭ', 17 | 'antaŭ', 18 | 'ankaŭ', 19 | 'ankoraŭ', 20 | 'apenaŭ', 21 | 'baldaŭ', 22 | 'ĉirkaŭ', 23 | 'hieraŭ', 24 | 'hodiaŭ', 25 | 'kvazaŭ', 26 | 'morgaŭ', 27 | 'preskaŭ', 28 | 'nun', 29 | 'tiam', 30 | 'ĉiam', 31 | 'neniam', 32 | 'tuj', 33 | 'jam', 34 | 'tie', 35 | 'tien', 36 | 'ĉie', 37 | 'nenie', 38 | 'for', 39 | 'eksteren', 40 | 'tre', 41 | ] 42 | 43 | # Create one regexp joining all the special adverbs 44 | SPECIAL_ADVERBS_MATCH_REGEXP = re.compile('|'.join(SPECIAL_ADVERBS), re.IGNORECASE|re.UNICODE) 45 | 46 | # Creates one string representation of the final `MATCH_REGEXP` joining two regexps 47 | FINAL_REGEXP = '^(%s|%s)$' % (BASE_MATCH_REGEXP.pattern, SPECIAL_ADVERBS_MATCH_REGEXP.pattern) 48 | 49 | # Finally create the FINAL regexp joining all the regexp need to match Adverbs 50 | MATCH_REGEXP = re.compile(FINAL_REGEXP, re.IGNORECASE|re.UNICODE) 51 | 52 | @staticmethod 53 | def word_class(): 54 | return Adverb 55 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring,no-self-use,invalid-name 2 | 3 | # Learn more: https://github.com/kennethreitz/setup.py 4 | 5 | from setuptools import setup, find_packages 6 | 7 | 8 | with open('README.md') as f: 9 | readme = f.read() 10 | 11 | with open('LICENSE') as f: 12 | lib_license = f.read() 13 | 14 | with open('requirements.txt') as fd: 15 | requirements = [line.rstrip() for line in fd] 16 | 17 | with open('test_requirements.txt') as fd: 18 | test_requirements = [line.rstrip() for line 
in fd] 19 | 20 | setup( 21 | name='esperanto-analyzer', 22 | version='0.0.3', 23 | description='Morphological and syntactic analysis of Esperanto sentences.', 24 | long_description=readme, 25 | author='Rafael Fidelis', 26 | author_email='rafaelfid3lis@gmail.com', 27 | url='https://github.com/fidelisrafael/esperanto-analyzer', 28 | license=lib_license, 29 | packages=find_packages(exclude=('tests', 'docs')), 30 | install_requires=requirements, 31 | tests_require=test_requirements, 32 | classifiers=[ 33 | 'Programming Language :: Python :: 3.7', 34 | "Programming Language :: Python :: 3", 35 | 'Intended Audience :: Developers', 36 | 'Intended Audience :: Education', 37 | 'Intended Audience :: Information Technology', 38 | 'Intended Audience :: Science/Research', 39 | 'Topic :: Scientific/Engineering', 40 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 41 | 'Topic :: Scientific/Engineering :: Human Machine Interfaces', 42 | 'Topic :: Scientific/Engineering :: Information Analysis', 43 | 'Topic :: Text Processing', 44 | 'Topic :: Text Processing :: Filters', 45 | 'Topic :: Text Processing :: General', 46 | 'Topic :: Text Processing :: Indexing', 47 | 'Topic :: Text Processing :: Linguistic', 48 | "License :: OSI Approved :: MIT License", 49 | "Operating System :: OS Independent", 50 | ] 51 | ) 52 | -------------------------------------------------------------------------------- /tests/speech/test_article.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pytest 4 | 5 | from context import esperanto_analyzer 6 | 7 | from esperanto_analyzer.speech import Word, Article 8 | from esperanto_analyzer.speech import NotContentError, InvalidArticleError 9 | 10 | class TestArticleBasic(): 11 | def test_import(self): 12 | assert(Article) 13 | 14 | def test_init(self): 15 | assert(Article('la') != None) 16 | 17 | def test_superclass(self): 18 | assert(issubclass(Article, Word)) 19 | 20 | def 
class AnalyzerNotProcessedError(Exception):
    """Raised when output is requested from an analyzer before `analyze()` was called.

    BUG FIX: this previously inherited from `BaseException`, which is reserved for
    interpreter-level exits (SystemExit, KeyboardInterrupt) and silently escapes
    `except Exception:` handlers; application errors must derive from `Exception`.
    """


class CLI():
    """Command-line front end: analyzes one sentence and prints a colorized table."""

    # ANSI foreground color code used for each 'Part of Speech' name.
    COLORS = {
        'Adjective': 92,    # Light Green
        'Adverb': 32,       # Green
        'Article': 33,      # Yellow
        'Conjunction': 35,  # Magenta
        'Interjection': 95, # Light Magenta
        'Noun': 34,         # Blue
        'Numeral': 93,      # Light Yellow
        'Preposition': 36,  # Cian
        'Pronoun': 96,      # Light Cian
        'Verb': 31,         # Red
        'Undefined': 30     # Black
    }

    OUTPUT_TABLE_HEADERS = ['Word', 'Part of Speech']

    @staticmethod
    def run(input_sentence=None, output=sys.stdout):
        """Analyze `input_sentence` morphologically and print the results to `output`."""
        analyzer = MorphologicalSentenceAnalyzer(input_sentence)
        analyzer.analyze()

        CLI.display_output_for_analyzer(analyzer, output=output)

    @staticmethod
    def display_output_for_analyzer(analyzer, output=sys.stdout):
        """Print the results table for an already-processed analyzer.

        Raises AnalyzerNotProcessedError if `analyzer.analyze()` was never called.
        """
        if analyzer.processed is False:
            raise AnalyzerNotProcessedError('Analyzer must be processed before output display. You must call `analyze()` in your instance')

        CLI.print_results(analyzer.simple_results(), output=output)

    @staticmethod
    def format_table_data(results, colorize=True):
        """Turn `(word, pos_name)` pairs into table rows, optionally ANSI-colorized."""
        out_data = []

        # Wraps `string` in the ANSI escape sequence for the color mapped to `cname`.
        format_color = lambda string, cname: ('\x1b[%sm%s \x1b[0m') % (CLI.COLORS[cname], string)

        for result in results:
            out_data.append([
                format_color(result[0], result[1]) if colorize else result[0],
                format_color(result[1], result[1]) if colorize else result[1]
            ])

        return out_data

    @staticmethod
    def print_results(results, width=15, output=sys.stdout):
        """Render `results` as a table via `tableprint` and return its value."""
        table_data = CLI.format_table_data(results)

        return tableprint.table(table_data, CLI.OUTPUT_TABLE_HEADERS, width=width, out=output)
from .word import Word

# pylint: disable=too-few-public-methods,missing-docstring
class Article(Word):
    """Esperanto article part of speech.

    Esperanto has exactly one article — the definite 'la' — whose written form
    never changes, although the surrounding context may be singular or plural:
        "La suno brilas"  -> "The sun shines"   [singular]
        "La homoj kuiras" -> "The people cook"  [plural]
    """

    # The single valid Esperanto article.
    VALID_ARTICLES = ['la']

    def has_plural(self):
        """Articles are ALWAYS written 'la', yet they can stand in a plural context."""
        return True


    def _match_plural(self, context=None):
        """Infer plurality from the neighbouring `context` word, if one is given.

        > Article('la')._match_plural(None)     # => False
        > Article('la')._match_plural('domo')   # => False
        > Article('la')._match_plural('domoj')  # => True
        """
        return Word(context).plural if context else False

    def _validate_content(self, content):
        # Run the generic non-empty validation from `Word` first.
        super()._validate_content(content)

        # Esperanto has ONLY one article ('la'); reject anything else so this
        # instance really represents a valid Esperanto article.
        if content.lower() not in self.VALID_ARTICLES:
            raise InvalidArticleError
62 | Eg: Article('lo') # raise InvalidArticleError 63 | """ 64 | pass 65 | -------------------------------------------------------------------------------- /esperanto_analyzer/web/api/morphological_endpoint.py: -------------------------------------------------------------------------------- 1 | from flask import request, Response 2 | from flask_restful import Resource, Api, marshal_with, fields, abort 3 | from flask_restful_swagger import swagger 4 | from .results import MorphologicalAnalyzeResult 5 | from .errors import SentenceRequiredError 6 | from .errors import SentenceInvalidError 7 | 8 | from esperanto_analyzer import MorphologicalSentenceAnalyzer 9 | from esperanto_analyzer.cli import CLI 10 | 11 | class MorphologicalAnalyzeEndpoint(Resource): 12 | @swagger.operation( 13 | responseClass=MorphologicalAnalyzeResult.__name__, 14 | nickname='analyzes', 15 | responseMessages=[ 16 | {"code": 400, "message": "Input required"}, 17 | {"code": 500, "message": "JSON format not valid"}, 18 | ], 19 | parameters=[ 20 | { 21 | "name": "sentence", 22 | "description": "The esperanto sentence to be analyzed morphologically", 23 | "required": True, 24 | "allowMultiple": False, 25 | "dataType": "string", 26 | "paramType": "query" 27 | }, 28 | ]) 29 | @marshal_with(MorphologicalAnalyzeResult.resource_fields) 30 | def get(self): 31 | """Return a MorphologicalAnalyzeResult object""" 32 | sentence = request.args['sentence'] 33 | 34 | if not sentence: 35 | raise SentenceRequiredError() 36 | 37 | analyzer = MorphologicalSentenceAnalyzer(sentence=sentence) 38 | analyzer.analyze() 39 | 40 | return self._format_results(analyzer.results()) 41 | 42 | def options(self): 43 | response = Response('{}') 44 | response.headers['Content-Type'] = 'application/json' 45 | response.headers['Access-Control-Allow-Origin'] = '*' 46 | response.headers['Access-Control-Allow-Headers'] = '*' 47 | response.headers['Access-Control-Allow-Method'] = 'POST, GET, OPTIONS' 48 | 49 | return response 50 | 51 | def 
# pylint: disable=too-few-public-methods,missing-docstring
class Word:
    """The smallest unit of language carrying practical meaning.

    A word is one or more spoken sounds (or their written representation)
    functioning as a principal carrier of meaning; technically, one set of
    letters, usually separated by spaces in writing.
    """

    # Plural detection: only words of at least four characters (which rules
    # out short words such as "ajn" and "kaj") that end in "j" or "jn"
    # count as plural.
    PLURAL_DETECT_REGEXP = re.compile('.{2,}([^n]j|jn)$', re.IGNORECASE|re.UNICODE)

    def __init__(self, content, context=None):
        self._validate_content(content)

        self.content = content
        self.context = context
        self.metadata = {}
        self.plural = (self._match_plural(context) not in (False, None))

    def _match_plural(self, _context=None):
        """Try to match the plural suffix of this word.

        Returns the regexp match object when the word looks plural, and
        ``None`` both for singular words and for word classes that never
        inflect for number. Extra context may be passed to help the decision.
        """
        if self.has_plural():
            return self.PLURAL_DETECT_REGEXP.match(self.content)

        # Some word classes (e.g. Adverb) have no plural form at all.
        return None

    def has_plural(self):  # pylint: disable=no-self-use
        """Whether this kind of word is capable of being pluralized.

        Subclasses without a plural form (e.g. Adverb) override this.
        """
        return True

    def _validate_content(self, content):  # pylint: disable=no-self-use
        """Reject empty or falsy content by raising ``NotContentError``."""
        if not content:
            raise NotContentError

class NotContentError(Exception):
    """Raised when a Word is created with empty content.

    Eg: Word('') # raise NotContentError
    """
class MorphologicalSentenceAnalyzer:
    """Splits a sentence into words and analyzes each one morphologically.

    Punctuation is stripped first; each remaining word is run through a
    `MorphologicalAnalyzer`. Results are computed once and cached.
    """

    # The same set of characters as `string.punctuation`
    SENTENCE_CLEAN_REGEXP = re.compile('[!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~]')

    def __init__(self, sentence):
        self.sentence = sentence
        self.sentence_words = self._split_sentence(sentence)
        self.processed = False
        self.internal_results = None

    def analyze(self):
        """Analyze every word once.

        Returns True on the first run, None on subsequent calls (the cached
        results from the first run are kept).
        """
        # Avoid running the same thing many times returning the previous cached results
        if self.processed is True:
            return None

        # Cache the results
        self.internal_results = self._process_words(self.sentence_words)
        self.processed = True

        return True

    def analyzes_results(self):
        """Return the list of AnalyzeResult objects, or None before analyze()."""
        if not self.processed:
            return None

        return [result.results for result in self.internal_results]

    def simple_results(self):
        """Return `[raw_word, part_of_speech_name]` pairs for display (e.g. CLI)."""
        return self._format_simple_results(self.results())

    def results(self):
        """Return `[raw_word, AnalyzeResult]` pairs, or None before analyze()."""
        if not self.processed:
            return None

        results = []

        for analyze in self.analyzes_results():
            results.append([analyze.raw_word, analyze])

        return results

    def _split_sentence(self, sentence):
        """Strip punctuation and split on whitespace."""
        clean_sentence = self._clean_sentence(sentence)

        return clean_sentence.split()

    def _clean_sentence(self, sentence):
        """Remove all ASCII punctuation characters from `sentence`."""
        # Call `.sub` on the precompiled pattern directly instead of routing
        # it back through `re.sub` (equivalent, but idiomatic).
        return self.SENTENCE_CLEAN_REGEXP.sub('', sentence)

    def _process_words(self, words):
        """Run each word through its own MorphologicalAnalyzer instance."""
        results = []

        for word in words:
            analyzer = MorphologicalAnalyzer(word)
            analyzer.analyze()

            results.append(analyzer)

        return results

    def _format_simple_results(self, results):
        """Reduce full results into `[raw_word, part_of_speech_name]` pairs."""
        out_data = []

        for data in results:
            try:
                # Get the current 'Part of Speech' name, such as: 'Adverb', 'Noun'
                pos_name = data[1].result.word.__class__.__name__
            except (AttributeError, IndexError, TypeError):
                # BUG FIX: was a bare `except:` that swallowed every exception
                # (even KeyboardInterrupt). Only the expected failures of the
                # attribute/index chain above (e.g. data[1] is None when no
                # analyzer matched) are handled.
                pos_name = 'Undefined'

            out_data.append([
                data[0],
                pos_name
            ])

        return out_data
*.egg-info/ 81 | .installed.cfg 82 | *.egg 83 | 84 | # PyInstaller 85 | # Usually these files are written by a python script from a template 86 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 87 | *.manifest 88 | *.spec 89 | 90 | # Installer logs 91 | pip-log.txt 92 | pip-delete-this-directory.txt 93 | 94 | # Unit test / coverage reports 95 | htmlcov/ 96 | .tox/ 97 | .coverage 98 | .coverage.* 99 | .cache 100 | nosetests.xml 101 | coverage.xml 102 | *,cover 103 | .hypothesis/ 104 | 105 | # Translations 106 | *.mo 107 | *.pot 108 | 109 | # Django stuff: 110 | *.log 111 | local_settings.py 112 | 113 | # Flask stuff: 114 | instance/ 115 | .webassets-cache 116 | 117 | # Scrapy stuff: 118 | .scrapy 119 | 120 | # Sphinx documentation 121 | docs/_build/ 122 | 123 | # PyBuilder 124 | target/ 125 | 126 | # IPython Notebook 127 | .ipynb_checkpoints 128 | 129 | # pyenv 130 | .python-version 131 | 132 | # celery beat schedule file 133 | celerybeat-schedule 134 | 135 | # dotenv 136 | .env 137 | 138 | # virtualenv 139 | .venv/ 140 | venv/ 141 | ENV/ 142 | 143 | # Spyder project settings 144 | .spyderproject 145 | 146 | # Rope project settings 147 | .ropeproject 148 | -------------------------------------------------------------------------------- /esperanto_analyzer/analyzers/morphological/verb.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring 2 | import re 3 | 4 | from esperanto_analyzer.speech import Verb 5 | from esperanto_analyzer.analyzers.morphological import AdverbMorphologicalAnalyzer 6 | from esperanto_analyzer.analyzers.morphological import ConjunctionMorphologicalAnalyzer 7 | from esperanto_analyzer.analyzers.morphological import NumeralMorphologicalAnalyzer 8 | from esperanto_analyzer.analyzers.morphological import PrepositionMorphologicalAnalyzer 9 | from esperanto_analyzer.analyzers.morphological import PronounMorphologicalAnalyzer 10 | 11 | from 
class VerbMorphologicalAnalyzer(BaseMorphologicalAnalyzer):
    """Recognizes Esperanto verbs by their grammatical endings.

    A word matches when it is a stem of at least two letters (including the
    accented Esperanto letters) followed by one of the verb endings below;
    participle/nominal endings additionally accept the plural `j` and
    accusative `n` suffixes.
    """

    # https://en.wiktionary.org/wiki/Appendix:Esperanto_verbs
    # Note: Not completed
    VERBS_ENDINGS = [
        'i', # Infinitive
        'u', # Volative/Jussive
        'as', # Present indicative
        'os', # Future indicative
        'is', # Past indicative
        'us', # Conditional
        'ite', # Past passive adverbial
        'ate', # Present passive adverbial
        'ote', # Future passive adverbial
        'inte', # Past active adverbial
        'ante', # Present active adverbial
        'onte', # Future active adverbial
    ]

    # Tenses that receives acusative (n) and plural (j) suffix
    VERBS_ENDINGS_ACUSATIVE_PLURAL = [
        'inta', # Past active participle,
        'anta', # Present active participle
        'onta', # Future active participle
        'into', # Past active nominal
        'anto', # Present active nominal
        'onto', # Future active nominal
        'ita', # Past passive participle
        'ata', # Present passive participle
        'ota', # Future passive participle
        'ito', # Past passive nominal
        'ato', # Past passive nominal
        'oto', # Future passive nominal
    ]

    # Case-insensitive matching over the full Unicode range (accented letters).
    RE_FLAGS = re.IGNORECASE|re.UNICODE

    # Plain alternation of the simple endings; embedded via `.pattern` below,
    # where MATCH_REGEXP's `^...$` anchors make the alternation order irrelevant.
    VERBS_ENDINGS_REGEXP = re.compile('|'.join(VERBS_ENDINGS), RE_FLAGS)

    # Alternation of the participle/nominal endings that can also take j/n.
    VERBS_ENDINGS_ACUSATIVE_PLURAL_REGEXP = re.compile('|'.join(VERBS_ENDINGS_ACUSATIVE_PLURAL), RE_FLAGS)

    # MATCHES: ["ŝatis", "ŝatas", "ŝatu", "ŝatus", "ŝati"] and so on
    # Shape: ^ stem(>=2 letters) + (simple ending | participle ending + optional j/n) $
    MATCH_REGEXP = re.compile('^([a-zA-Zĉĝĵĥŝŭ]{2,}(%s|(%s)(j?n?)?))$' % (VERBS_ENDINGS_REGEXP.pattern, VERBS_ENDINGS_ACUSATIVE_PLURAL_REGEXP.pattern), RE_FLAGS)

    @staticmethod
    def word_class():
        """The speech class this analyzer produces for matched words."""
        return Verb
class MorphologicalAnalyzer:
    """Runs one word through every part-of-speech analyzer until one matches.

    The matching analyzer (or None) is wrapped in an `AnalyzeResult` and
    cached; `analyze()` is idempotent after the first call.
    """

    # TODO: Reorganize this order for better performance
    DEFAULT_ANALYZERS = [
        AdverbMorphologicalAnalyzer,
        ArticleMorphologicalAnalyzer,
        ConjunctionMorphologicalAnalyzer,
        InterjectionMorphologicalAnalyzer,
        NumeralMorphologicalAnalyzer,
        PrepositionMorphologicalAnalyzer,
        PronounMorphologicalAnalyzer,
        AdjectiveMorphologicalAnalyzer,
        NounMorphologicalAnalyzer,
        VerbMorphologicalAnalyzer,
    ]

    def __init__(self, raw_word):
        self.raw_word = raw_word
        self.processed = False
        self.results = None

    def analyze(self):
        """Analyze the word once.

        Returns True on the first run (populating `self.results`), and
        None on subsequent calls.
        """
        if self.processed:
            return None

        analyzer = self.__apply_analyzers(self.raw_word, self.DEFAULT_ANALYZERS)

        self.results = self.__finish_result(result=analyzer, raw_word=self.raw_word)
        self.processed = True

        return True

    def __finish_result(self, result, raw_word):
        """Wrap the (possibly None) matching analyzer into an AnalyzeResult."""
        return AnalyzeResult(result=result, raw_word=raw_word)

    def __apply_analyzers(self, word, analyzers=None):
        """Return the first analyzer instance that matches `word`, else None."""
        # BUG FIX: was `len(analyzers) is 0` -- identity comparison against an
        # int literal is implementation-dependent and raises a SyntaxWarning
        # on CPython 3.8+. A plain truthiness test covers both None and empty.
        if not analyzers:
            return None

        for analyzer_class in analyzers:
            analyzer_instance = analyzer_class(word)

            if analyzer_instance.analyze() is True:
                return analyzer_instance

        # Explicit: no analyzer recognized the word.
        return None
PERSONAL_PRONOUNS_LIST_REGEXP = re.compile('|'.join(PERSONAL_PRONOUNS_LIST), RE_FLAGS) 57 | 58 | # /kiu|kio|kies|tiu|ĉi tiu|tia/ 59 | OTHERS_PRONOUNS_LIST_REGEXP = re.compile('|'.join(OTHERS_PRONOUNS_LIST), RE_FLAGS) 60 | 61 | # /nenio|neniu|ĉio|ĉiu|io|iu|io ajn|iu ajn|io ajn|ĉio ajn|iu ajn|ĉiu ajn/ 62 | INDEFINITE_PRONOUNS_LIST_REGEXP = re.compile('|'.join(INDEFINITE_PRONOUNS_LIST), RE_FLAGS) 63 | 64 | # ["mia", "via", "lia", (...)] 65 | PERSONAL_POSSESSIVE_PRONOUNS_LIST = [pronoun + "a" for pronoun in PERSONAL_PRONOUNS_LIST] 66 | 67 | # /mia|via|lia|ŝia|gia|(...)/ 68 | PERSONAL_POSSESSIVE_PRONOUNS_LIST_REGEXP = re.compile('|'.join(PERSONAL_POSSESSIVE_PRONOUNS_LIST), RE_FLAGS) 69 | 70 | # /mi|vi|li|ŝi|gi|(...)|mia|via|lia|ŝia|gia|(...)/ 71 | ALL_PERSONAL_PRONOUNS_REGEXP = re.compile("(%s|%s|%s|%s)" % (PERSONAL_POSSESSIVE_PRONOUNS_LIST_REGEXP.pattern, PERSONAL_PRONOUNS_LIST_REGEXP.pattern, OTHERS_PRONOUNS_LIST_REGEXP.pattern, INDEFINITE_PRONOUNS_LIST_REGEXP.pattern)) 72 | 73 | # MATCHES: ["mi", "via", "viajn", "viaj", "liajn"] 74 | MATCH_REGEXP = re.compile('(^(%s((j?n?)?))$)' % ALL_PERSONAL_PRONOUNS_REGEXP.pattern, RE_FLAGS) 75 | 76 | # breakpoint() 77 | 78 | @staticmethod 79 | def word_class(): 80 | return Pronoun 81 | -------------------------------------------------------------------------------- /tests/cli/test_cly.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring,no-self-use 2 | 3 | import pytest 4 | from io import StringIO 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer import MorphologicalSentenceAnalyzer 9 | from esperanto_analyzer.cli.cli import CLI, AnalyzerNotProcessedError 10 | 11 | class TestCLIBasic(): 12 | TEST_SENTENCE = 'Mi loĝas en Brazilo' 13 | EXPECT_OUTPUT_TEST_SENTENCE = '╭─────────────────┬─────────────────╮\n│ Word │ Part of Speech │\n├─────────────────┼─────────────────┤\n│ \x1b[96mMi \x1b[0m │ \x1b[96mPronoun \x1b[0m │\n│ 
\x1b[31mloĝas \x1b[0m │ \x1b[31mVerb \x1b[0m │\n│ \x1b[36men \x1b[0m │ \x1b[36mPreposition \x1b[0m │\n│ \x1b[34mBrazilo \x1b[0m │ \x1b[34mNoun \x1b[0m │\n╰─────────────────┴─────────────────╯\n' 14 | 15 | COLORS = { 16 | 'Adjective': 92, # Light Green 17 | 'Adverb': 32, # Green 18 | 'Article': 33, # Yellow 19 | 'Conjunction': 35, # Magenta 20 | 'Interjection': 95, # Light Magenta 21 | 'Noun': 34, # Blue 22 | 'Numeral': 93, # Light Yellow 23 | 'Preposition': 36, # Cian 24 | 'Pronoun': 96, # Light Cian 25 | 'Verb': 31, # Red 26 | 'Undefined': 30 # Black 27 | } 28 | 29 | def test_import(self): 30 | assert CLI 31 | 32 | def test_colors(self): 33 | assert CLI.COLORS == self.COLORS 34 | 35 | def test_output_table_headers(self): 36 | assert CLI.OUTPUT_TABLE_HEADERS == ['Word', 'Part of Speech'] 37 | 38 | def test_display_output_for_analyzer_without_executing(self): 39 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 40 | 41 | with pytest.raises(AnalyzerNotProcessedError): 42 | CLI.display_output_for_analyzer(analyzer) 43 | 44 | def test_print_results(self): 45 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 46 | analyzer.analyze() 47 | 48 | output = StringIO() 49 | 50 | # Execute the method that will write to `output` 51 | CLI.print_results(analyzer.simple_results(), output=output) 52 | 53 | assert output.getvalue() == self.EXPECT_OUTPUT_TEST_SENTENCE 54 | 55 | def test_run(self): 56 | output = StringIO() 57 | CLI.run(self.TEST_SENTENCE, output) 58 | 59 | assert output.getvalue() == self.EXPECT_OUTPUT_TEST_SENTENCE 60 | 61 | def test_format_table_data_with_formating(self): 62 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 63 | analyzer.analyze() 64 | 65 | expected = [ 66 | ['\x1b[96mMi \x1b[0m', '\x1b[96mPronoun \x1b[0m'], 67 | ['\x1b[31mloĝas \x1b[0m', '\x1b[31mVerb \x1b[0m'], 68 | ['\x1b[36men \x1b[0m', '\x1b[36mPreposition \x1b[0m'], 69 | ['\x1b[34mBrazilo \x1b[0m', '\x1b[34mNoun \x1b[0m'] 70 | ] 71 | 72 | assert 
CLI.format_table_data(analyzer.simple_results()) == expected 73 | 74 | def test_format_table_data_without_formating(self): 75 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 76 | analyzer.analyze() 77 | 78 | expected = [ 79 | ['Mi', 'Pronoun'], 80 | ['loĝas', 'Verb'], 81 | ['en', 'Preposition'], 82 | ['Brazilo', 'Noun'] 83 | ] 84 | 85 | assert CLI.format_table_data(analyzer.simple_results(), colorize=False) == expected 86 | 87 | -------------------------------------------------------------------------------- /tests/web/test_web_api_endpoints.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | import pytest 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer.web.api.server import create_app, run_app 9 | from esperanto_analyzer.web.api.errors import SentenceRequiredError, SentenceInvalidError 10 | from esperanto_analyzer.web.api.morphological_endpoint import MorphologicalAnalyzeEndpoint 11 | from esperanto_analyzer.web.api.results import MorphologicalAnalyzeResult 12 | 13 | class TestWebRoot: 14 | def test_get_404(self, client): 15 | response = client.get('/') 16 | 17 | assert(response.status_code == 404) 18 | 19 | class TestSentenceAnalyze: 20 | def test_bad_request_without_sentence(self, client): 21 | response = client.get('/analyze') 22 | 23 | assert(response.status_code == 400) 24 | 25 | def test_ok_with_sentence(self, client): 26 | response = client.get('/analyze?sentence=Mia%nomo') 27 | 28 | assert(response.status_code == 200) 29 | 30 | def test_response_with_sentence(self, client): 31 | response = client.get('/analyze?sentence=Mia%nomo') 32 | 33 | assert response.get_json() == [{'word': 'Mianomo', 'value': 'Noun'}] 34 | 35 | def test_response_with_sentence_but_invalid(self, client): 36 | response = client.get('/analyze?sentence=```') 37 | 38 | assert response.get_json() == [] 39 | 40 | def 
test_response_status_code_with_sentence_but_invalid(self, client): 41 | response = client.get('/analyze?sentence=```') 42 | 43 | assert response.status_code == 200 44 | 45 | def test_bad_request_without_sentence(self, client): 46 | response = client.get('/analyze') 47 | 48 | assert(response.status_code == 400) 49 | 50 | def test_exception_with_empty_sentence(self, client): 51 | with pytest.raises(SentenceRequiredError): 52 | assert client.get('/analyze?sentence=') 53 | 54 | def test_options_request(self, client): 55 | response = client.options('/analyze') 56 | 57 | assert response.status_code == 200 58 | 59 | def test_options_response_CORS_origin_header(self, client): 60 | response = client.options('/analyze') 61 | 62 | assert response.headers['Access-Control-Allow-Origin'] == '*' 63 | 64 | def test_options_response_CORS_headers_header(self, client): 65 | response = client.options('/analyze') 66 | assert response.headers['Access-Control-Allow-Headers'] == '*' 67 | 68 | def test_options_response_CORS_origin_header(self, client): 69 | response = client.options('/analyze') 70 | 71 | assert response.headers['Access-Control-Allow-Method'] == 'POST, GET, OPTIONS' 72 | 73 | def test_unicode_encoded_response(self, client): 74 | response = client.get('analyze?sentence=👍👍👍') 75 | 76 | assert response.get_json() == [{'word': '👍', 'value': 'Undefined'}, {'word': '👍', 'value': 'Undefined'}, {'word': '👍', 'value': 'Undefined'}] 77 | 78 | def test_unicode_encoded_response(self, client): 79 | response = client.get('analyze?sentence=%F0%9F%91%8D%20%F0%9F%91%8E%20%F0%9F%91%8E') 80 | 81 | assert response.get_json() == [{'word': '👍', 'value': 'Undefined'}, {'word': '👎', 'value': 'Undefined'}, {'word': '👎', 'value': 'Undefined'}] 82 | 83 | class TestMorphologicalAnalyzeEndpoint(): 84 | def test__format_results_none(self): 85 | instance = MorphologicalAnalyzeEndpoint() 86 | 87 | assert instance._format_results(None) == [] 88 | 89 | def test__format_results_error(self): 90 | instance = 
MorphologicalAnalyzeEndpoint() 91 | results = [[dict(), None]] 92 | 93 | assert instance._format_results(results) == [{'word': {}, 'value': 'Undefined', 'extra': {}}] 94 | 95 | 96 | class TestMorphologicalAnalyzeResult(): 97 | def test_results(self): 98 | result = MorphologicalAnalyzeResult(dict(test=1, works=2)) 99 | 100 | assert result.results == dict(test=1, works=2) 101 | -------------------------------------------------------------------------------- /tests/analyzers/morphological/test_verb_morphological_analyzer.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring,no-self-use 2 | 3 | import re 4 | import pytest 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer.speech import Verb 9 | from esperanto_analyzer.analyzers.morphological import VerbMorphologicalAnalyzer 10 | 11 | class TestVerbMorphologicalAnalyzerBasic(): 12 | TEST_WORD = 'ŝatas' 13 | 14 | def test_import(self): 15 | assert VerbMorphologicalAnalyzer 16 | 17 | def test_initialize_default_options(self): 18 | analyzer = VerbMorphologicalAnalyzer(self.TEST_WORD) 19 | 20 | assert analyzer.options == dict() 21 | 22 | def test_initialize_overwrite_options(self): 23 | analyzer = VerbMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok')) 24 | 25 | assert analyzer.options == dict(option='ok') 26 | 27 | def test_initialize_raw_word(self): 28 | analyzer = VerbMorphologicalAnalyzer(self.TEST_WORD) 29 | 30 | assert analyzer.raw_word == self.TEST_WORD 31 | 32 | def test_initialize_word(self): 33 | analyzer = VerbMorphologicalAnalyzer(self.TEST_WORD) 34 | 35 | # analyzer.word is only populated after calling `analyze()` method 36 | assert analyzer.word is None 37 | 38 | def test_initialize_matches(self): 39 | analyzer = VerbMorphologicalAnalyzer(self.TEST_WORD) 40 | 41 | # analyzer.matches is only populated after calling `analyze()` method 42 | assert analyzer.matches is None 43 | 44 | def 
test_initialize_processed(self): 45 | analyzer = VerbMorphologicalAnalyzer(self.TEST_WORD) 46 | 47 | # analyzer.matches is only populated after calling `analyze()` method 48 | assert analyzer.processed is False 49 | 50 | def test_match_regexp(self): 51 | assert VerbMorphologicalAnalyzer.MATCH_REGEXP is not None 52 | 53 | def test_word_class(self): 54 | assert isinstance(VerbMorphologicalAnalyzer.word_class()(self.TEST_WORD), Verb) 55 | 56 | class TestVerbMorphologicalAnalyzerMatchMethod(): 57 | VALID_WORDS = ['ŝatis', 'ŝatas', 'ŝatu', 'ŝatus', 'ŝati'] 58 | INVALID_WORDS = ['multe', 'domo', 'hundoj', 'vi', 'bela', 'belajn', 'tio'] 59 | 60 | def test_match(self): 61 | for word in self.VALID_WORDS: 62 | analyzer = VerbMorphologicalAnalyzer(word) 63 | matches = analyzer.match() 64 | 65 | assert matches is not None 66 | 67 | def test_match_empty(self): 68 | for word in self.INVALID_WORDS: 69 | analyzer = VerbMorphologicalAnalyzer(word) 70 | matches = analyzer.match() 71 | 72 | assert matches is None 73 | 74 | 75 | class TestVerbMorphologicalAnalyzerAnalyzeMethod(): 76 | INVALID_WORDS = ['multe', 'domo', 'hundoj', 'vi', 'bela', 'belajn', 'tio'] #, 'kiu'] 77 | VALID_WORDS = ['ŝatis', 'ŝatas', 'ŝatu', 'ŝatus', 'ŝati', 'amas'] 78 | 79 | def test_invalid_analyze(self): 80 | for word in self.INVALID_WORDS: 81 | analyzer = VerbMorphologicalAnalyzer(word) 82 | result = analyzer.analyze() 83 | 84 | assert not result 85 | 86 | def test_invalid_analyze_word(self): 87 | for word in self.INVALID_WORDS: 88 | analyzer = VerbMorphologicalAnalyzer(word) 89 | analyzer.analyze() 90 | 91 | # if(analyzer.word): breakpoint() 92 | 93 | assert analyzer.word is None 94 | 95 | def test_invalid_analyze_match(self): 96 | for word in self.INVALID_WORDS: 97 | analyzer = VerbMorphologicalAnalyzer(word) 98 | analyzer.analyze() 99 | 100 | assert analyzer.matches is None 101 | 102 | def test_analyze(self): 103 | for word in self.VALID_WORDS: 104 | analyzer = VerbMorphologicalAnalyzer(word) 105 | 106 | 
assert analyzer.analyze() 107 | 108 | def test_analyze_word(self): 109 | for word in self.VALID_WORDS: 110 | analyzer = VerbMorphologicalAnalyzer(word) 111 | analyzer.analyze() 112 | 113 | assert isinstance(analyzer.word, Verb) 114 | assert analyzer.word.content == word 115 | 116 | def test_analyze_match(self): 117 | for word in self.VALID_WORDS: 118 | analyzer = VerbMorphologicalAnalyzer(word) 119 | analyzer.analyze() 120 | 121 | assert analyzer.matches is not None 122 | 123 | def test_analyze_return_false(self): 124 | for word in self.INVALID_WORDS: 125 | analyzer = VerbMorphologicalAnalyzer(word) 126 | 127 | assert analyzer.analyze() is False 128 | 129 | def test_analyze_return_true(self): 130 | for word in self.VALID_WORDS: 131 | analyzer = VerbMorphologicalAnalyzer(word) 132 | 133 | assert analyzer.analyze() 134 | 135 | 136 | def test_analyze_processed(self): 137 | for word in self.VALID_WORDS: 138 | analyzer = VerbMorphologicalAnalyzer(word) 139 | 140 | assert analyzer.processed is False 141 | 142 | analyzer.analyze() 143 | 144 | assert analyzer.processed is True 145 | 146 | def test_analyze_processed_response(self): 147 | for word in self.VALID_WORDS: 148 | analyzer = VerbMorphologicalAnalyzer(word) 149 | analyzer.analyze() 150 | 151 | assert analyzer.analyze() is None 152 | assert analyzer.analyze() is None 153 | -------------------------------------------------------------------------------- /tests/analyzers/morphological/test_noun_morphological_analyzer.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring,no-self-use 2 | 3 | import re 4 | import pytest 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer.speech import Noun 9 | from esperanto_analyzer.analyzers.morphological import NounMorphologicalAnalyzer 10 | 11 | class TestNounMorphologicalAnalyzerBasic(): 12 | TEST_WORD = 'kaj' 13 | 14 | def test_import(self): 15 | assert NounMorphologicalAnalyzer 16 | 17 
| def test_initialize_default_options(self): 18 | analyzer = NounMorphologicalAnalyzer(self.TEST_WORD) 19 | 20 | assert analyzer.options == dict() 21 | 22 | def test_initialize_overwrite_options(self): 23 | analyzer = NounMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok')) 24 | 25 | assert analyzer.options == dict(option='ok') 26 | 27 | def test_initialize_raw_word(self): 28 | analyzer = NounMorphologicalAnalyzer(self.TEST_WORD) 29 | 30 | assert analyzer.raw_word == self.TEST_WORD 31 | 32 | def test_initialize_word(self): 33 | analyzer = NounMorphologicalAnalyzer(self.TEST_WORD) 34 | 35 | # analyzer.word is only populated after calling `analyze()` method 36 | assert analyzer.word is None 37 | 38 | def test_initialize_matches(self): 39 | analyzer = NounMorphologicalAnalyzer(self.TEST_WORD) 40 | 41 | # analyzer.matches is only populated after calling `analyze()` method 42 | assert analyzer.matches is None 43 | 44 | def test_initialize_processed(self): 45 | analyzer = NounMorphologicalAnalyzer(self.TEST_WORD) 46 | 47 | # analyzer.matches is only populated after calling `analyze()` method 48 | assert analyzer.processed is False 49 | 50 | def test_match_regexp(self): 51 | assert NounMorphologicalAnalyzer.MATCH_REGEXP is not None 52 | 53 | def test_word_class(self): 54 | assert isinstance(NounMorphologicalAnalyzer.word_class()(self.TEST_WORD), Noun) 55 | 56 | def test_regexp_value(self): 57 | assert NounMorphologicalAnalyzer.MATCH_REGEXP == re.compile('(^[a-zA-Zĉĝĵĥŝŭ]{2,}(o(j?n?)?)$)', re.IGNORECASE|re.UNICODE) 58 | 59 | class TestNounMorphologicalAnalyzerMatchMethod(): 60 | VALID_WORDS = [ 61 | 'domo', 'domoj', 'homon', 'homojn', 'ĉambro' 62 | ] 63 | 64 | INVALID_WORDS = ['io', 'lo', 'bela', 'la', 'kiu', 'vi', 'kun', 'multe', 'ankoraŭ'] 65 | 66 | def test_match(self): 67 | for word in self.VALID_WORDS: 68 | analyzer = NounMorphologicalAnalyzer(word) 69 | matches = analyzer.match() 70 | 71 | assert matches is not None 72 | assert len(matches.span()) == 2 73 | 74 | 
def test_match_empty(self): 75 | for word in self.INVALID_WORDS: 76 | analyzer = NounMorphologicalAnalyzer(word) 77 | matches = analyzer.match() 78 | 79 | assert matches is None 80 | 81 | class TestNounMorphologicalAnalyzerAnalyzeMethod(): 82 | VALID_WORDS = [ 83 | 'domo', 'domoj', 'homon', 'homojn', 'ĉambro' 84 | ] 85 | 86 | INVALID_WORDS = ['io', 'lo', 'bela', 'la', 'kiu', 'vi', 'kun', 'multe', 'ankoraŭ'] 87 | 88 | def test_invalid_analyze(self): 89 | for word in self.INVALID_WORDS: 90 | analyzer = NounMorphologicalAnalyzer(word) 91 | result = analyzer.analyze() 92 | 93 | assert not result 94 | 95 | def test_invalid_analyze_word(self): 96 | for word in self.INVALID_WORDS: 97 | analyzer = NounMorphologicalAnalyzer(word) 98 | analyzer.analyze() 99 | 100 | assert analyzer.word is None 101 | 102 | def test_invalid_analyze_match(self): 103 | for word in self.INVALID_WORDS: 104 | analyzer = NounMorphologicalAnalyzer(word) 105 | analyzer.analyze() 106 | 107 | assert analyzer.matches is None 108 | 109 | def test_analyze(self): 110 | for word in self.VALID_WORDS: 111 | analyzer = NounMorphologicalAnalyzer(word) 112 | 113 | assert analyzer.analyze() 114 | 115 | def test_analyze_word(self): 116 | for word in self.VALID_WORDS: 117 | analyzer = NounMorphologicalAnalyzer(word) 118 | analyzer.analyze() 119 | 120 | assert isinstance(analyzer.word, Noun) 121 | assert analyzer.word.content == word 122 | 123 | def test_analyze_match(self): 124 | for word in self.VALID_WORDS: 125 | analyzer = NounMorphologicalAnalyzer(word) 126 | analyzer.analyze() 127 | 128 | assert analyzer.matches is not None 129 | 130 | def test_analyze_return_false(self): 131 | for word in self.INVALID_WORDS: 132 | analyzer = NounMorphologicalAnalyzer(word) 133 | 134 | assert analyzer.analyze() is False 135 | 136 | def test_analyze_return_true(self): 137 | for word in self.VALID_WORDS: 138 | analyzer = NounMorphologicalAnalyzer(word) 139 | 140 | assert analyzer.analyze() 141 | 142 | 143 | def 
test_analyze_processed(self): 144 | for word in self.VALID_WORDS: 145 | analyzer = NounMorphologicalAnalyzer(word) 146 | 147 | assert analyzer.processed is False 148 | 149 | analyzer.analyze() 150 | 151 | assert analyzer.processed is True 152 | 153 | def test_analyze_processed_response(self): 154 | for word in self.VALID_WORDS: 155 | analyzer = NounMorphologicalAnalyzer(word) 156 | analyzer.analyze() 157 | 158 | assert analyzer.analyze() is None 159 | assert analyzer.analyze() is None 160 | -------------------------------------------------------------------------------- /tests/analyzers/morphological/test_base_morphological_analyzer.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring,no-self-use 2 | 3 | import re 4 | import pytest 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer.speech import Word 9 | from esperanto_analyzer.analyzers.morphological import BaseMorphologicalAnalyzer 10 | 11 | class TestBaseMorphologicalAnalyzerBasic(): 12 | TEST_WORD = 'komputilo' 13 | 14 | def test_import(self): 15 | assert BaseMorphologicalAnalyzer 16 | 17 | def test_initialize_default_options(self): 18 | analyzer = BaseMorphologicalAnalyzer(self.TEST_WORD) 19 | 20 | assert analyzer.options == dict() 21 | 22 | def test_initialize_overwrite_options(self): 23 | analyzer = BaseMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok')) 24 | 25 | assert analyzer.options == dict(option='ok') 26 | 27 | def test_initialize_raw_word(self): 28 | analyzer = BaseMorphologicalAnalyzer(self.TEST_WORD) 29 | 30 | assert analyzer.raw_word == self.TEST_WORD 31 | 32 | def test_initialize_word(self): 33 | analyzer = BaseMorphologicalAnalyzer(self.TEST_WORD) 34 | 35 | # analyzer.word is only populated after calling `analyze()` method 36 | assert analyzer.word is None 37 | 38 | def test_initialize_matches(self): 39 | analyzer = BaseMorphologicalAnalyzer(self.TEST_WORD) 40 | 41 | # analyzer.matches is 
only populated after calling `analyze()` method 42 | assert analyzer.matches is None 43 | 44 | def test_initialize_processed(self): 45 | analyzer = BaseMorphologicalAnalyzer(self.TEST_WORD) 46 | 47 | # analyzer.matches is only populated after calling `analyze()` method 48 | assert analyzer.processed is False 49 | 50 | def test_match_regexp(self): 51 | assert BaseMorphologicalAnalyzer.MATCH_REGEXP is None 52 | 53 | def test_word_class(self): 54 | with pytest.raises(NotImplementedError): 55 | BaseMorphologicalAnalyzer.word_class() 56 | 57 | class TestBaseMorphologicalAnalyzerMatchMethod(): 58 | TEST_WORD = 'komputilo' 59 | 60 | def test_match(self): 61 | analyzer = BaseMorphologicalAnalyzer(self.TEST_WORD) 62 | 63 | with pytest.raises(AttributeError, match="'NoneType' object has no attribute 'match'"): 64 | analyzer.match() 65 | 66 | class TestAnalyzer(BaseMorphologicalAnalyzer): 67 | # Only words with MINIMUM 9 letters 68 | MATCH_REGEXP = re.compile('.{9,}') 69 | 70 | @staticmethod 71 | def word_class(): 72 | return Word 73 | 74 | class TestChildMorphologicalAnalyzerMatchMethod(): 75 | TEST_WORD = 'komputilo' 76 | 77 | def test_match(self): 78 | analyzer = TestAnalyzer(self.TEST_WORD) 79 | matches = analyzer.match() 80 | 81 | assert matches is not None 82 | 83 | def test_match_empty(self): 84 | analyzer = TestAnalyzer('vorto') 85 | matches = analyzer.match() 86 | 87 | assert matches is None 88 | 89 | 90 | class TestBaseMorphologicalAnalyzerAnalyzeMethod(): 91 | TEST_WORD = 'komputilo' 92 | 93 | def test_analyze(self): 94 | analyzer = TestAnalyzer(self.TEST_WORD) 95 | result = analyzer.analyze() 96 | 97 | assert result 98 | assert isinstance(analyzer.word, Word) 99 | assert analyzer.matches is not None 100 | 101 | def test_analyze_word(self): 102 | analyzer = TestAnalyzer(self.TEST_WORD) 103 | result = analyzer.analyze() 104 | 105 | assert result 106 | assert isinstance(analyzer.word, Word) 107 | assert analyzer.raw_word == self.TEST_WORD 108 | assert 
analyzer.word.content == self.TEST_WORD 109 | assert analyzer.raw_word == analyzer.word.content 110 | 111 | def test_analyze_word_invalid(self): 112 | analyzer = TestAnalyzer('io') 113 | result = analyzer.analyze() 114 | 115 | assert result is False 116 | assert analyzer.word is None 117 | assert analyzer.raw_word == 'io' 118 | 119 | def test_analyze_matches(self): 120 | analyzer = TestAnalyzer(self.TEST_WORD) 121 | result = analyzer.analyze() 122 | 123 | assert result 124 | assert isinstance(analyzer.matches, re.Match) 125 | assert analyzer.matches 126 | 127 | def test_analyze_matches_span(self): 128 | analyzer = TestAnalyzer(self.TEST_WORD) 129 | result = analyzer.analyze() 130 | 131 | assert result 132 | assert len(analyzer.matches.span()) == 2 133 | assert analyzer.matches.span() == (0, 9) 134 | 135 | def test_analyze_matches_invalid(self): 136 | analyzer = TestAnalyzer('io') 137 | result = analyzer.analyze() 138 | 139 | assert result is False 140 | assert analyzer.matches is None 141 | 142 | def test_analyze_matches_span_invalid(self): 143 | analyzer = TestAnalyzer('io') 144 | result = analyzer.analyze() 145 | 146 | assert result is False 147 | assert analyzer.matches is None 148 | assert not analyzer.matches 149 | 150 | def test_analyze_return_true(self): 151 | analyzer = TestAnalyzer(self.TEST_WORD) 152 | 153 | assert analyzer.analyze() 154 | 155 | def test_analyze_return_false(self): 156 | analyzer = TestAnalyzer('io') 157 | 158 | assert analyzer.analyze() is False 159 | 160 | def test_analyze_processed(self): 161 | analyzer = TestAnalyzer(self.TEST_WORD) 162 | 163 | assert analyzer.processed is False 164 | 165 | analyzer.analyze() 166 | 167 | assert analyzer.processed is True 168 | 169 | def test_analyze_processed_response(self): 170 | analyzer = TestAnalyzer(self.TEST_WORD) 171 | analyzer.analyze() 172 | 173 | assert analyzer.analyze() is None 174 | assert analyzer.analyze() is None 175 | 176 | 
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
@ECHO OFF

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set BUILDDIR=_build
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
set I18NSPHINXOPTS=%SPHINXOPTS% .
if NOT "%PAPER%" == "" (
	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
)

if "%1" == "" goto help

if "%1" == "help" (
	:help
	REM NOTE(review): the dump stripped the ^<target^> escapes here; restored.
	echo.Please use `make ^<target^>` where ^<target^> is one of
	echo.  html       to make standalone HTML files
	echo.  dirhtml    to make HTML files named index.html in directories
	echo.  singlehtml to make a single large HTML file
	echo.  pickle     to make pickle files
	echo.  json       to make JSON files
	echo.  htmlhelp   to make HTML files and a HTML help project
	echo.  qthelp     to make HTML files and a qthelp project
	echo.  devhelp    to make HTML files and a Devhelp project
	echo.  epub       to make an epub
	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
	echo.  text       to make text files
	echo.  man        to make manual pages
	echo.  texinfo    to make Texinfo files
	echo.  gettext    to make PO message catalogs
	echo.  changes    to make an overview over all changed/added/deprecated items
	echo.  linkcheck  to check all external links for integrity
	echo.  doctest    to run all doctests embedded in the documentation if enabled
	goto end
)

if "%1" == "clean" (
	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
	del /q /s %BUILDDIR%\*
	goto end
)

if "%1" == "html" (
	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
	goto end
)

if "%1" == "dirhtml" (
	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
	goto end
)

if "%1" == "singlehtml" (
	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
	goto end
)

if "%1" == "pickle" (
	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can process the pickle files.
	goto end
)

if "%1" == "json" (
	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can process the JSON files.
	goto end
)

if "%1" == "htmlhelp" (
	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
	goto end
)

if "%1" == "qthelp" (
	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\sample.qhcp
	echo.To view the help file:
	REM Fixed: was "sample.ghc" — qcollectiongenerator emits a .qhc file
	REM (the companion docs/Makefile uses sample.qhc for the same step).
	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\sample.qhc
	goto end
)

if "%1" == "devhelp" (
	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished.
	goto end
)

if "%1" == "epub" (
	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The epub file is in %BUILDDIR%/epub.
	goto end
)

if "%1" == "latex" (
	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
	goto end
)

if "%1" == "text" (
	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The text files are in %BUILDDIR%/text.
	goto end
)

if "%1" == "man" (
	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The manual pages are in %BUILDDIR%/man.
	goto end
)

if "%1" == "texinfo" (
	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
	goto end
)

if "%1" == "gettext" (
	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
	goto end
)

if "%1" == "changes" (
	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
	if errorlevel 1 exit /b 1
	echo.
	echo.The overview file is in %BUILDDIR%/changes.
	goto end
)

if "%1" == "linkcheck" (
	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
	if errorlevel 1 exit /b 1
	echo.
	echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
	goto end
)

if "%1" == "doctest" (
	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
	if errorlevel 1 exit /b 1
	echo.
	echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
	goto end
)

:end
--------------------------------------------------------------------------------
/tests/analyzers/morphological/test_adjective_morphological_analyzer.py:
--------------------------------------------------------------------------------
# pylint: disable=missing-docstring,no-self-use

import re
import pytest

from context import esperanto_analyzer

from esperanto_analyzer.speech import Adjective
from esperanto_analyzer.analyzers.morphological import AdjectiveMorphologicalAnalyzer

class TestAdjectiveMorphologicalAnalyzerBasic():
    TEST_WORD = 'bela'

    def test_import(self):
        assert AdjectiveMorphologicalAnalyzer

    def test_initialize_default_options(self):
        analyzer = AdjectiveMorphologicalAnalyzer(self.TEST_WORD)

        assert analyzer.options == dict()

    def test_initialize_overwrite_options(self):
        analyzer = AdjectiveMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok'))

        assert analyzer.options == dict(option='ok')

    def test_initialize_raw_word(self):
        analyzer = AdjectiveMorphologicalAnalyzer(self.TEST_WORD)

        assert analyzer.raw_word == self.TEST_WORD

    def test_initialize_word(self):
        analyzer = AdjectiveMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.word is only populated after calling `analyze()` method
        assert analyzer.word is None

    def test_initialize_matches(self):
        analyzer = AdjectiveMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.matches is only populated after calling `analyze()` method
        assert analyzer.matches is None
test_initialize_processed(self): 45 | analyzer = AdjectiveMorphologicalAnalyzer(self.TEST_WORD) 46 | 47 | # analyzer.matches is only populated after calling `analyze()` method 48 | assert analyzer.processed is False 49 | 50 | def test_match_regexp_value(self): 51 | assert AdjectiveMorphologicalAnalyzer.MATCH_REGEXP == re.compile('(^[a-zA-Zĉĝĵĥŝŭ]{2,}(a(j?n?)?)$)', re.IGNORECASE) 52 | 53 | def test_match_regexp(self): 54 | assert AdjectiveMorphologicalAnalyzer.MATCH_REGEXP is not None 55 | 56 | def test_word_class(self): 57 | assert isinstance(AdjectiveMorphologicalAnalyzer.word_class()(self.TEST_WORD), Adjective) 58 | 59 | class TestAdjectiveMorphologicalAnalyzerMatchMethod(): 60 | VALID_WORDS = ['bela', 'belan', 'belaj', 'belajn'] 61 | INVALID_WORDS = ['domo', 'la', '?', '!'] 62 | 63 | def test_match(self): 64 | for word in self.VALID_WORDS: 65 | analyzer = AdjectiveMorphologicalAnalyzer(word) 66 | matches = analyzer.match() 67 | 68 | assert matches is not None 69 | 70 | def test_match_empty(self): 71 | for word in self.INVALID_WORDS: 72 | analyzer = AdjectiveMorphologicalAnalyzer(word) 73 | matches = analyzer.match() 74 | 75 | assert matches is None 76 | 77 | class TestAdjectiveMorphologicalAnalyzerAnalyzeMethod(): 78 | INVALID_WORDS = [ 79 | 'io', 'multe', 'domo', 'hundoj', 'kiu', 'vi', 80 | '[', ']', '{', '}', '|', '\\', '(', ')', '=', '+', '*', 81 | '&', '^', '%', '$', '#', '@', '`', '~', ';', ':', ',', '.', 82 | '<', '>', '/', 83 | '.!', '!', 'n!', 'jn!', 'j!', 84 | '..!', '..!', '..n!', '..jn!', 85 | '..aj!', '..ajn!', '..aj', '..ajn', 'ajn', 86 | '.!', '?', 'n?', 'jn?', 'j?', 87 | '90a', '000an', '999ajn', '000aj', '__ajn', '__an', '__a', 88 | 'bel0an', 'bel9ajn', '9belajn', '9bela', 89 | ] 90 | 91 | VALID_WORDS = [ 92 | 'ĝusta', 'bela', 'belan', 'belaj', 'belajn', 'bongusta' 93 | ] 94 | 95 | def test_invalid_analyze(self): 96 | for word in self.INVALID_WORDS: 97 | analyzer = AdjectiveMorphologicalAnalyzer(word) 98 | result = analyzer.analyze() 99 | 100 | 
assert not result 101 | 102 | def test_invalid_analyze_word(self): 103 | for word in self.INVALID_WORDS: 104 | analyzer = AdjectiveMorphologicalAnalyzer(word) 105 | analyzer.analyze() 106 | 107 | assert analyzer.word is None 108 | 109 | def test_invalid_analyze_match(self): 110 | for word in self.INVALID_WORDS: 111 | analyzer = AdjectiveMorphologicalAnalyzer(word) 112 | analyzer.analyze() 113 | 114 | assert analyzer.matches is None 115 | 116 | def test_analyze(self): 117 | for word in self.VALID_WORDS: 118 | analyzer = AdjectiveMorphologicalAnalyzer(word) 119 | 120 | assert analyzer.analyze() 121 | 122 | def test_analyze_word(self): 123 | for word in self.VALID_WORDS: 124 | analyzer = AdjectiveMorphologicalAnalyzer(word) 125 | analyzer.analyze() 126 | 127 | assert isinstance(analyzer.word, Adjective) 128 | assert analyzer.word.content == word 129 | 130 | def test_analyze_match(self): 131 | for word in self.VALID_WORDS: 132 | analyzer = AdjectiveMorphologicalAnalyzer(word) 133 | analyzer.analyze() 134 | 135 | assert analyzer.matches is not None 136 | 137 | def test_analyze_return_false(self): 138 | for word in self.INVALID_WORDS: 139 | analyzer = AdjectiveMorphologicalAnalyzer(word) 140 | 141 | assert analyzer.analyze() is False 142 | 143 | def test_analyze_return_true(self): 144 | for word in self.VALID_WORDS: 145 | analyzer = AdjectiveMorphologicalAnalyzer(word) 146 | 147 | assert analyzer.analyze() 148 | 149 | 150 | def test_analyze_processed(self): 151 | for word in self.VALID_WORDS: 152 | analyzer = AdjectiveMorphologicalAnalyzer(word) 153 | 154 | assert analyzer.processed is False 155 | 156 | analyzer.analyze() 157 | 158 | assert analyzer.processed is True 159 | 160 | def test_analyze_processed_response(self): 161 | for word in self.VALID_WORDS: 162 | analyzer = AdjectiveMorphologicalAnalyzer(word) 163 | analyzer.analyze() 164 | 165 | assert analyzer.analyze() is None 166 | assert analyzer.analyze() is None 167 | 
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
# Makefile for Sphinx documentation
#
# NOTE(review): recipe lines below are TAB-indented as make requires; the dump
# this file was recovered from had flattened all indentation. The dump had also
# stripped the literal "<target>" from the help echo; restored.

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
PAPER         =
BUILDDIR      = _build

# Internal variables.
PAPEROPT_a4     = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .

.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext

help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"

clean:
	-rm -rf $(BUILDDIR)/*

html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/sample.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/sample.qhc"

devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/sample"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/sample"
	@echo "# devhelp"

epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	make -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."
--------------------------------------------------------------------------------
/tests/analyzers/morphological/test_article_morphological_analyzer.py:
--------------------------------------------------------------------------------
# pylint: disable=missing-docstring,no-self-use

import re
import pytest

from context import esperanto_analyzer

from esperanto_analyzer.speech import Article
from esperanto_analyzer.analyzers.morphological import ArticleMorphologicalAnalyzer

class TestArticleMorphologicalAnalyzerBasic():
    TEST_WORD = 'la'

    def test_import(self):
        assert ArticleMorphologicalAnalyzer

    def test_initialize_default_options(self):
        analyzer = ArticleMorphologicalAnalyzer(self.TEST_WORD)

        assert analyzer.options == dict()

    def test_initialize_overwrite_options(self):
        analyzer = ArticleMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok'))

        assert analyzer.options == dict(option='ok')

    def test_initialize_raw_word(self):
        analyzer = ArticleMorphologicalAnalyzer(self.TEST_WORD)

        assert analyzer.raw_word == self.TEST_WORD

    def test_initialize_word(self):
        analyzer = ArticleMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.word is only populated after calling `analyze()` method
        assert analyzer.word is None

    def test_initialize_matches(self):
        analyzer = ArticleMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.matches is only populated after calling `analyze()` method
        assert analyzer.matches is None
calling `analyze()` method 42 | assert analyzer.matches is None 43 | 44 | def test_initialize_processed(self): 45 | analyzer = ArticleMorphologicalAnalyzer(self.TEST_WORD) 46 | 47 | # analyzer.matches is only populated after calling `analyze()` method 48 | assert analyzer.processed is False 49 | 50 | def test_match_regexp(self): 51 | assert ArticleMorphologicalAnalyzer.MATCH_REGEXP is not None 52 | 53 | def test_word_class(self): 54 | assert isinstance(ArticleMorphologicalAnalyzer.word_class()(self.TEST_WORD), Article) 55 | 56 | class TestArticleMorphologicalAnalyzerMatchMethod(): 57 | VALID_WORDS = ['la'] 58 | INVALID_WORDS = ['io', 'lo', 'bela', 'domo', 'hundoj', 'kiu', 'vi', 'kun', 'multe', 'ankoraŭ', '?', '!'] 59 | 60 | def test_match(self): 61 | for word in self.VALID_WORDS: 62 | analyzer = ArticleMorphologicalAnalyzer(word) 63 | matches = analyzer.match() 64 | 65 | assert matches is not None 66 | assert len(matches.span()) == 2 67 | 68 | def test_match_empty(self): 69 | for word in self.INVALID_WORDS: 70 | analyzer = ArticleMorphologicalAnalyzer(word) 71 | matches = analyzer.match() 72 | 73 | assert matches is None 74 | 75 | class TestArticleMorphologicalAnalyzerAnalyzeMethod(): 76 | VALID_WORDS = ['la'] 77 | INVALID_WORDS = ['io', 'lo', 'bela', 'domo', 'hundoj', 'kiu', 'vi', 'kun', 'multe', 'ankoraŭ', '?', '!'] 78 | 79 | def test_invalid_analyze(self): 80 | for word in self.INVALID_WORDS: 81 | analyzer = ArticleMorphologicalAnalyzer(word) 82 | result = analyzer.analyze() 83 | 84 | assert not result 85 | 86 | def test_invalid_analyze_word(self): 87 | for word in self.INVALID_WORDS: 88 | analyzer = ArticleMorphologicalAnalyzer(word) 89 | analyzer.analyze() 90 | 91 | assert analyzer.word is None 92 | 93 | def test_invalid_analyze_match(self): 94 | for word in self.INVALID_WORDS: 95 | analyzer = ArticleMorphologicalAnalyzer(word) 96 | analyzer.analyze() 97 | 98 | assert analyzer.matches is None 99 | 100 | def test_analyze(self): 101 | for word in 
self.VALID_WORDS: 102 | analyzer = ArticleMorphologicalAnalyzer(word) 103 | 104 | assert analyzer.analyze() 105 | 106 | def test_conjunctions_list(self): 107 | for word in ArticleMorphologicalAnalyzer.ARTICLES_LIST: 108 | analyzer = ArticleMorphologicalAnalyzer(word) 109 | 110 | assert analyzer.analyze() 111 | 112 | def test_analyze_word(self): 113 | for word in self.VALID_WORDS: 114 | analyzer = ArticleMorphologicalAnalyzer(word) 115 | analyzer.analyze() 116 | 117 | assert isinstance(analyzer.word, Article) 118 | assert analyzer.word.content == word 119 | 120 | def test_analyze_match(self): 121 | for word in self.VALID_WORDS: 122 | analyzer = ArticleMorphologicalAnalyzer(word) 123 | analyzer.analyze() 124 | 125 | assert analyzer.matches is not None 126 | 127 | def test_analyze_return_false(self): 128 | for word in self.INVALID_WORDS: 129 | analyzer = ArticleMorphologicalAnalyzer(word) 130 | 131 | assert analyzer.analyze() is False 132 | 133 | def test_analyze_return_true(self): 134 | for word in self.VALID_WORDS: 135 | analyzer = ArticleMorphologicalAnalyzer(word) 136 | 137 | assert analyzer.analyze() 138 | 139 | 140 | def test_analyze_processed(self): 141 | for word in self.VALID_WORDS: 142 | analyzer = ArticleMorphologicalAnalyzer(word) 143 | 144 | assert analyzer.processed is False 145 | 146 | analyzer.analyze() 147 | 148 | assert analyzer.processed is True 149 | 150 | def test_analyze_processed_response(self): 151 | for word in self.VALID_WORDS: 152 | analyzer = ArticleMorphologicalAnalyzer(word) 153 | analyzer.analyze() 154 | 155 | assert analyzer.analyze() is None 156 | assert analyzer.analyze() is None 157 | 158 | class TestArticleMorphologicalAnalyzerConjuctionList: 159 | def test_conjunctions_not_empty(self): 160 | assert ArticleMorphologicalAnalyzer.ARTICLES_LIST is not None 161 | 162 | def test_conjunctions_not_size(self): 163 | assert len(ArticleMorphologicalAnalyzer.ARTICLES_LIST) == 1 164 | 165 | def test_conjunctions_match_list(self): 166 | for word 
in ArticleMorphologicalAnalyzer.ARTICLES_LIST: 167 | assert ArticleMorphologicalAnalyzer.ARTICLES_MATCH_REGEXP.match(word) 168 | 169 | def test_conjunctions_match_final_regexp_list(self): 170 | for word in ArticleMorphologicalAnalyzer.ARTICLES_LIST: 171 | assert ArticleMorphologicalAnalyzer.MATCH_REGEXP.match(word) 172 | -------------------------------------------------------------------------------- /tests/analyzers/morphological/test_conjuction_morphological_analyzer.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring,no-self-use 2 | 3 | import re 4 | import pytest 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer.speech import Conjunction 9 | from esperanto_analyzer.analyzers.morphological import ConjunctionMorphologicalAnalyzer 10 | 11 | class TestConjunctionMorphologicalAnalyzerBasic(): 12 | TEST_WORD = 'kaj' 13 | 14 | def test_import(self): 15 | assert ConjunctionMorphologicalAnalyzer 16 | 17 | def test_initialize_default_options(self): 18 | analyzer = ConjunctionMorphologicalAnalyzer(self.TEST_WORD) 19 | 20 | assert analyzer.options == dict() 21 | 22 | def test_initialize_overwrite_options(self): 23 | analyzer = ConjunctionMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok')) 24 | 25 | assert analyzer.options == dict(option='ok') 26 | 27 | def test_initialize_raw_word(self): 28 | analyzer = ConjunctionMorphologicalAnalyzer(self.TEST_WORD) 29 | 30 | assert analyzer.raw_word == self.TEST_WORD 31 | 32 | def test_initialize_word(self): 33 | analyzer = ConjunctionMorphologicalAnalyzer(self.TEST_WORD) 34 | 35 | # analyzer.word is only populated after calling `analyze()` method 36 | assert analyzer.word is None 37 | 38 | def test_initialize_matches(self): 39 | analyzer = ConjunctionMorphologicalAnalyzer(self.TEST_WORD) 40 | 41 | # analyzer.matches is only populated after calling `analyze()` method 42 | assert analyzer.matches is None 43 | 44 | def 
test_initialize_processed(self): 45 | analyzer = ConjunctionMorphologicalAnalyzer(self.TEST_WORD) 46 | 47 | # analyzer.matches is only populated after calling `analyze()` method 48 | assert analyzer.processed is False 49 | 50 | def test_match_regexp(self): 51 | assert ConjunctionMorphologicalAnalyzer.MATCH_REGEXP is not None 52 | 53 | def test_word_class(self): 54 | assert isinstance(ConjunctionMorphologicalAnalyzer.word_class()(self.TEST_WORD), Conjunction) 55 | 56 | class TestConjunctionMorphologicalAnalyzerMatchMethod(): 57 | VALID_WORDS = [ 58 | 'ĉar', 'aŭ', 'kaj', 'kiel', 'kiam', 'minus', 'nek', 'sed', 'tial', 59 | 60 | ] 61 | 62 | INVALID_WORDS = ['io', 'bela', 'domo', 'hundoj', 'kiu', 'vi', 'kun', 'multe', 'ankoraŭ', '?', '!'] 63 | 64 | def test_match(self): 65 | for word in self.VALID_WORDS: 66 | analyzer = ConjunctionMorphologicalAnalyzer(word) 67 | matches = analyzer.match() 68 | 69 | assert matches is not None 70 | assert len(matches.span()) == 2 71 | 72 | def test_match_empty(self): 73 | for word in self.INVALID_WORDS: 74 | analyzer = ConjunctionMorphologicalAnalyzer(word) 75 | matches = analyzer.match() 76 | 77 | assert matches is None 78 | 79 | class TestConjunctionMorphologicalAnalyzerAnalyzeMethod(): 80 | VALID_WORDS = [ 81 | 'ĉar', 'aŭ', 'kaj', 'kiel', 'kiam', 'minus', 'nek', 'sed', 'tial' 82 | ] 83 | 84 | INVALID_WORDS = [ 85 | 'io', 'bela', 'domo', 'hundoj', 'kiu', 'vi', 'kun', 'multe', 'ankoraŭ', '?', '!' 
86 | ] 87 | 88 | def test_invalid_analyze(self): 89 | for word in self.INVALID_WORDS: 90 | analyzer = ConjunctionMorphologicalAnalyzer(word) 91 | result = analyzer.analyze() 92 | 93 | assert not result 94 | 95 | def test_invalid_analyze_word(self): 96 | for word in self.INVALID_WORDS: 97 | analyzer = ConjunctionMorphologicalAnalyzer(word) 98 | analyzer.analyze() 99 | 100 | assert analyzer.word is None 101 | 102 | def test_invalid_analyze_match(self): 103 | for word in self.INVALID_WORDS: 104 | analyzer = ConjunctionMorphologicalAnalyzer(word) 105 | analyzer.analyze() 106 | 107 | assert analyzer.matches is None 108 | 109 | def test_analyze(self): 110 | for word in self.VALID_WORDS: 111 | analyzer = ConjunctionMorphologicalAnalyzer(word) 112 | 113 | assert analyzer.analyze() 114 | 115 | def test_conjunctions_list(self): 116 | for word in ConjunctionMorphologicalAnalyzer.CONJUNCTIONS_LIST: 117 | analyzer = ConjunctionMorphologicalAnalyzer(word) 118 | 119 | assert analyzer.analyze() 120 | 121 | def test_analyze_word(self): 122 | for word in self.VALID_WORDS: 123 | analyzer = ConjunctionMorphologicalAnalyzer(word) 124 | analyzer.analyze() 125 | 126 | assert isinstance(analyzer.word, Conjunction) 127 | assert analyzer.word.content == word 128 | 129 | def test_analyze_match(self): 130 | for word in self.VALID_WORDS: 131 | analyzer = ConjunctionMorphologicalAnalyzer(word) 132 | analyzer.analyze() 133 | 134 | assert analyzer.matches is not None 135 | 136 | def test_analyze_return_false(self): 137 | for word in self.INVALID_WORDS: 138 | analyzer = ConjunctionMorphologicalAnalyzer(word) 139 | 140 | assert analyzer.analyze() is False 141 | 142 | def test_analyze_return_true(self): 143 | for word in self.VALID_WORDS: 144 | analyzer = ConjunctionMorphologicalAnalyzer(word) 145 | 146 | assert analyzer.analyze() 147 | 148 | 149 | def test_analyze_processed(self): 150 | for word in self.VALID_WORDS: 151 | analyzer = ConjunctionMorphologicalAnalyzer(word) 152 | 153 | assert 
analyzer.processed is False 154 | 155 | analyzer.analyze() 156 | 157 | assert analyzer.processed is True 158 | 159 | def test_analyze_processed_response(self): 160 | for word in self.VALID_WORDS: 161 | analyzer = ConjunctionMorphologicalAnalyzer(word) 162 | analyzer.analyze() 163 | 164 | assert analyzer.analyze() is None 165 | assert analyzer.analyze() is None 166 | 167 | class TestConjunctionMorphologicalAnalyzerConjuctionList: 168 | def test_conjunctions_not_empty(self): 169 | assert ConjunctionMorphologicalAnalyzer.CONJUNCTIONS_LIST is not None 170 | 171 | def test_conjunctions_not_size(self): 172 | assert len(ConjunctionMorphologicalAnalyzer.CONJUNCTIONS_LIST) == 25 173 | 174 | def test_conjunctions_match_list(self): 175 | for word in ConjunctionMorphologicalAnalyzer.CONJUNCTIONS_LIST: 176 | assert ConjunctionMorphologicalAnalyzer.CONJUCTIONS_MATCH_REGEXP.match(word) 177 | 178 | def test_conjunctions_match_final_regexp_list(self): 179 | for word in ConjunctionMorphologicalAnalyzer.CONJUNCTIONS_LIST: 180 | assert ConjunctionMorphologicalAnalyzer.MATCH_REGEXP.match(word) 181 | -------------------------------------------------------------------------------- /tests/test_morphological_sentence_analyzer.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring,no-self-use 2 | 3 | import re 4 | import pytest 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer.speech import Word 9 | from esperanto_analyzer.speech import Adverb 10 | from esperanto_analyzer.speech import Adjective 11 | from esperanto_analyzer.speech import Article, InvalidArticleError 12 | from esperanto_analyzer.speech import Conjunction 13 | from esperanto_analyzer.speech import Interjection 14 | from esperanto_analyzer.speech import Noun 15 | from esperanto_analyzer.speech import Numeral 16 | from esperanto_analyzer.speech import Preposition 17 | from esperanto_analyzer.speech import Pronoun 18 | from 
esperanto_analyzer.speech import Verb 19 | 20 | from esperanto_analyzer import MorphologicalSentenceAnalyzer 21 | 22 | 23 | class TestMorphologicalSentenceAnalyzerBasic(): 24 | TEST_SENTENCE = 'Mi loĝas en Brazilo' 25 | 26 | def test_import(self): 27 | assert MorphologicalSentenceAnalyzer 28 | 29 | def test_initialize(self): 30 | assert MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 31 | 32 | def test_initialize_sentence(self): 33 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 34 | 35 | assert analyzer.sentence is self.TEST_SENTENCE 36 | 37 | def test_initialize_sentence_words(self): 38 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 39 | 40 | assert analyzer.sentence_words == ['Mi', 'loĝas', 'en', 'Brazilo'] 41 | 42 | def test_initialize_results(self): 43 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 44 | 45 | assert analyzer.results() is None 46 | 47 | def test_initialize_processed(self): 48 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 49 | 50 | assert analyzer.processed is False 51 | 52 | def test_analyze(self): 53 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 54 | 55 | assert analyzer.analyze() 56 | 57 | def test_analyze_results(self): 58 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 59 | 60 | assert analyzer.analyze() 61 | assert analyzer.results() is not None 62 | 63 | def test_analyze_results_size(self): 64 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 65 | 66 | assert analyzer.analyze() 67 | assert len(analyzer.results()) == 4 68 | assert len(analyzer.results()[1]) == 2 69 | 70 | def test_analyze_processed(self): 71 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 72 | 73 | assert analyzer.processed is False 74 | assert analyzer.analyze() 75 | assert analyzer.processed is True 76 | 77 | def test_analyze_processed_multiples_times(self): 78 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 79 | 80 | assert 
analyzer.processed is False 81 | assert analyzer.analyze() # First analyze 82 | assert analyzer.processed is True 83 | assert analyzer.analyze() is None 84 | assert analyzer.analyze() is None 85 | 86 | def test_analyze_internal_results_class(self): 87 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 88 | analyzer.analyze() 89 | 90 | classes_names = [an.__class__.__name__ for an in analyzer.internal_results] 91 | 92 | assert classes_names == ['MorphologicalAnalyzer', 'MorphologicalAnalyzer', 'MorphologicalAnalyzer', 'MorphologicalAnalyzer'] 93 | 94 | def test_analyzes_results_not_processed(self): 95 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 96 | 97 | assert analyzer.analyzes_results() is None 98 | 99 | def test_analyzes_internals_results_processed(self): 100 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 101 | analyzer.analyze() 102 | 103 | assert analyzer.analyzes_results() == [result.results for result in analyzer.internal_results] 104 | 105 | def test_analyzes_results_class(self): 106 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 107 | analyzer.analyze() 108 | 109 | classes_names = [analyze.__class__.__name__ for analyze in analyzer.analyzes_results()] 110 | 111 | assert classes_names == ['AnalyzeResult', 'AnalyzeResult', 'AnalyzeResult', 'AnalyzeResult'] 112 | 113 | def test_analyzes_results_class_result(self): 114 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 115 | analyzer.analyze() 116 | 117 | result_classes = [analyze.result.__class__.__name__ for analyze in analyzer.analyzes_results()] 118 | 119 | assert result_classes == ['PronounMorphologicalAnalyzer', 'VerbMorphologicalAnalyzer', 'PrepositionMorphologicalAnalyzer', 'NounMorphologicalAnalyzer'] 120 | 121 | def test_analyzes_results_word_classnames(self): 122 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 123 | analyzer.analyze() 124 | 125 | result_classes = [analyze.result.word.__class__.__name__ for 
analyze in analyzer.analyzes_results()] 126 | 127 | assert result_classes == ['Pronoun', 'Verb', 'Preposition', 'Noun'] 128 | 129 | def test_analyzes_results_raw_word(self): 130 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 131 | analyzer.analyze() 132 | 133 | words = [analyze.result.raw_word for analyze in analyzer.analyzes_results()] 134 | 135 | assert words == ['Mi', 'loĝas', 'en', 'Brazilo'] 136 | 137 | def test_analyzes_results_processed(self): 138 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 139 | analyzer.analyze() 140 | 141 | processed_status = [an.result.processed for an in analyzer.analyzes_results()] 142 | 143 | assert processed_status == [True, True, True, True] 144 | 145 | def test_analyzes_results_word_class(self): 146 | analyzer = MorphologicalSentenceAnalyzer(self.TEST_SENTENCE) 147 | analyzer.analyze() 148 | 149 | words_classes = [an.result.word_class() for an in analyzer.analyzes_results()] 150 | 151 | assert words_classes == [Pronoun, Verb, Preposition, Noun] 152 | 153 | 154 | def test_sentence_clean_regexp(self): 155 | sentence = '(Mia) [nomo] estas, Esperanto. Hodiau estas la jaro 2018. 
jes' 156 | new_sentence = re.sub(MorphologicalSentenceAnalyzer.SENTENCE_CLEAN_REGEXP, '', sentence) 157 | 158 | assert new_sentence == 'Mia nomo estas Esperanto Hodiau estas la jaro 2018 jes' 159 | 160 | def test_undefined_token(self): 161 | analyzer = MorphologicalSentenceAnalyzer('Mia asdiosdsds') 162 | analyzer.analyze() 163 | 164 | assert analyzer.simple_results() == [['Mia', 'Pronoun'], ['asdiosdsds', 'Undefined']] 165 | assert analyzer.simple_results()[1][1] == 'Undefined' 166 | -------------------------------------------------------------------------------- /tests/analyzers/morphological/test_adverb_morphological_analyzer.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring,no-self-use 2 | 3 | import re 4 | import pytest 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer.speech import Adverb 9 | from esperanto_analyzer.analyzers.morphological import AdverbMorphologicalAnalyzer 10 | 11 | class TestAdverbMorphologicalAnalyzerBasic(): 12 | TEST_WORD = 'bonege' 13 | 14 | def test_import(self): 15 | assert AdverbMorphologicalAnalyzer 16 | 17 | def test_initialize_default_options(self): 18 | analyzer = AdverbMorphologicalAnalyzer(self.TEST_WORD) 19 | 20 | assert analyzer.options == dict() 21 | 22 | def test_initialize_overwrite_options(self): 23 | analyzer = AdverbMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok')) 24 | 25 | assert analyzer.options == dict(option='ok') 26 | 27 | def test_initialize_raw_word(self): 28 | analyzer = AdverbMorphologicalAnalyzer(self.TEST_WORD) 29 | 30 | assert analyzer.raw_word == self.TEST_WORD 31 | 32 | def test_initialize_word(self): 33 | analyzer = AdverbMorphologicalAnalyzer(self.TEST_WORD) 34 | 35 | # analyzer.word is only populated after calling `analyze()` method 36 | assert analyzer.word is None 37 | 38 | def test_initialize_matches(self): 39 | analyzer = AdverbMorphologicalAnalyzer(self.TEST_WORD) 40 | 41 | # 
analyzer.matches is only populated after calling `analyze()` method 42 | assert analyzer.matches is None 43 | 44 | def test_initialize_processed(self): 45 | analyzer = AdverbMorphologicalAnalyzer(self.TEST_WORD) 46 | 47 | # analyzer.matches is only populated after calling `analyze()` method 48 | assert analyzer.processed is False 49 | 50 | def test_match_regexp(self): 51 | assert AdverbMorphologicalAnalyzer.MATCH_REGEXP is not None 52 | 53 | def test_word_class(self): 54 | assert isinstance(AdverbMorphologicalAnalyzer.word_class()(self.TEST_WORD), Adverb) 55 | 56 | class TestAdverbMorphologicalAnalyzerMatchMethod(): 57 | VALID_WORDS = [ 58 | 'multe', 'bone', 'rapide', 'almenaŭ', 'ankoraŭ', 'ĝuste' 59 | ] 60 | 61 | INVALID_WORDS = [ 62 | 'io', 'bela', 'domo', 'hundoj', 'kiu', 'vi', '?', '!', 63 | '[', ']', '{', '}', '|', '\\', '(', ')', '=', '+', '*', 64 | '&', '^', '%', '$', '#', '@', '`', '~', ';', ':', ',', '.', 65 | '<', '>', '/', 66 | '.!', '!', 'n!', 'jn!', 'j!', 67 | '..!', '..!', '..n!', '..jn!', 68 | '..ej!', '..ejn!', '..ej', '..ejn', 'ejn', 69 | '.!', '?', 'n?', 'jn?', 'j?', 70 | '90e', '000en', '999ejn', '000ej', '__ejn', '__en', '__e', 71 | 'bel0en', 'bel9ejn', '9belejn', '9bele', 'almen9ŭ', '.lmenaŭ', 72 | ] 73 | 74 | def test_match(self): 75 | for word in self.VALID_WORDS: 76 | analyzer = AdverbMorphologicalAnalyzer(word) 77 | matches = analyzer.match() 78 | 79 | assert matches is not None 80 | assert len(matches.span()) == 2 81 | 82 | def test_match_empty(self): 83 | for word in self.INVALID_WORDS: 84 | analyzer = AdverbMorphologicalAnalyzer(word) 85 | matches = analyzer.match() 86 | 87 | assert matches is None 88 | 89 | def test_match_regexp_value(self): 90 | assert AdverbMorphologicalAnalyzer.MATCH_REGEXP == re.compile('^(([a-zA-Zĉĝĵĥŝŭ]{2,}(e))|almenaŭ|ambaŭ|antaŭ|ankaŭ|ankoraŭ|apenaŭ|baldaŭ|ĉirkaŭ|hieraŭ|hodiaŭ|kvazaŭ|morgaŭ|preskaŭ|nun|tiam|ĉiam|neniam|tuj|jam|tie|tien|ĉie|nenie|for|eksteren|tre)$', re.IGNORECASE) 91 | 92 | class 
TestAdverbMorphologicalAnalyzerAnalyzeMethod(): 93 | INVALID_WORDS = ['io', 'bela', 'domo', 'hundoj', 'kiu', 'vi'] 94 | 95 | VALID_WORDS = [ 96 | 'multe', 'bone', 'rapide', 'almenaŭ', 'ankoraŭ', 'ĝuste' 97 | ] 98 | 99 | def test_invalid_analyze(self): 100 | for word in self.INVALID_WORDS: 101 | analyzer = AdverbMorphologicalAnalyzer(word) 102 | result = analyzer.analyze() 103 | 104 | assert not result 105 | 106 | def test_invalid_analyze_word(self): 107 | for word in self.INVALID_WORDS: 108 | analyzer = AdverbMorphologicalAnalyzer(word) 109 | analyzer.analyze() 110 | 111 | assert analyzer.word is None 112 | 113 | def test_invalid_analyze_match(self): 114 | for word in self.INVALID_WORDS: 115 | analyzer = AdverbMorphologicalAnalyzer(word) 116 | analyzer.analyze() 117 | 118 | assert analyzer.matches is None 119 | 120 | def test_analyze(self): 121 | for word in self.VALID_WORDS: 122 | analyzer = AdverbMorphologicalAnalyzer(word) 123 | 124 | assert analyzer.analyze() 125 | 126 | def test_adverbs_list(self): 127 | for word in AdverbMorphologicalAnalyzer.SPECIAL_ADVERBS: 128 | analyzer = AdverbMorphologicalAnalyzer(word) 129 | 130 | assert analyzer.analyze() 131 | 132 | def test_analyze_word(self): 133 | for word in self.VALID_WORDS: 134 | analyzer = AdverbMorphologicalAnalyzer(word) 135 | analyzer.analyze() 136 | 137 | assert isinstance(analyzer.word, Adverb) 138 | assert analyzer.word.content == word 139 | 140 | def test_analyze_match(self): 141 | for word in self.VALID_WORDS: 142 | analyzer = AdverbMorphologicalAnalyzer(word) 143 | analyzer.analyze() 144 | 145 | assert analyzer.matches is not None 146 | 147 | def test_analyze_return_false(self): 148 | for word in self.INVALID_WORDS: 149 | analyzer = AdverbMorphologicalAnalyzer(word) 150 | 151 | assert analyzer.analyze() is False 152 | 153 | def test_analyze_return_true(self): 154 | for word in self.VALID_WORDS: 155 | analyzer = AdverbMorphologicalAnalyzer(word) 156 | 157 | assert analyzer.analyze() 158 | 159 | 160 | 
def test_analyze_processed(self): 161 | for word in self.VALID_WORDS: 162 | analyzer = AdverbMorphologicalAnalyzer(word) 163 | 164 | assert analyzer.processed is False 165 | 166 | analyzer.analyze() 167 | 168 | assert analyzer.processed is True 169 | 170 | def test_analyze_processed_response(self): 171 | for word in self.VALID_WORDS: 172 | analyzer = AdverbMorphologicalAnalyzer(word) 173 | analyzer.analyze() 174 | 175 | assert analyzer.analyze() is None 176 | assert analyzer.analyze() is None 177 | 178 | class TestAdverbMorphologicalAnalyzerAdversList: 179 | def test_adverbs_not_empty(self): 180 | assert AdverbMorphologicalAnalyzer.SPECIAL_ADVERBS is not None 181 | 182 | def test_adverbs_not_size(self): 183 | assert len(AdverbMorphologicalAnalyzer.SPECIAL_ADVERBS) == 26 184 | 185 | def test_adverbs_match_list(self): 186 | for word in AdverbMorphologicalAnalyzer.SPECIAL_ADVERBS: 187 | assert AdverbMorphologicalAnalyzer.SPECIAL_ADVERBS_MATCH_REGEXP.match(word) 188 | 189 | def test_adverbs_match_final_regexp_list(self): 190 | for word in AdverbMorphologicalAnalyzer.SPECIAL_ADVERBS: 191 | assert AdverbMorphologicalAnalyzer.MATCH_REGEXP.match(word) 192 | -------------------------------------------------------------------------------- /tests/analyzers/morphological/test_interjection_morphological_analyzer.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-docstring,no-self-use 2 | 3 | import re 4 | import pytest 5 | 6 | from context import esperanto_analyzer 7 | 8 | from esperanto_analyzer.speech import Interjection 9 | from esperanto_analyzer.analyzers.morphological import InterjectionMorphologicalAnalyzer 10 | 11 | class TestInterjectionMorphologicalAnalyzerBasic(): 12 | TEST_WORD = 'volapukaĵo!' 
13 | 14 | def test_import(self): 15 | assert InterjectionMorphologicalAnalyzer 16 | 17 | def test_initialize_default_options(self): 18 | analyzer = InterjectionMorphologicalAnalyzer(self.TEST_WORD) 19 | 20 | assert analyzer.options == dict() 21 | 22 | def test_initialize_overwrite_options(self): 23 | analyzer = InterjectionMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok')) 24 | 25 | assert analyzer.options == dict(option='ok') 26 | 27 | def test_initialize_raw_word(self): 28 | analyzer = InterjectionMorphologicalAnalyzer(self.TEST_WORD) 29 | 30 | assert analyzer.raw_word == self.TEST_WORD 31 | 32 | def test_initialize_word(self): 33 | analyzer = InterjectionMorphologicalAnalyzer(self.TEST_WORD) 34 | 35 | # analyzer.word is only populated after calling `analyze()` method 36 | assert analyzer.word is None 37 | 38 | def test_initialize_matches(self): 39 | analyzer = InterjectionMorphologicalAnalyzer(self.TEST_WORD) 40 | 41 | # analyzer.matches is only populated after calling `analyze()` method 42 | assert analyzer.matches is None 43 | 44 | def test_initialize_processed(self): 45 | analyzer = InterjectionMorphologicalAnalyzer(self.TEST_WORD) 46 | 47 | # analyzer.matches is only populated after calling `analyze()` method 48 | assert analyzer.processed is False 49 | 50 | def test_match_regexp(self): 51 | assert InterjectionMorphologicalAnalyzer.MATCH_REGEXP is not None 52 | 53 | def test_word_class(self): 54 | assert isinstance(InterjectionMorphologicalAnalyzer.word_class()(self.TEST_WORD), Interjection) 55 | 56 | class TestInterjectionMorphologicalAnalyzerMatchMethod(): 57 | VALID_WORDS = ['Aĥ!', 'Aj!', 'Ba!', 'Baf!', 'Baj!', 'Be!', 'Bis!', 'Diable!', 'Ek!', 58 | 'Fi!', 'Fu!', 'Ĝis!', 'Ha!', 'Ha lo!', 'He!', 'Hej!', 'Ho!', 'Ho ve!', 59 | 'Hoj!', 'Hola!', 'Hu!', 'Hup!', 'Hura!', 'Lo!', 'Lu lu!', 'Nu!', 'Uf!', 60 | 'Up!', 'Ŭa!', 'Ve!', 'Volapukaĵo!', 'Jen' 61 | ] 62 | 63 | INVALID_WORDS = ['io', 'bela', 'domo', 'hundoj', 'kiu', 'vi', 'multe', 'ankoraŭ', 'dek', 64 | 
'du', 'ĉar', 'aŭ', 'kontraŭ', 'kontraŭe de', 'krom', 'kun', 'laŭ'] 65 | 66 | def test_match(self): 67 | for word in self.VALID_WORDS: 68 | analyzer = InterjectionMorphologicalAnalyzer(word) 69 | matches = analyzer.match() 70 | 71 | assert matches is not None 72 | assert len(matches.span()) == 2 73 | 74 | def test_match_empty(self): 75 | for word in self.INVALID_WORDS: 76 | analyzer = InterjectionMorphologicalAnalyzer(word) 77 | matches = analyzer.match() 78 | 79 | assert matches is None 80 | 81 | class TestInterjectionMorphologicalAnalyzerAnalyzeMethod(): 82 | VALID_WORDS = ['Aĥ!', 'Aj!', 'Ba!', 'Baf!', 'Baj!', 'Be!', 'Bis!', 'Diable!', 'Ek!', 83 | 'Fi!', 'Fu!', 'Ĝis!', 'Ha!', 'Ha lo!', 'He!', 'Hej!', 'Ho!', 'Ho ve!', 84 | 'Hoj!', 'Hola!', 'Hu!', 'Hup!', 'Hura!', 'Lo!', 'Lu lu!', 'Nu!', 'Uf!', 85 | 'Up!', 'Ŭa!', 'Ve!', 'Volapukaĵo!', 'Jen' 86 | ] 87 | 88 | INVALID_WORDS = ['io', 'bela', 'domo', 'hundoj', 'kiu', 'vi', 'multe', 'ankoraŭ', 'dek', 89 | 'du', 'ĉar', 'aŭ', 'kontraŭ', 'kontraŭe de', 'krom', 'kun', 'laŭ'] 90 | 91 | def test_invalid_analyze(self): 92 | for word in self.INVALID_WORDS: 93 | analyzer = InterjectionMorphologicalAnalyzer(word) 94 | result = analyzer.analyze() 95 | 96 | assert not result 97 | 98 | def test_invalid_analyze_word(self): 99 | for word in self.INVALID_WORDS: 100 | analyzer = InterjectionMorphologicalAnalyzer(word) 101 | analyzer.analyze() 102 | 103 | assert analyzer.word is None 104 | 105 | def test_invalid_analyze_match(self): 106 | for word in self.INVALID_WORDS: 107 | analyzer = InterjectionMorphologicalAnalyzer(word) 108 | analyzer.analyze() 109 | 110 | assert analyzer.matches is None 111 | 112 | def test_analyze(self): 113 | for word in self.VALID_WORDS: 114 | analyzer = InterjectionMorphologicalAnalyzer(word) 115 | 116 | assert analyzer.analyze() 117 | 118 | def test_prepositions_list(self): 119 | for word in InterjectionMorphologicalAnalyzer.INTERJECTIONS_LIST: 120 | analyzer = InterjectionMorphologicalAnalyzer(word) 121 | 122 
| assert analyzer.analyze() 123 | 124 | def test_analyze_word(self): 125 | for word in self.VALID_WORDS: 126 | analyzer = InterjectionMorphologicalAnalyzer(word) 127 | analyzer.analyze() 128 | 129 | assert isinstance(analyzer.word, Interjection) 130 | assert analyzer.word.content == word 131 | 132 | def test_analyze_match(self): 133 | for word in self.VALID_WORDS: 134 | analyzer = InterjectionMorphologicalAnalyzer(word) 135 | analyzer.analyze() 136 | 137 | assert analyzer.matches is not None 138 | 139 | def test_analyze_return_false(self): 140 | for word in self.INVALID_WORDS: 141 | analyzer = InterjectionMorphologicalAnalyzer(word) 142 | 143 | assert analyzer.analyze() is False 144 | 145 | def test_analyze_return_true(self): 146 | for word in self.VALID_WORDS: 147 | analyzer = InterjectionMorphologicalAnalyzer(word) 148 | 149 | assert analyzer.analyze() 150 | 151 | 152 | def test_analyze_processed(self): 153 | for word in self.VALID_WORDS: 154 | analyzer = InterjectionMorphologicalAnalyzer(word) 155 | 156 | assert analyzer.processed is False 157 | 158 | analyzer.analyze() 159 | 160 | assert analyzer.processed is True 161 | 162 | def test_analyze_processed_response(self): 163 | for word in self.VALID_WORDS: 164 | analyzer = InterjectionMorphologicalAnalyzer(word) 165 | analyzer.analyze() 166 | 167 | assert analyzer.analyze() is None 168 | assert analyzer.analyze() is None 169 | 170 | class TestInterjectionMorphologicalAnalyzerPrepositionsList: 171 | def test_prepositions_not_empty(self): 172 | assert InterjectionMorphologicalAnalyzer.INTERJECTIONS_LIST is not None 173 | 174 | def test_prepositions_not_size(self): 175 | assert len(InterjectionMorphologicalAnalyzer.INTERJECTIONS_LIST) == 32 176 | 177 | def test_prepositions_match_list(self): 178 | for word in InterjectionMorphologicalAnalyzer.INTERJECTIONS_LIST: 179 | assert InterjectionMorphologicalAnalyzer.INTERJECTIONS_MATCH_REGEXP.match(word) 180 | 181 | def test_prepositions_match_final_regexp_list(self): 182 | 
for word in InterjectionMorphologicalAnalyzer.INTERJECTIONS_LIST: 183 | assert InterjectionMorphologicalAnalyzer.MATCH_REGEXP.match(word) 184 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # sample documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Apr 16 21:22:43 2012. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | #sys.path.insert(0, os.path.abspath('.')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | #needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 28 | extensions = [] 29 | 30 | # Add any paths that contain templates here, relative to this directory. 31 | templates_path = ['_templates'] 32 | 33 | # The suffix of source filenames. 34 | source_suffix = '.rst' 35 | 36 | # The encoding of source files. 37 | #source_encoding = 'utf-8-sig' 38 | 39 | # The master toctree document. 40 | master_doc = 'index' 41 | 42 | # General information about the project. 
43 | project = u'sample' 44 | copyright = u'2012, Kenneth Reitz' 45 | 46 | # The version info for the project you're documenting, acts as replacement for 47 | # |version| and |release|, also used in various other places throughout the 48 | # built documents. 49 | # 50 | # The short X.Y version. 51 | version = 'v0.0.1' 52 | # The full version, including alpha/beta/rc tags. 53 | release = 'v0.0.1' 54 | 55 | # The language for content autogenerated by Sphinx. Refer to documentation 56 | # for a list of supported languages. 57 | #language = None 58 | 59 | # There are two options for replacing |today|: either, you set today to some 60 | # non-false value, then it is used: 61 | #today = '' 62 | # Else, today_fmt is used as the format for a strftime call. 63 | #today_fmt = '%B %d, %Y' 64 | 65 | # List of patterns, relative to source directory, that match files and 66 | # directories to ignore when looking for source files. 67 | exclude_patterns = ['_build'] 68 | 69 | # The reST default role (used for this markup: `text`) to use for all documents. 70 | #default_role = None 71 | 72 | # If true, '()' will be appended to :func: etc. cross-reference text. 73 | #add_function_parentheses = True 74 | 75 | # If true, the current module name will be prepended to all description 76 | # unit titles (such as .. function::). 77 | #add_module_names = True 78 | 79 | # If true, sectionauthor and moduleauthor directives will be shown in the 80 | # output. They are ignored by default. 81 | #show_authors = False 82 | 83 | # The name of the Pygments (syntax highlighting) style to use. 84 | pygments_style = 'sphinx' 85 | 86 | # A list of ignored prefixes for module index sorting. 87 | #modindex_common_prefix = [] 88 | 89 | 90 | # -- Options for HTML output --------------------------------------------------- 91 | 92 | # The theme to use for HTML and HTML Help pages. See the documentation for 93 | # a list of builtin themes. 
# NOTE(review): tail of the Sphinx build configuration (docs/conf.py).
# Everything below is standard sphinx-quickstart boilerplate: HTML, LaTeX,
# man-page and Texinfo output options. Commented-out settings are the
# defaults, kept for discoverability.
html_theme = 'default'

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []

# The name for this set of Sphinx documents. If None, it defaults to
# " v documentation".
#html_title = None

# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}

# If false, no module index is generated.
#html_domain_indices = True

# If false, no index is generated.
#html_use_index = True

# If true, the index is split into individual pages for each letter.
#html_split_index = False

# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Output file base name for HTML help builder.
htmlhelp_basename = 'sampledoc'


# -- Options for LaTeX output --------------------------------------------------

latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
  ('index', 'sample.tex', u'sample Documentation',
   u'Kenneth Reitz', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False

# If true, show page references after internal links.
#latex_show_pagerefs = False

# If true, show URL addresses after external links.
#latex_show_urls = False

# Documents to append as an appendix to all manuals.
#latex_appendices = []

# If false, no module index is generated.
#latex_domain_indices = True


# -- Options for manual page output --------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    ('index', 'sample', u'sample Documentation',
     [u'Kenneth Reitz'], 1)
]

# If true, show URL addresses after external links.
#man_show_urls = False


# -- Options for Texinfo output ------------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
  ('index', 'sample', u'sample Documentation',
   u'Kenneth Reitz', 'sample', 'One line description of project.',
   'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
#texinfo_appendices = []

# If false, no module index is generated.
#texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
# Tests for NumeralMorphologicalAnalyzer: construction defaults, the `match()`
# regexp behaviour over numeral words and digit strings, the `analyze()`
# life-cycle (word / matches / processed state), and the class-level
# BASIC_NUMBERS_LIST / regexp constants.
# pylint: disable=missing-docstring,no-self-use

import re
import pytest

from context import esperanto_analyzer

from esperanto_analyzer.speech import Numeral
from esperanto_analyzer.analyzers.morphological import NumeralMorphologicalAnalyzer

class TestNumeralMorphologicalAnalyzerBasic():
    TEST_WORD = 'dek'

    def test_import(self):
        assert NumeralMorphologicalAnalyzer

    def test_initialize_default_options(self):
        analyzer = NumeralMorphologicalAnalyzer(self.TEST_WORD)

        assert analyzer.options == dict()

    def test_initialize_overwrite_options(self):
        analyzer = NumeralMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok'))

        assert analyzer.options == dict(option='ok')

    def test_initialize_raw_word(self):
        analyzer = NumeralMorphologicalAnalyzer(self.TEST_WORD)

        assert analyzer.raw_word == self.TEST_WORD

    def test_initialize_word(self):
        analyzer = NumeralMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.word is only populated after calling `analyze()` method
        assert analyzer.word is None

    def test_initialize_matches(self):
        analyzer = NumeralMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.matches is only populated after calling `analyze()` method
        assert analyzer.matches is None

    def test_initialize_processed(self):
        analyzer = NumeralMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.processed is only flipped to True after calling `analyze()` method
        assert analyzer.processed is False

    def test_match_regexp(self):
        assert NumeralMorphologicalAnalyzer.MATCH_REGEXP is not None

    def test_regexp_value(self):
        assert NumeralMorphologicalAnalyzer.MATCH_REGEXP == re.compile('^(-?\\d+|nul|unu|du|tri|kvar|kvin|ses|sep|ok|naŭ|dek|(unu|du|tri|kvar|kvin|ses|sep|ok|naŭ|dek)?(dek|cent|milionoj|miliono|miliardoj|miliardo|bilionoj|biliono|mil))$', re.IGNORECASE)

    def test_word_class(self):
        # BUG FIX: the original called `isinstance(...)` without asserting on the
        # result, so this test could never fail.
        assert isinstance(NumeralMorphologicalAnalyzer.word_class()(self.TEST_WORD), Numeral)

class TestNumeralMorphologicalAnalyzerMatchMethod():
    VALID_WORDS = [
        'unu', 'du', 'tri', 'kvar', 'kvin', 'ses', 'sep', 'ok', 'naŭ', 'dek',
        'dudek', 'tridek', 'kvardek', 'kvindek', 'sesdek', 'sepdek', 'okdek', 'naŭdek',
        'cent', 'ducent', 'tricent', 'kvarcent', 'kvincent', 'sescent', 'sepcent', 'okcent', 'naŭcent',
        'mil', 'dumil', 'miliardo', 'miliono', 'miliardoj', 'milionoj'
    ]

    VALID_DIGITS = ['10', '20', '-1', '0', '102041', '9992232213']

    INVALID_DIGITS = ['a10', '2a0', '-1x', '01#', '102041@', '!9992232213']

    INVALID_WORDS = ['io', 'lo', 'bela', 'la', 'kiu', 'vi', 'kun', 'multe', 'ankoraŭ', '?', '!']

    def test_match(self):
        for word in self.VALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)
            matches = analyzer.match()

            assert matches is not None
            assert len(matches.span()) == 2

    def test_match_digits(self):
        for word in self.VALID_DIGITS:
            analyzer = NumeralMorphologicalAnalyzer(word)
            matches = analyzer.match()

            assert matches is not None
            assert len(matches.span()) == 2

    def test_match_invalid_digits(self):
        for word in self.INVALID_DIGITS:
            analyzer = NumeralMorphologicalAnalyzer(word)
            matches = analyzer.match()

            assert matches is None

    def test_match_empty(self):
        for word in self.INVALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)
            matches = analyzer.match()

            assert matches is None

class TestNumeralMorphologicalAnalyzerAnalyzeMethod():
    VALID_WORDS = [
        'unu', 'du', 'tri', 'kvar', 'kvin', 'ses', 'sep', 'ok', 'naŭ', 'dek',
        'dudek', 'tridek', 'kvardek', 'kvindek', 'sesdek', 'sepdek', 'okdek', 'naŭdek',
        'cent', 'ducent', 'tricent', 'kvarcent', 'kvincent', 'sescent', 'sepcent', 'okcent', 'naŭcent',
        'mil', 'dumil', 'miliardo', 'miliono', 'miliardoj', 'milionoj',
        '1', '2', '3', '4', '5', '6', '7', '8', '9', '10',
        '11', '20', '-1', '0', '102041', '9992232213'
    ]

    INVALID_WORDS = ['io', 'lo', 'bela', 'la', 'kiu', 'vi', 'kun', 'multe', 'ankoraŭ',
                     'a10', '2a0', '-1x', '01#', '102041@', '!9992232213'
                    ]

    def test_invalid_analyze(self):
        for word in self.INVALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)
            result = analyzer.analyze()

            assert not result

    def test_invalid_analyze_word(self):
        for word in self.INVALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert analyzer.word is None

    def test_invalid_analyze_match(self):
        for word in self.INVALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert analyzer.matches is None

    def test_analyze(self):
        for word in self.VALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)

            assert analyzer.analyze()

    def test_analyze_word(self):
        for word in self.VALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert isinstance(analyzer.word, Numeral)
            assert analyzer.word.content == word

    def test_analyze_match(self):
        for word in self.VALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert analyzer.matches is not None

    def test_analyze_return_false(self):
        for word in self.INVALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)

            assert analyzer.analyze() is False

    def test_analyze_return_true(self):
        for word in self.VALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)

            assert analyzer.analyze()


    def test_analyze_processed(self):
        for word in self.VALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)

            assert analyzer.processed is False

            analyzer.analyze()

            assert analyzer.processed is True

    def test_analyze_processed_response(self):
        for word in self.VALID_WORDS:
            analyzer = NumeralMorphologicalAnalyzer(word)
            analyzer.analyze()

            # repeated calls are no-ops once processed: they return None
            assert analyzer.analyze() is None
            assert analyzer.analyze() is None

class TestNumeralMorphologicalAnalyzerBasicNumbersList:
    def test_numbers_not_empty(self):
        assert NumeralMorphologicalAnalyzer.BASIC_NUMBERS_LIST is not None

    def test_basic_numbers_included(self):
        for number in ['nul', 'unu', 'du', 'tri', 'kvar', 'kvin', 'ses', 'sep', 'ok', 'naŭ', 'dek']:
            assert number in NumeralMorphologicalAnalyzer.BASIC_NUMBERS_LIST

    def test_numbers_not_size(self):
        assert len(NumeralMorphologicalAnalyzer.BASIC_NUMBERS_LIST) == 11

    def test_numbers_match_list(self):
        for word in NumeralMorphologicalAnalyzer.BASIC_NUMBERS_LIST:
            assert NumeralMorphologicalAnalyzer.BASIC_NUMBERS_REGEXP.match(word)

    def test_numbers_match_final_regexp_list(self):
        for word in NumeralMorphologicalAnalyzer.BASIC_NUMBERS_LIST:
            assert NumeralMorphologicalAnalyzer.MATCH_REGEXP.match(word)

    def test_others_numbers_regexp(self):
        for word in ['dudek', 'tridek', 'ducent', 'dumil', 'trimil', 'mil', 'miliono', 'milionoj', 'cent', 'dek', 'miliardo']:
            assert NumeralMorphologicalAnalyzer.OTHERS_NUMBERS_REGEXP.match(word)

    def test_numbers_digit_regexp(self):
        for word in ['1', '20', '300', '999999', '-10']:
            assert NumeralMorphologicalAnalyzer.NUMBERS_DIGIT_REGEXP.match(word)

    def test_invalid_others_numbers_regexp(self):
        for word in ['domo', 'la', 'multe', 'bela', 'belajn', 'a0x']:
            assert NumeralMorphologicalAnalyzer.OTHERS_NUMBERS_REGEXP.match(word) is None

    def test_invalid_numbers_digit_regexp(self):
        for word in ['@', '!10', '*10*']:
            assert NumeralMorphologicalAnalyzer.NUMBERS_DIGIT_REGEXP.match(word) is None
# Tests for PrepositionMorphologicalAnalyzer: construction defaults, the
# `match()` regexp over the fixed preposition vocabulary, the `analyze()`
# life-cycle, and the class-level PREPOSITIONS_LIST constant.
# pylint: disable=missing-docstring,no-self-use

import re
import pytest

from context import esperanto_analyzer

from esperanto_analyzer.speech import Preposition
from esperanto_analyzer.analyzers.morphological import PrepositionMorphologicalAnalyzer

class TestPrepositionMorphologicalAnalyzerBasic():
    TEST_WORD = 'anstataŭ'

    def test_import(self):
        assert PrepositionMorphologicalAnalyzer

    def test_initialize_default_options(self):
        analyzer = PrepositionMorphologicalAnalyzer(self.TEST_WORD)

        assert analyzer.options == dict()

    def test_initialize_overwrite_options(self):
        analyzer = PrepositionMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok'))

        assert analyzer.options == dict(option='ok')

    def test_initialize_raw_word(self):
        analyzer = PrepositionMorphologicalAnalyzer(self.TEST_WORD)

        assert analyzer.raw_word == self.TEST_WORD

    def test_initialize_word(self):
        analyzer = PrepositionMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.word is only populated after calling `analyze()` method
        assert analyzer.word is None

    def test_initialize_matches(self):
        analyzer = PrepositionMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.matches is only populated after calling `analyze()` method
        assert analyzer.matches is None

    def test_initialize_processed(self):
        analyzer = PrepositionMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.processed is only flipped to True after calling `analyze()` method
        assert analyzer.processed is False

    def test_match_regexp(self):
        assert PrepositionMorphologicalAnalyzer.MATCH_REGEXP is not None

    def test_word_class(self):
        assert isinstance(PrepositionMorphologicalAnalyzer.word_class()(self.TEST_WORD), Preposition)

class TestPrepositionMorphologicalAnalyzerMatchMethod():
    VALID_WORDS = ['K', 'al', 'anstataŭ', 'antaŭ', 'antaŭ ol', 'apud', 'da', 'de', 'disde',
                   'du vortoj', 'dum', 'ekde', 'ekster', 'eksteren', 'el', 'en', 'ene',
                   'estiel', 'far', 'fare de', 'flanke de', 'for de', 'graŭ', 'inter', 'je',
                   'kaj ankaŭ', 'kiel', 'kontraŭ', 'kontraŭe de', 'krom', 'kun', 'laŭ',
                   'mala', 'malantaŭ', 'malgraŭ', 'malkiel', 'malsupre de', 'malsupren',
                   'meze de', 'na', 'nome de', 'ol', 'per', 'pere de', 'plus', 'po', 'por',
                   'post', 'preter', 'pri', 'pro', 'proksime de', 'samkiel', 'sed', 'sekva',
                   'sen', 'sub', 'suben', 'super', 'supren', 'sur', 'tiu', 'tiuj', 'tra',
                   'trans', 'tri vortoj', 'tuj post', 'tutĉirkaŭ',
                   'ĉe', 'ĉi tiu', 'ĉi tiuj', 'ĉirkaŭ', 'ĝis'
                  ]

    INVALID_WORDS = ['io', 'bela', 'domo', 'hundoj', 'kiu', 'vi', 'multe', 'ankoraŭ', 'dek',
                     'du', 'ĉar', 'aŭ', '?', '!']

    def test_match(self):
        for word in self.VALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)
            matches = analyzer.match()

            assert matches is not None
            assert len(matches.span()) == 2

    def test_match_empty(self):
        for word in self.INVALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)
            matches = analyzer.match()

            assert matches is None

class TestPrepositionMorphologicalAnalyzerAnalyzeMethod():
    VALID_WORDS = ['K', 'al', 'anstataŭ', 'antaŭ', 'antaŭ ol', 'apud', 'da', 'de', 'disde',
                   'du vortoj', 'dum', 'ekde', 'ekster', 'eksteren', 'el', 'en', 'ene',
                   'estiel', 'far', 'fare de', 'flanke de', 'for de', 'graŭ', 'inter', 'je',
                   'kaj ankaŭ', 'kiel', 'kontraŭ', 'kontraŭe de', 'krom', 'kun', 'laŭ',
                   'mala', 'malantaŭ', 'malgraŭ', 'malkiel', 'malsupre de', 'malsupren',
                   'meze de', 'na', 'nome de', 'ol', 'per', 'pere de', 'plus', 'po', 'por',
                   'post', 'preter', 'pri', 'pro', 'proksime de', 'samkiel', 'sed', 'sekva',
                   'sen', 'sub', 'suben', 'super', 'supren', 'sur', 'tiu', 'tiuj', 'tra',
                   'trans', 'tri vortoj', 'tuj post', 'tutĉirkaŭ',
                   'ĉe', 'ĉi tiu', 'ĉi tiuj', 'ĉirkaŭ', 'ĝis',
                  ]

    INVALID_WORDS = ['io', 'bela', 'domo', 'hundoj', 'kiu', 'vi', 'multe', 'ankoraŭ', 'dek',
                     'du', 'ĉar', 'aŭ', '?', '!']

    def test_invalid_analyze(self):
        for word in self.INVALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)
            result = analyzer.analyze()

            assert not result

    def test_invalid_analyze_word(self):
        for word in self.INVALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert analyzer.word is None

    def test_invalid_analyze_match(self):
        for word in self.INVALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert analyzer.matches is None

    def test_analyze(self):
        for word in self.VALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)

            assert analyzer.analyze()

    def test_prepositions_list(self):
        for word in PrepositionMorphologicalAnalyzer.PREPOSITIONS_LIST:
            analyzer = PrepositionMorphologicalAnalyzer(word)

            assert analyzer.analyze()

    def test_analyze_word(self):
        for word in self.VALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert isinstance(analyzer.word, Preposition)
            assert analyzer.word.content == word

    def test_analyze_match(self):
        for word in self.VALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert analyzer.matches is not None

    def test_analyze_return_false(self):
        for word in self.INVALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)

            assert analyzer.analyze() is False

    def test_analyze_return_true(self):
        for word in self.VALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)

            assert analyzer.analyze()


    def test_analyze_processed(self):
        for word in self.VALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)

            assert analyzer.processed is False

            analyzer.analyze()

            assert analyzer.processed is True

    def test_analyze_processed_response(self):
        for word in self.VALID_WORDS:
            analyzer = PrepositionMorphologicalAnalyzer(word)
            analyzer.analyze()

            # repeated calls are no-ops once processed: they return None
            assert analyzer.analyze() is None
            assert analyzer.analyze() is None

class TestPrepositionMorphologicalAnalyzerPrepositionsList:
    # Hardcoded copy of the expected vocabulary, to detect accidental edits
    # to PrepositionMorphologicalAnalyzer.PREPOSITIONS_LIST.
    PREPOSITIONS_LIST = ['K', 'al', 'anstataŭ', 'antaŭ', 'antaŭ ol', 'apud', 'da', 'de', 'disde',
                         'du vortoj', 'dum', 'ekde', 'ekster', 'eksteren', 'el', 'en', 'ene',
                         'estiel', 'far', 'fare de', 'flanke de', 'for de', 'graŭ', 'inter', 'je',
                         'kaj ankaŭ', 'kiel', 'kontraŭ', 'kontraŭe de', 'krom', 'kun', 'laŭ',
                         'mala', 'malantaŭ', 'malgraŭ', 'malkiel', 'malsupre de', 'malsupren',
                         'meze de', 'na', 'nome de', 'ol', 'per', 'pere de', 'plus', 'po', 'por',
                         'post', 'preter', 'pri', 'pro', 'proksime de', 'samkiel', 'sed', 'sekva',
                         'sen', 'sub', 'suben', 'super', 'supren', 'sur', 'tiu', 'tiuj', 'tra',
                         'trans', 'tri vortoj', 'tuj post', 'tutĉirkaŭ',
                         'ĉe', 'ĉi tiu', 'ĉi tiuj', 'ĉirkaŭ', 'ĝis']

    def test_preposition_list_not_changed(self):
        assert PrepositionMorphologicalAnalyzer.PREPOSITIONS_LIST == self.PREPOSITIONS_LIST

    def test_prepositions_not_empty(self):
        assert PrepositionMorphologicalAnalyzer.PREPOSITIONS_LIST is not None

    def test_prepositions_not_size(self):
        assert len(PrepositionMorphologicalAnalyzer.PREPOSITIONS_LIST) == 73

    def test_prepositions_match_list(self):
        for word in PrepositionMorphologicalAnalyzer.PREPOSITIONS_LIST:
            assert PrepositionMorphologicalAnalyzer.PROPOSITIONS_MATCH_REGEXP.match(word)

    def test_prepositions_match_final_regexp_list(self):
        for word in PrepositionMorphologicalAnalyzer.PREPOSITIONS_LIST:
            assert PrepositionMorphologicalAnalyzer.MATCH_REGEXP.match(word)
# Tests for PronounMorphologicalAnalyzer: construction defaults, the `match()`
# regexp over personal/possessive/correlative pronouns (including accusative
# `-n` and plural `-j` forms), the `analyze()` life-cycle, and the class-level
# pronoun lists / regexp constants.
# pylint: disable=missing-docstring,no-self-use

import re
import pytest

from context import esperanto_analyzer

from esperanto_analyzer.speech import Pronoun
from esperanto_analyzer.analyzers.morphological import PronounMorphologicalAnalyzer

class TestPronounMorphologicalAnalyzerBasic():
    TEST_WORD = 'mi'

    def test_import(self):
        assert PronounMorphologicalAnalyzer

    def test_initialize_default_options(self):
        analyzer = PronounMorphologicalAnalyzer(self.TEST_WORD)

        assert analyzer.options == dict()

    def test_initialize_overwrite_options(self):
        analyzer = PronounMorphologicalAnalyzer(self.TEST_WORD, dict(option='ok'))

        assert analyzer.options == dict(option='ok')

    def test_initialize_raw_word(self):
        analyzer = PronounMorphologicalAnalyzer(self.TEST_WORD)

        assert analyzer.raw_word == self.TEST_WORD

    def test_initialize_word(self):
        analyzer = PronounMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.word is only populated after calling `analyze()` method
        assert analyzer.word is None

    def test_initialize_matches(self):
        analyzer = PronounMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.matches is only populated after calling `analyze()` method
        assert analyzer.matches is None

    def test_initialize_processed(self):
        analyzer = PronounMorphologicalAnalyzer(self.TEST_WORD)

        # analyzer.processed is only flipped to True after calling `analyze()` method
        assert analyzer.processed is False

    def test_match_regexp(self):
        assert PronounMorphologicalAnalyzer.MATCH_REGEXP is not None

    def test_word_class(self):
        # BUG FIX: the original called `isinstance(...)` without asserting on the
        # result, so this test could never fail.
        assert isinstance(PronounMorphologicalAnalyzer.word_class()(self.TEST_WORD), Pronoun)

class TestPronounMorphologicalAnalyzerMatchMethod():
    VALID_WORDS = [
        'mi', 'vi', 'li', 'ŝi', 'ĝi', 'oni', 'ili', 'ni',
        'min', 'vin', 'lin', 'ŝin', 'ĝin', 'onin', 'ilin', 'nin',
        'mia', 'via', 'lia', 'ŝia', 'ĝia', 'onia', 'ilia', 'nia',
        'miaj', 'viaj', 'liaj', 'ŝiaj', 'ĝiaj', 'oniaj', 'iliaj', 'niaj',
        'mian', 'vian', 'lian', 'ŝian', 'ĝian', 'onian', 'ilian', 'nian',
        'miajn', 'viajn', 'liajn', 'ŝiajn', 'ĝiajn', 'oniajn', 'iliajn', 'niajn',
        'kiu', 'kio', 'kies', 'tiu', 'ĉi tiu', 'tia',
        'nenio', 'neniu', 'ĉio', 'ĉiu', 'io', 'iu', 'io ajn', 'iu ajn',
        'nenion', 'neniun', 'ĉion', 'ĉiun', 'ion', 'iun', 'io ajn', 'iu ajn',
        'io ajn', 'ĉio ajn', 'iu ajn', 'ĉiu ajn'
    ]

    INVALID_WORDS = ['lo', 'bela', 'la', 'kun', 'multe', 'ankoraŭ',
                     'a10', '2a0', '-1x', '01#', '102041@', '!9992232213', 'ilianj',
                     'ilimia', 'miaan', 'miani', 'vianj'
                    ]

    def test_match(self):
        for word in self.VALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)
            matches = analyzer.match()

            assert matches is not None
            assert len(matches.span()) == 2

    def test_match_empty(self):
        for word in self.INVALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)
            matches = analyzer.match()
            assert matches is None

class TestPronounMorphologicalAnalyzerAnalyzeMethod():
    VALID_WORDS = [
        'mi', 'vi', 'li', 'ŝi', 'ĝi', 'oni', 'ili', 'ni',
        'min', 'vin', 'lin', 'ŝin', 'ĝin', 'onin', 'ilin', 'nin',
        'mia', 'via', 'lia', 'ŝia', 'ĝia', 'onia', 'ilia', 'nia',
        'miaj', 'viaj', 'liaj', 'ŝiaj', 'ĝiaj', 'oniaj', 'iliaj', 'niaj',
        'mian', 'vian', 'lian', 'ŝian', 'ĝian', 'onian', 'ilian', 'nian',
        'miajn', 'viajn', 'liajn', 'ŝiajn', 'ĝiajn', 'oniajn', 'iliajn', 'niajn',
        'kiu', 'kio', 'kies', 'tiu', 'ĉi tiu', 'tia',
        'nenio', 'neniu', 'ĉio', 'ĉiu', 'io', 'iu', 'io ajn', 'iu ajn',
        'nenion', 'neniun', 'ĉion', 'ĉiun', 'ion', 'iun', 'io ajn', 'iu ajn',
        'io ajn', 'ĉio ajn', 'iu ajn', 'ĉiu ajn'
    ]

    INVALID_WORDS = ['lo', 'bela', 'la', 'kun', 'multe', 'ankoraŭ',
                     'a10', '2a0', '-1x', '01#', '102041@', '!9992232213', 'ilianj',
                     'ilimia', 'miaan', 'miani', 'vianj'
                    ]

    def test_invalid_analyze(self):
        for word in self.INVALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)
            result = analyzer.analyze()

            assert not result

    def test_invalid_analyze_word(self):
        for word in self.INVALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert analyzer.word is None

    def test_invalid_analyze_match(self):
        for word in self.INVALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert analyzer.matches is None

    def test_analyze(self):
        for word in self.VALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)

            assert analyzer.analyze()

    def test_analyze_word(self):
        for word in self.VALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert isinstance(analyzer.word, Pronoun)
            assert analyzer.word.content == word

    def test_analyze_match(self):
        for word in self.VALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)
            analyzer.analyze()

            assert analyzer.matches is not None

    def test_analyze_return_false(self):
        for word in self.INVALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)

            assert analyzer.analyze() is False

    def test_analyze_return_true(self):
        for word in self.VALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)

            assert analyzer.analyze()


    def test_analyze_processed(self):
        for word in self.VALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)

            assert analyzer.processed is False

            analyzer.analyze()

            assert analyzer.processed is True

    def test_analyze_processed_response(self):
        for word in self.VALID_WORDS:
            analyzer = PronounMorphologicalAnalyzer(word)
            analyzer.analyze()

            # repeated calls are no-ops once processed: they return None
            assert analyzer.analyze() is None
            assert analyzer.analyze() is None

class TestPronounMorphologicalAnalyzerPersonalPronounsList:
    BASIC_PERSONAL_PRONOUNS = ['mi', 'vi','li', 'ŝi', 'ĝi', 'oni', 'ili']

    def test_pronouns_not_empty(self):
        assert PronounMorphologicalAnalyzer.PERSONAL_PRONOUNS_LIST is not None

    def test_basic_pronouns_included(self):
        for number in self.BASIC_PERSONAL_PRONOUNS:
            assert number in PronounMorphologicalAnalyzer.PERSONAL_PRONOUNS_LIST

    def test_pronouns_not_size(self):
        assert len(PronounMorphologicalAnalyzer.PERSONAL_PRONOUNS_LIST) == 8

    def test_pronouns_list_match_regexp(self):
        for word in PronounMorphologicalAnalyzer.PERSONAL_PRONOUNS_LIST:
            assert PronounMorphologicalAnalyzer.PERSONAL_PRONOUNS_LIST_REGEXP.match(word)

    def test_pronouns_match_hardcoded_list(self):
        for word in self.BASIC_PERSONAL_PRONOUNS:
            assert PronounMorphologicalAnalyzer.PERSONAL_PRONOUNS_LIST_REGEXP.match(word)

    def test_pronouns_match_final_regexp_list(self):
        for word in PronounMorphologicalAnalyzer.PERSONAL_PRONOUNS_LIST:
            assert PronounMorphologicalAnalyzer.MATCH_REGEXP.match(word)

    def test_pronouns_acusative_match_final_regexp(self):
        for word in self.BASIC_PERSONAL_PRONOUNS:
            assert PronounMorphologicalAnalyzer.MATCH_REGEXP.match(word)

    def test_pronouns_acusative_match_final_regexp_list(self):
        for word in self.BASIC_PERSONAL_PRONOUNS:
            assert PronounMorphologicalAnalyzer.MATCH_REGEXP.match(word + 'n')

class TestPronounMorphologicalAnalyzerPossessivePronounsList:
    BASIC_POSSESSIVE_PRONOUNS = [
        'mia', 'via', 'lia', 'ŝia', 'ĝia', 'onia', 'nia', 'ilia'
    ]

    def test_pronouns_not_empty(self):
        assert PronounMorphologicalAnalyzer.PERSONAL_POSSESSIVE_PRONOUNS_LIST is not None

    def test_basic_pronouns_included(self):
        for number in self.BASIC_POSSESSIVE_PRONOUNS:
            assert number in PronounMorphologicalAnalyzer.PERSONAL_POSSESSIVE_PRONOUNS_LIST

    def test_pronouns_not_size(self):
        assert len(PronounMorphologicalAnalyzer.PERSONAL_POSSESSIVE_PRONOUNS_LIST) == 8

    def test_pronouns_list_match_regexp(self):
        for word in PronounMorphologicalAnalyzer.PERSONAL_POSSESSIVE_PRONOUNS_LIST:
            assert PronounMorphologicalAnalyzer.PERSONAL_POSSESSIVE_PRONOUNS_LIST_REGEXP.match(word)

    def test_pronouns_match_hardcoded_list(self):
        for word in self.BASIC_POSSESSIVE_PRONOUNS:
            assert PronounMorphologicalAnalyzer.PERSONAL_POSSESSIVE_PRONOUNS_LIST_REGEXP.match(word)

    def test_pronouns_match_final_regexp_list(self):
        for word in PronounMorphologicalAnalyzer.PERSONAL_POSSESSIVE_PRONOUNS_LIST:
            assert PronounMorphologicalAnalyzer.MATCH_REGEXP.match(word)

    def test_pronouns_plural_match_final_regexp_list(self):
        for word in self.BASIC_POSSESSIVE_PRONOUNS:
            assert PronounMorphologicalAnalyzer.MATCH_REGEXP.match(word + 'j')

    def test_pronouns_plural_acusative_match_final_regexp_list(self):
        for word in self.BASIC_POSSESSIVE_PRONOUNS:
            assert PronounMorphologicalAnalyzer.MATCH_REGEXP.match(word + 'jn')

    def test_pronouns_acusative_match_final_regexp_list(self):
        for word in self.BASIC_POSSESSIVE_PRONOUNS:
            assert PronounMorphologicalAnalyzer.MATCH_REGEXP.match(word + 'n')


class TestPronounMorphologicalAnalyzerAllBasicPersonalPronounsList:
    ALL_BASIC_PRONOUNS = [
        'mi', 'vi', 'li', 'ŝi', 'ĝi', 'oni', 'ni', 'ili',
        'mia', 'via', 'lia', 'ŝia', 'ĝia', 'onia', 'nia', 'ilia',
        'miaj', 'viaj', 'liaj', 'ŝiaj', 'ĝiaj', 'oniaj', 'niaj', 'iliaj'
    ]

    def test_pronouns_not_empty(self):
        assert PronounMorphologicalAnalyzer.ALL_PERSONAL_PRONOUNS_REGEXP is not None

    def test_pronouns_list_match_regexp(self):
        for word in PronounMorphologicalAnalyzer.PERSONAL_POSSESSIVE_PRONOUNS_LIST:
            assert PronounMorphologicalAnalyzer.ALL_PERSONAL_PRONOUNS_REGEXP.match(word)

    def test_pronouns_match_hardcoded_list(self):
        for word in self.ALL_BASIC_PRONOUNS:
            assert PronounMorphologicalAnalyzer.ALL_PERSONAL_PRONOUNS_REGEXP.match(word)

    def test_pronouns_match_final_regexp_list(self):
        for word in self.ALL_BASIC_PRONOUNS:
            assert PronounMorphologicalAnalyzer.MATCH_REGEXP.match(word)

    def test_pronouns_acusative_match_final_regexp_list(self):
        for word in self.ALL_BASIC_PRONOUNS:
            assert PronounMorphologicalAnalyzer.MATCH_REGEXP.match(word + 'n')
self.ALL_BASIC_PRONOUNS: 278 | assert PronounMorphologicalAnalyzer.MATCH_REGEXP.match(word + 'n') 279 | 280 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Esperanto Analyzer 2 | 3 | ---- 4 | 5 | ![Esperanto Flag](https://upload.wikimedia.org/wikipedia/commons/thumb/f/f5/Flag_of_Esperanto.svg/640px-Flag_of_Esperanto.svg.png?1535986891157) 6 | 7 | ## Build Status: 8 | 9 | ### Development: 10 | 11 | [![Build Status](https://travis-ci.com/fidelisrafael/esperanto-analyzer.svg?token=k5uMpn3U564QqWar8oA1&branch=development)](https://travis-ci.com/fidelisrafael/esperanto-analyzer) 12 | 13 | [![codecov](https://codecov.io/gh/fidelisrafael/esperanto-analyzer/branch/development/graph/badge.svg)](https://codecov.io/gh/fidelisrafael/esperanto-analyzer) 14 | 15 | ### Master: 16 | 17 | [![Build Status](https://travis-ci.com/fidelisrafael/esperanto-analyzer.svg?token=k5uMpn3U564QqWar8oA1&branch=master)](https://travis-ci.com/fidelisrafael/esperanto-analyzer) 18 | 19 | [![codecov](https://codecov.io/gh/fidelisrafael/esperanto-analyzer/branch/master/graph/badge.svg)](https://codecov.io/gh/fidelisrafael/esperanto-analyzer) 20 | 21 | --- 22 | 23 | ### Atendu! Kio estas Esperanto? (_Wait! What is Esperanto?_) 24 | 25 | That is a fair question! Esperanto is the most widely spoken constructed international auxiliary language 26 | (_conlang_) in the world. It was created back in **1887** by a polish-jewish guy named "Ludwik Lejzer Zamenhof"_(often refered as L.L Zamenhof)_. Zamenhof's goal was to **create an easy and flexible language** that would serve as a universal second language to foster peace and international understanding of people from all around the world. 27 | 28 | The phonology, grammar, vocabulary, and semantics are based on the **Indo-European**(_Italian_,_Spanish_,_French_, _Catalan_, _Russian_, _German_...) languages spoken in Europe. 
The sound inventory is essentially **Slavic**, as is much of the semantics, whereas the vocabulary derives primarily from the **Romance languages**, with a lesser contribution from **Germanic languages** and minor contributions from **Slavic languages** and **Greek**.

The language has more than **130 years of history** and culture now, and a very active community as well.

Esperanto is a SUPER regular language: this means that the language does not have **irregular verbs** or **gender distinction for articles**; besides this, Esperanto has only **16 grammar rules**.
For example, one of the rules: ALL **Nouns** MUST end with the vowel `o`, e.g.:

- `domo`
- `homo`
- `komputilo`
- `komputilisto`

Or **Adjectives** MUST end with the letter `a`, e.g.:

- `bela`
- `granda`
- `varma`
- `malvarma`

If you want to know (or learn) more about Esperanto, you should read the following links:

- [Esperanto at Wikipedia](https://www.wikiwand.com/en/Esperanto)
- [Kio estas Esperanto? (in Esperanto)](https://lernu.net/eo/esperanto) or in [English](https://lernu.net/es/esperanto)
- Esperanto course at Duolingo for: [[English speakers]](https://www.duolingo.com/course/eo/en/Learn-Esperanto-Online), [[Portuguese speakers]](https://www.duolingo.com/course/eo/pt/Learn-Esperanto-Online), [[Spanish speakers]](https://www.duolingo.com/course/eo/es/Learn-Esperanto-Online)
- [Esperanto course at Lernu.net](http://lernu.net/kurso)
- [YouTube series: Esperanto estas...](https://www.youtube.com/watch?v=RlftmTm8I18&list=PL83728C14BFC5822F)

---

## About this project

The aim of this project is to create a tool that can read and grammatically classify Esperanto sentences.

The first part of the project consists of the **Morphological Analysis** of Esperanto words; the next step is to create a **Syntactical Analyzer** for the language as well.
62 | 63 | --- 64 | 65 | ## How to use it? 66 | 67 | ### Demo 68 | 69 | You can check out the demo application built with React: [Online Demo](https://fidelisrafael.github.io/esperanto-analyzer-react/) or [Github Repository](https://github.com/fidelisrafael/esperanto-analyzer-react/) 70 | 71 | [![Frontend application](./docs/esperanto_analyzer_screenshot.png)](https://fidelisrafael.github.io/esperanto-analyzer-react/) 72 | 73 | 74 | Or you can try the demo API hosted on Heroku: 75 | 76 | [https://esperanto-analyzer-api.herokuapp.com/analyze?sentence=Esperanto%20estas%20tre%20facila%20lingvo%20al%20lerni]( 77 | https://esperanto-analyzer-api.herokuapp.com/analyze?sentence=Esperanto%20estas%20tre%20facila%20lingvo%20al%20lerni) 78 | 79 | --- 80 | 81 | ## Installation 82 | 83 | First, install it: 84 | 85 | ```bash 86 | $ pip install esperanto-analyzer 87 | ``` 88 | 89 | ## CLI usage: 90 | 91 | [TODO] (Skip it for now) 92 | 93 | Now you will have the library's source-code files in your system, and also the executable `binary` through the CLI; test it: 94 | 95 | ```bash 96 | $ eo-analyzer --version 97 | > Version: 0.0.1 98 | ``` 99 | 100 | 101 | ```sh 102 | $ eo-analyzer "Jen la alfabeto de Esperanto. Ĉiu litero ĉiam sonas same kaj literumado estas perfekte regula. Klaku la ekzemplojn por aŭdi la elparolon!" 103 | ``` 104 | 105 | ![eo-analyzer response](https://i.imgur.com/4hWUcWY.png) 106 | 107 | Pretty cool, huh? 108 | 109 | ## Python library usage 110 | 111 | Ok, so now you want to import this library in your project, right?
That's super simple, just drop these lines in your project: 112 | 113 | ### Morphological analysis of sentences 114 | 115 | ```py 116 | from esperanto_analyzer import MorphologicalSentenceAnalyzer 117 | 118 | # Creates one instance to morphologically analyze one sentence 119 | analyzer = MorphologicalSentenceAnalyzer("Esperanto estas tre facila lingvo al lerni.") 120 | analyzer.analyze() # => Returns True/False 121 | 122 | # This is the simplest human-readable response of the morphological analysis results 123 | print(analyzer.simple_results()) 124 | # => [['Esperanto', 'Noun'], ['estas', 'Verb'], ['tre', 'Adverb'], ['facila', 'Adjective'], ['lingvo', 'Noun'], ['al', 'Preposition'], ['lerni', 'Verb']] 125 | 126 | ``` 127 | 128 | But you can always deal with a more complex results set if you (or better, your software) want/need to: 129 | 130 | ```py 131 | # The `#results()` method returns an Array object with a more complex structure than the `#simple_results()` method 132 | results = analyzer.analyzes_results() 133 | first_analyze = results[0] 134 | 135 | # Returns an Array object with `AnalyzeResult` objects 136 | print(results) 137 | # => [, ,(...)] 138 | 139 | print(first_analyze) 140 | # => 141 | 142 | # Rich and detailed results using `AnalyzeResult` 143 | print(first_analyze.result) 144 | # => 145 | 146 | # Get any information that you might need using the response objects API 147 | print((first_analyze.result.raw_word, first_analyze.result.matches, first_analyze.result.word_class() )) 148 | # => ('Esperanto', , ) 149 | 150 | ``` 151 | --- 152 | 153 | ### Morphological analysis of a single WORD 154 | 155 | You can also use the internal analyzers of **words** if you want to, e.g.: 156 | 157 | ```py 158 | from esperanto_analyzer.analyzers.morphological import AdjectiveMorphologicalAnalyzer, NumeralMorphologicalAnalyzer 159 | 160 | # There's a total of `10` morphological analyzers, such as `VerbMorphologicalAnalyzer`, `NumeralMorphologicalAnalyzer` 161 | analyzer =
AdjectiveMorphologicalAnalyzer('belajn') 162 | # If it returns true, that means that the input word is a valid adjective. False otherwise 163 | analyzer.analyze() # => returns True/False 164 | 165 | print(analyzer.matches) 166 | # => 167 | print(analyzer.raw_word) # => 'belajn' 168 | 169 | # The `word` property is one class object that inherits from the `Word` class. 170 | print(analyzer.word) 171 | # => 172 | 173 | # Get the base class name for the detected 'Part of Speech' class 174 | print(analyzer.word.__class__.__name__) # => 'Adjective' 175 | 176 | numeral_analyzer = NumeralMorphologicalAnalyzer('naŭcent') 177 | numeral_analyzer.analyze() # => True 178 | 179 | print(numeral_analyzer.word) 180 | # => 181 | 182 | print(numeral_analyzer.matches) 183 | # => 184 | 185 | ``` 186 | 187 | --- 188 | 189 | ### Parts of Speech: Word, Article, Adverb, Adjective, Verb... 190 | 191 | You can even use the **Parts of Speech** (such as `Article`, `Adverb`, `Pronoun`, `Conjunction`) of the language: 192 | 193 | ```py 194 | # `esperanto_analyzer.speech` is home for all parts-of-speech classes 195 | from esperanto_analyzer.speech import Article 196 | 197 | # Raises an `InvalidArticleError` Exception, since 'lo' is not an Esperanto article 198 | article = Article('lo') 199 | 200 | # 'La' is the ONLY valid article in Esperanto 201 | valid_article = Article('la') 202 | 203 | 204 | # All `esperanto_analyzer.speech` objects inherit from the `esperanto_analyzer.speech.word.Word` class 205 | print(valid_article.__class__.__bases__) # => (esperanto_analyzer.speech.word.Word,) 206 | 207 | # La is an invariable article; it's the same for plural and singular sentences, e.g.: 208 | # 'La domo' # The house 209 | # 'La domoj' # The houses 210 | print(valid_article.plural) # => False 211 | 212 | # You can provide some `context` when creating the `Part of Speech` so it can determine if the word should be in plural or singular, e.g.: 213 | print(Article('la', 'domoj').plural) # => True 214 | 215 | 216 |
``` 217 | 218 | --- 219 | 220 | ## Development Setup 221 | 222 | Clone this repository: 223 | 224 | ```bash 225 | $ git clone https://github.com/fidelisrafael/esperanto-analyzer.git 226 | $ cd esperanto-analyzer 227 | ``` 228 | 229 | Make sure you have `python` >= `3.7.0` and `virtualenv` >= `16.0.0` installed: 230 | 231 | ```bash 232 | $ python --version 233 | > Python 3.7.0 234 | $ virtualenv --version 235 | > 16.0.0 236 | ``` 237 | 238 | Otherwise, [install it](https://virtualenv.pypa.io/en/stable/installation/). 239 | 240 | Then, create a new `virtualenv` and activate it: 241 | 242 | ```bash 243 | $ virtualenv venv 244 | $ source venv/bin/activate 245 | ``` 246 | 247 | Install the dependencies for the development and test environments: 248 | 249 | ```bash 250 | # If you just want to install the needed dependencies for production, just run: `make init` 251 | $ make init_dev 252 | > pip install -r development_requirements.txt 253 | > pip install -r test_requirements.txt 254 | > pip install -r requirements.txt 255 | ``` 256 | 257 | Run the tests: 258 | 259 | ```bash 260 | $ make test 261 | > pytest tests --cov-config .coveragerc --cov=esperanto_analyzer --cov-report=html 262 | > =============================================================================== test session starts ================================================================================ 263 | > platform darwin -- Python 3.7.0, pytest-3.7.4, py-1.6.0, pluggy-0.7.1 264 | > rootdir: /(...)/esperanto_analyzer, inifile: 265 | > plugins: cov-2.5.1 266 | > collected 492 items 267 | 268 | > (...) 269 | 270 | > ====================================================================== 492 passed, 2 warnings in 2.61 seconds ====================================================================== 271 | ``` 272 | 273 | You can follow the code coverage stats by opening: `coverage/index.html` 274 | 275 | ### OBS: This library has **100%** code coverage at the time of this writing!
276 | 277 | --- 278 | 279 | ### Built-in JSON Web API 280 | 281 | **_Note: This web API will be published as a separate package in the near future._** 282 | 283 | This library comes with a very simple HTTP server built on top of Flask to provide a Web API interface for integration with other systems. You can run the HTTP server by running the following make task in the root folder of the project: 284 | 285 | ```bash 286 | $ make web_api # or simply running: python web/runserver.py 287 | > python esperanto_analyzer/web/runserver.py 288 | > * Serving Flask app "esperanto_analyzer.web.api.server" (lazy loading) 289 | > * Environment: production 290 | > WARNING: Do not use the development server in a production environment. 291 | > Use a production WSGI server instead. 292 | > * Debug mode: on 293 | > * Running on http://0.0.0.0:5000/ (Press CTRL+C to quit) 294 | ``` 295 | 296 | Or you can just run it from inside any python project with: 297 | 298 | ```py 299 | from esperanto_analyzer.web import run_app 300 | 301 | run_app(debug=True, port=9090) 302 | # * Serving Flask app "esperanto_analyzer.web.api.server" (lazy loading) 303 | # * Environment: production 304 | # WARNING: Do not use the development server in a production environment. 305 | # Use a production WSGI server instead. 306 | # * Debug mode: off 307 | # * Running on http://127.0.0.1:9090/ (Press CTRL+C to quit) 308 | 309 | ``` 310 | 311 | This server has auto-reload (or hot-reload) enabled by default, so you don't need to restart the server when you change the source code. 312 | 313 | To test it: 314 | 315 | ```bash 316 | curl http://127.0.0.1:5000/analyze?sentence=Kio%20estas%20Esperanto%3F%20%C4%9Ci%20estas%20lingvo%20tre%20ta%C5%ADga%20por%20internacia%20komunikado.
317 | ``` 318 | 319 | ### HTTP API Deploy 320 | 321 | If you need an API (like [this one](https://esperanto-analyzer-api.herokuapp.com/analyze?sentence=Esperanto%20estas%20tre%20facila%20lingvo%20al%20lerni)) you can just easily deploy this project to `Heroku` since it comes with a `Procfile` file; this will take no more than 4 commands: 322 | 323 | OBS: You will need [Heroku's CLI](https://devcenter.heroku.com/articles/heroku-cli) for this. 324 | 325 | ```bash 326 | $ git clone https://github.com/fidelisrafael/esperanto-analyzer.git 327 | $ cd esperanto-analyzer 328 | $ heroku create my-esperanto-analyzer 329 | > Creating ⬢ my-analyzer-test... done 330 | $ git push heroku master:master 331 | # Open https://my-esperanto-analyzer.herokuapp.com/analyze?sentence=Kiel%20%vi%fartas 332 | $ heroku open '/analyze?sentence=Kiel%20vi%20fartas?' 333 | ``` 334 | 335 | --- 336 | 337 | ## How does it work? 338 | 339 | This library can be used in a myriad of ways to analyze Esperanto sentences and words; for a complete reference of the API and all the possibilities you should check the 'Full API' section. 340 | 341 | [TODO] 342 | 343 | --- 344 | 345 | 346 | ## :calendar: Roadmap 347 | 348 | - :white_medium_small_square: Create syntactical analyzers 349 | - :white_medium_small_square: Update this Roadmap with more plans 350 | - :white_check_mark: Front-end application. (Done, [check it out](https://fidelisrafael.github.io/esperanto-analyzer-react/)) 351 | 352 | 353 | --- 354 | 355 | ## :thumbsup: Contributing 356 | 357 | Bug reports and pull requests are welcome on GitHub at http://github.com/fidelisrafael/esperanto-analyzer. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](contributor-covenant.org) code of conduct. 358 | 359 | --- 360 | 361 | ## :memo: License 362 | 363 | The library is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
364 | --------------------------------------------------------------------------------