├── .gitignore ├── .pylintrc ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── Pipfile ├── Pipfile.lock ├── README.md ├── README.rst ├── documentation └── docs │ ├── Makefile │ ├── api │ ├── lexnlp.config.en.rst │ ├── lexnlp.config.rst │ ├── lexnlp.extract.all_locales.rst │ ├── lexnlp.extract.all_locales.tests.rst │ ├── lexnlp.extract.common.annotations.rst │ ├── lexnlp.extract.common.copyrights.rst │ ├── lexnlp.extract.common.date_parsing.rst │ ├── lexnlp.extract.common.definitions.rst │ ├── lexnlp.extract.common.durations.rst │ ├── lexnlp.extract.common.entities.rst │ ├── lexnlp.extract.common.ocr_rating.rst │ ├── lexnlp.extract.common.rst │ ├── lexnlp.extract.common.tests.rst │ ├── lexnlp.extract.de.rst │ ├── lexnlp.extract.de.tests.rst │ ├── lexnlp.extract.en.addresses.rst │ ├── lexnlp.extract.en.addresses.tests.rst │ ├── lexnlp.extract.en.contracts.rst │ ├── lexnlp.extract.en.contracts.tests.rst │ ├── lexnlp.extract.en.entities.rst │ ├── lexnlp.extract.en.entities.tests.rst │ ├── lexnlp.extract.en.preprocessing.rst │ ├── lexnlp.extract.en.rst │ ├── lexnlp.extract.en.tests.rst │ ├── lexnlp.extract.es.rst │ ├── lexnlp.extract.es.tests.rst │ ├── lexnlp.extract.ml.classifier.rst │ ├── lexnlp.extract.ml.detector.rst │ ├── lexnlp.extract.ml.detector.tests.rst │ ├── lexnlp.extract.ml.en.definitions.rst │ ├── lexnlp.extract.ml.en.definitions.tests.rst │ ├── lexnlp.extract.ml.en.rst │ ├── lexnlp.extract.ml.rst │ ├── lexnlp.extract.rst │ ├── lexnlp.ml.catalog.rst │ ├── lexnlp.ml.rst │ ├── lexnlp.nlp.en.rst │ ├── lexnlp.nlp.en.segments.rst │ ├── lexnlp.nlp.en.tests.rst │ ├── lexnlp.nlp.en.transforms.rst │ ├── lexnlp.nlp.rst │ ├── lexnlp.nlp.train.en.rst │ ├── lexnlp.nlp.train.en.tests.rst │ ├── lexnlp.nlp.train.rst │ ├── lexnlp.rst │ ├── lexnlp.tests.rst │ ├── lexnlp.utils.lines_processing.rst │ ├── lexnlp.utils.rst │ ├── lexnlp.utils.tests.rst │ ├── lexnlp.utils.unicode.rst │ ├── lexnlp.utils.unicode.tests.rst │ └── modules.rst │ ├── make.bat │ ├── requirements.txt │ └── 
source │ ├── _static │ ├── css │ │ └── custom_styles.css │ └── img │ │ └── lexnlp_logo.png │ ├── about.rst │ ├── api │ ├── lexnlp.config.en.rst │ ├── lexnlp.config.rst │ ├── lexnlp.extract.common.annotations.rst │ ├── lexnlp.extract.common.copyrights.rst │ ├── lexnlp.extract.common.date_parsing.rst │ ├── lexnlp.extract.common.definitions.rst │ ├── lexnlp.extract.common.durations.rst │ ├── lexnlp.extract.common.rst │ ├── lexnlp.extract.common.tests.rst │ ├── lexnlp.extract.de.rst │ ├── lexnlp.extract.de.tests.rst │ ├── lexnlp.extract.en.addresses.rst │ ├── lexnlp.extract.en.addresses.tests.rst │ ├── lexnlp.extract.en.amounts.get_amounts.rst │ ├── lexnlp.extract.en.amounts.get_np.rst │ ├── lexnlp.extract.en.amounts.text2num.rst │ ├── lexnlp.extract.en.citations.get_citations.rst │ ├── lexnlp.extract.en.conditions.create_condition_pattern.rst │ ├── lexnlp.extract.en.conditions.get_conditions.rst │ ├── lexnlp.extract.en.constraints.create_constraint_pattern.rst │ ├── lexnlp.extract.en.constraints.get_constraints.rst │ ├── lexnlp.extract.en.contracts.rst │ ├── lexnlp.extract.en.contracts.tests.rst │ ├── lexnlp.extract.en.copyright.CopyrightNPExtractor.rst │ ├── lexnlp.extract.en.copyright.get_copyright.rst │ ├── lexnlp.extract.en.dates.build_date_model.rst │ ├── lexnlp.extract.en.dates.get_date_features.rst │ ├── lexnlp.extract.en.dates.get_dates.rst │ ├── lexnlp.extract.en.dates.get_dates_list.rst │ ├── lexnlp.extract.en.dates.get_raw_date_list.rst │ ├── lexnlp.extract.en.dates.get_raw_dates.rst │ ├── lexnlp.extract.en.dates.train_default_model.rst │ ├── lexnlp.extract.en.definitions.get_definitions.rst │ ├── lexnlp.extract.en.dict_entities.SearchResultPosition.rst │ ├── lexnlp.extract.en.dict_entities.add_alias_to_entity.rst │ ├── lexnlp.extract.en.dict_entities.add_aliases_to_entity.rst │ ├── lexnlp.extract.en.dict_entities.alias_is_blacklisted.rst │ ├── lexnlp.extract.en.dict_entities.conflicts_take_first_by_id.rst │ ├── 
lexnlp.extract.en.dict_entities.conflicts_top_by_priority.rst │ ├── lexnlp.extract.en.dict_entities.entity_alias.rst │ ├── lexnlp.extract.en.dict_entities.entity_config.rst │ ├── lexnlp.extract.en.dict_entities.find_dict_entities.rst │ ├── lexnlp.extract.en.dict_entities.get_alias_id.rst │ ├── lexnlp.extract.en.dict_entities.get_alias_text.rst │ ├── lexnlp.extract.en.dict_entities.get_entity_aliases.rst │ ├── lexnlp.extract.en.dict_entities.get_entity_id.rst │ ├── lexnlp.extract.en.dict_entities.get_entity_name.rst │ ├── lexnlp.extract.en.dict_entities.get_entity_priority.rst │ ├── lexnlp.extract.en.dict_entities.normalize_text.rst │ ├── lexnlp.extract.en.dict_entities.prepare_alias_blacklist_dict.rst │ ├── lexnlp.extract.en.distances.get_distances.rst │ ├── lexnlp.extract.en.durations.get_durations.rst │ ├── lexnlp.extract.en.entities.rst │ ├── lexnlp.extract.en.entities.tests.rst │ ├── lexnlp.extract.en.geoentities.get_geoentities.rst │ ├── lexnlp.extract.en.money.get_money.rst │ ├── lexnlp.extract.en.percents.get_percents.rst │ ├── lexnlp.extract.en.pii.get_pii.rst │ ├── lexnlp.extract.en.pii.get_ssns.rst │ ├── lexnlp.extract.en.pii.get_us_phones.rst │ ├── lexnlp.extract.en.preprocessing.rst │ ├── lexnlp.extract.en.ratios.get_ratios.rst │ ├── lexnlp.extract.en.regulations.get_regulations.rst │ ├── lexnlp.extract.en.rst │ ├── lexnlp.extract.en.tests.rst │ ├── lexnlp.extract.en.trademarks.get_trademarks.rst │ ├── lexnlp.extract.en.urls.get_urls.rst │ ├── lexnlp.extract.en.utils.NPExtractor.rst │ ├── lexnlp.extract.en.utils.strip_unicode_punctuation.rst │ ├── lexnlp.extract.es.rst │ ├── lexnlp.extract.es.tests.rst │ ├── lexnlp.extract.ml.classifier.rst │ ├── lexnlp.extract.ml.detector.rst │ ├── lexnlp.extract.ml.detector.tests.rst │ ├── lexnlp.extract.ml.en.definitions.rst │ ├── lexnlp.extract.ml.en.definitions.tests.rst │ ├── lexnlp.extract.ml.en.rst │ ├── lexnlp.extract.ml.rst │ ├── lexnlp.extract.rst │ ├── lexnlp.nlp.en.rst │ ├── 
lexnlp.nlp.en.segments.pages.MODULE_PATH.rst │ ├── lexnlp.nlp.en.segments.pages.PAGE_SEGMENTER_MODEL.rst │ ├── lexnlp.nlp.en.segments.pages.build_page_break_features.rst │ ├── lexnlp.nlp.en.segments.pages.get_pages.rst │ ├── lexnlp.nlp.en.segments.paragraphs.MODULE_PATH.rst │ ├── lexnlp.nlp.en.segments.paragraphs.Optional.rst │ ├── lexnlp.nlp.en.segments.paragraphs.PARAGRAPH_SEGMENTER_MODEL.rst │ ├── lexnlp.nlp.en.segments.paragraphs.RE_NEW_LINE.rst │ ├── lexnlp.nlp.en.segments.paragraphs.Union.rst │ ├── lexnlp.nlp.en.segments.paragraphs.build_paragraph_break_features.rst │ ├── lexnlp.nlp.en.segments.paragraphs.get_paragraphs.rst │ ├── lexnlp.nlp.en.segments.paragraphs.splitlines_with_spans.rst │ ├── lexnlp.nlp.en.segments.rst │ ├── lexnlp.nlp.en.segments.sections.MODULE_PATH.rst │ ├── lexnlp.nlp.en.segments.sections.SECTION_SEGMENTER_MODEL.rst │ ├── lexnlp.nlp.en.segments.sections.build_section_break_features.rst │ ├── lexnlp.nlp.en.segments.sections.get_sections.rst │ ├── lexnlp.nlp.en.segments.sentences.Any.rst │ ├── lexnlp.nlp.en.segments.sentences.MODULE_PATH.rst │ ├── lexnlp.nlp.en.segments.sentences.PRE_PROCESS_TEXT_REMOVE.rst │ ├── lexnlp.nlp.en.segments.sentences.SENTENCE_SEGMENTER_MODEL.rst │ ├── lexnlp.nlp.en.segments.sentences.SENTENCE_SPLITTERS.rst │ ├── lexnlp.nlp.en.segments.sentences.SENTENCE_SPLITTERS_LOWER_EXCLUDE.rst │ ├── lexnlp.nlp.en.segments.sentences.STRIP_GROUP.rst │ ├── lexnlp.nlp.en.segments.sentences.Union.rst │ ├── lexnlp.nlp.en.segments.sentences.build_sentence_model.rst │ ├── lexnlp.nlp.en.segments.sentences.extra_abbreviations.rst │ ├── lexnlp.nlp.en.segments.sentences.get_sentence__with_coords_list.rst │ ├── lexnlp.nlp.en.segments.sentences.get_sentence_list.rst │ ├── lexnlp.nlp.en.segments.sentences.get_sentence_span.rst │ ├── lexnlp.nlp.en.segments.sentences.get_sentence_span_list.rst │ ├── lexnlp.nlp.en.segments.sentences.post_process_sentence.rst │ ├── lexnlp.nlp.en.segments.sentences.pre_process_document.rst │ ├── 
lexnlp.nlp.en.segments.titles.MODULE_PATH.rst │ ├── lexnlp.nlp.en.segments.titles.SECTION_SEGMENTER_MODEL.rst │ ├── lexnlp.nlp.en.segments.titles.UNICODE_CHAR_TOP_CATEGORY_MAPPING.rst │ ├── lexnlp.nlp.en.segments.titles.build_document_title_features.rst │ ├── lexnlp.nlp.en.segments.titles.build_model.rst │ ├── lexnlp.nlp.en.segments.titles.build_title_features.rst │ ├── lexnlp.nlp.en.segments.titles.get_titles.rst │ ├── lexnlp.nlp.en.segments.utils.build_document_distribution.rst │ ├── lexnlp.nlp.en.segments.utils.build_document_line_distribution.rst │ ├── lexnlp.nlp.en.tests.rst │ ├── lexnlp.nlp.en.tokens.BIGRAM_COLLOCATIONS.rst │ ├── lexnlp.nlp.en.tokens.COLLOCATION_SIZE.rst │ ├── lexnlp.nlp.en.tokens.DEFAULT_LEMMATIZER.rst │ ├── lexnlp.nlp.en.tokens.DEFAULT_STEMMER.rst │ ├── lexnlp.nlp.en.tokens.MODULE_PATH.rst │ ├── lexnlp.nlp.en.tokens.STOPWORDS.rst │ ├── lexnlp.nlp.en.tokens.TRIGRAM_COLLOCATIONS.rst │ ├── lexnlp.nlp.en.tokens.get_adjectives.rst │ ├── lexnlp.nlp.en.tokens.get_adverbs.rst │ ├── lexnlp.nlp.en.tokens.get_lemma_list.rst │ ├── lexnlp.nlp.en.tokens.get_lemmas.rst │ ├── lexnlp.nlp.en.tokens.get_nouns.rst │ ├── lexnlp.nlp.en.tokens.get_stem_list.rst │ ├── lexnlp.nlp.en.tokens.get_stems.rst │ ├── lexnlp.nlp.en.tokens.get_token_list.rst │ ├── lexnlp.nlp.en.tokens.get_tokens.rst │ ├── lexnlp.nlp.en.tokens.get_verbs.rst │ ├── lexnlp.nlp.en.tokens.get_wordnet_pos.rst │ ├── lexnlp.nlp.en.transforms.characters.MODULE_PATH.rst │ ├── lexnlp.nlp.en.transforms.characters.get_character_distribution.rst │ ├── lexnlp.nlp.en.transforms.characters.get_character_ngram_distribution.rst │ ├── lexnlp.nlp.en.transforms.rst │ ├── lexnlp.nlp.en.transforms.tokens.MODULE_PATH.rst │ ├── lexnlp.nlp.en.transforms.tokens.get_bigram_distribution.rst │ ├── lexnlp.nlp.en.transforms.tokens.get_ngram_distribution.rst │ ├── lexnlp.nlp.en.transforms.tokens.get_skipgram_distribution.rst │ ├── lexnlp.nlp.en.transforms.tokens.get_token_distribution.rst │ ├── 
lexnlp.nlp.en.transforms.tokens.get_trigram_distribution.rst │ ├── lexnlp.nlp.rst │ ├── lexnlp.rst │ ├── lexnlp.tests.rst │ ├── lexnlp.utils.lines_processing.rst │ ├── lexnlp.utils.rst │ ├── lexnlp.utils.tests.rst │ ├── lexnlp.utils.unicode.rst │ ├── lexnlp.utils.unicode.tests.rst │ ├── lexnlpprivate.extract.en.addresses.rst │ ├── lexnlpprivate.extract.en.addresses.tests.rst │ ├── lexnlpprivate.extract.en.rst │ ├── lexnlpprivate.extract.rst │ ├── lexnlpprivate.rst │ ├── modules.rst │ └── setup.rst │ ├── changes.rst │ ├── conf.py │ ├── index.rst │ ├── lexnlp.rst │ ├── license.rst │ └── modules │ ├── extract │ ├── de │ │ ├── amounts.rst │ │ ├── citations.rst │ │ ├── dates.rst │ │ ├── durations.rst │ │ └── percents.rst │ ├── en │ │ ├── acts.rst │ │ ├── amounts.rst │ │ ├── citations.rst │ │ ├── companies.rst │ │ ├── conditions.rst │ │ ├── constraints.rst │ │ ├── copyright.rst │ │ ├── courts.rst │ │ ├── cusip.rst │ │ ├── dates.rst │ │ ├── definitions.rst │ │ ├── distances.rst │ │ ├── durations.rst │ │ ├── geoentities.rst │ │ ├── money.rst │ │ ├── percents.rst │ │ ├── pii.rst │ │ ├── ratios.rst │ │ ├── regulations.rst │ │ ├── trademarks.rst │ │ └── urls.rst │ ├── es │ │ └── dates.rst │ └── extract.rst │ └── nlp │ ├── en │ ├── segments_pages.rst │ ├── segments_paragraphs.rst │ ├── segments_sections.rst │ ├── segments_sentences.rst │ ├── segments_titles.rst │ ├── segments_utils.rst │ ├── tokens.rst │ ├── transforms_character.rst │ └── transforms_tokens.rst │ └── nlp.rst ├── index.rst ├── lexnlp ├── __init__.py ├── config │ ├── __init__.py │ ├── de │ │ └── de_courts.csv │ ├── en │ │ ├── __init__.py │ │ ├── au_courts.csv │ │ ├── ca_courts.csv │ │ ├── company_types.csv │ │ ├── company_types.py │ │ ├── geoentities_config.py │ │ ├── us_courts.csv │ │ └── us_state_courts.csv │ ├── es │ │ ├── es_courts.csv │ │ └── es_regulations.csv │ └── stanford.py ├── extract │ ├── __init__.py │ ├── all_locales │ │ ├── __init__.py │ │ ├── amounts.py │ │ ├── citations.py │ │ ├── copyrights.py │ 
│ ├── court_citations.py │ │ ├── courts.py │ │ ├── dates.py │ │ ├── definitions.py │ │ ├── durations.py │ │ ├── geoentities.py │ │ ├── languages.py │ │ ├── money.py │ │ ├── percents.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ └── test_locales.py │ ├── common │ │ ├── __init__.py │ │ ├── annotation_locator_type.py │ │ ├── annotation_type.py │ │ ├── annotations │ │ │ ├── __init__.py │ │ │ ├── act_annotation.py │ │ │ ├── address_annotation.py │ │ │ ├── amount_annotation.py │ │ │ ├── citation_annotation.py │ │ │ ├── company_annotation.py │ │ │ ├── condition_annotation.py │ │ │ ├── constraint_annotation.py │ │ │ ├── copyright_annotation.py │ │ │ ├── court_annotation.py │ │ │ ├── court_citation_annotation.py │ │ │ ├── cusip_annotation.py │ │ │ ├── date_annotation.py │ │ │ ├── definition_annotation.py │ │ │ ├── distance_annotation.py │ │ │ ├── duration_annotation.py │ │ │ ├── geo_annotation.py │ │ │ ├── law_annotation.py │ │ │ ├── money_annotation.py │ │ │ ├── percent_annotation.py │ │ │ ├── phone_annotation.py │ │ │ ├── phrase_position_finder.py │ │ │ ├── ratio_annotation.py │ │ │ ├── regulation_annotation.py │ │ │ ├── ssn_annotation.py │ │ │ ├── text_annotation.py │ │ │ ├── trademark_annotation.py │ │ │ └── url_annotation.py │ │ ├── base_path.py │ │ ├── copyrights │ │ │ ├── __init__.py │ │ │ ├── copyright_en_style_parser.py │ │ │ ├── copyright_parser.py │ │ │ ├── copyright_parsing_methods.py │ │ │ └── copyright_pattern_found.py │ │ ├── date_parsing │ │ │ ├── __init__.py │ │ │ └── datefinder.py │ │ ├── dates.py │ │ ├── dates_classifier_model.py │ │ ├── definitions │ │ │ ├── __init__.py │ │ │ ├── common_definition_patterns.py │ │ │ ├── definition_match.py │ │ │ └── universal_definition_parser.py │ │ ├── durations │ │ │ ├── __init__.py │ │ │ └── durations_parser.py │ │ ├── entities │ │ │ ├── __init__.py │ │ │ └── entity_banlist.py │ │ ├── fact_extracting.py │ │ ├── geoentity_detector.py │ │ ├── language_dictionary_reader.py │ │ ├── money_detector.py │ │ ├── ocr_rating │ │ 
│ ├── __init__.py │ │ │ ├── lang_vector_distribution_builder.py │ │ │ ├── ocr_rating_calculator.py │ │ │ └── reference_vectors │ │ │ │ ├── de.pickle │ │ │ │ └── en.pickle │ │ ├── pattern_found.py │ │ ├── special_characters.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── definitions_text_annotator.py │ │ │ ├── test_annotation.py │ │ │ ├── test_date_classifier_model.py │ │ │ ├── test_datefinder.py │ │ │ ├── test_datefinder_tokenizer.py │ │ │ ├── test_entity_banlist.py │ │ │ ├── test_fact_extractor.py │ │ │ ├── test_lang_vector_distribution_builder.py │ │ │ ├── test_ocr_rating.py │ │ │ ├── test_phrase_position_finder.py │ │ │ ├── test_text_beautifier.py │ │ │ └── test_universal_courts_parser.py │ │ ├── text_beautifier.py │ │ ├── text_pattern_collector.py │ │ ├── universal_court_parser.py │ │ └── year_parser.py │ ├── de │ │ ├── __init__.py │ │ ├── amounts.py │ │ ├── citations.py │ │ ├── copyrights.py │ │ ├── court_citations.py │ │ ├── courts.py │ │ ├── data │ │ │ └── abbreviations.txt │ │ ├── date_model.pickle │ │ ├── date_model.py │ │ ├── dates.py │ │ ├── dates_de_classifier.py │ │ ├── de_date_parser.py │ │ ├── definitions.py │ │ ├── durations.py │ │ ├── geoentities.py │ │ ├── language_tokens.py │ │ ├── laws.py │ │ ├── model.pickle │ │ ├── money.py │ │ ├── percents.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ ├── test_amounts.py │ │ │ ├── test_citations.py │ │ │ ├── test_copyrights.py │ │ │ ├── test_court_citations.py │ │ │ ├── test_courts.py │ │ │ ├── test_dates.py │ │ │ ├── test_definitions.py │ │ │ ├── test_durations.py │ │ │ ├── test_geoentities.py │ │ │ ├── test_laws.py │ │ │ ├── test_money.py │ │ │ └── test_percents.py │ ├── en │ │ ├── __init__.py │ │ ├── acts.py │ │ ├── addresses │ │ │ ├── __init__.py │ │ │ ├── address_features.py │ │ │ ├── addresses.py │ │ │ ├── addresses_clf.pickle │ │ │ ├── data │ │ │ │ ├── building_suffixes.csv │ │ │ │ ├── city_name_words.pickle │ │ │ │ ├── nltk_pos_tag_indexes.json │ │ │ │ ├── provinces.txt │ │ │ │ ├── 
street_directions.csv │ │ │ │ └── street_suffixes.csv │ │ │ └── tests │ │ │ │ ├── __init__.py │ │ │ │ └── test_addresses.py │ │ ├── amounts.py │ │ ├── citations.py │ │ ├── conditions.py │ │ ├── constraints.py │ │ ├── contracts │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── contract_type_detector.py │ │ │ ├── predictors.py │ │ │ └── tests │ │ │ │ ├── __init__.py │ │ │ │ ├── test_contract_type.py │ │ │ │ └── test_contracts.py │ │ ├── copyright.py │ │ ├── courts.py │ │ ├── cusip.py │ │ ├── data │ │ │ ├── abbreviations.txt │ │ │ ├── en_company_banlist.csv │ │ │ └── pronouns.txt │ │ ├── date_model.pickle │ │ ├── date_model.py │ │ ├── dates.py │ │ ├── definition_parsing_methods.py │ │ ├── definitions.py │ │ ├── dict_entities.py │ │ ├── distances.py │ │ ├── durations.py │ │ ├── en_language_tokens.py │ │ ├── entities │ │ │ ├── __init__.py │ │ │ ├── company_detector.py │ │ │ ├── company_np_extractor.py │ │ │ ├── nltk_maxent.py │ │ │ ├── nltk_re.py │ │ │ ├── nltk_tokenizer.py │ │ │ ├── stanford_ner.py │ │ │ └── tests │ │ │ │ ├── __init__.py │ │ │ │ ├── test_get_companies.py │ │ │ │ ├── test_nltk_maxent.py │ │ │ │ └── test_stanford_ner.py │ │ ├── geoentities.py │ │ ├── introductory_words_detector.py │ │ ├── money.py │ │ ├── percents.py │ │ ├── pii.py │ │ ├── preprocessing │ │ │ ├── __init__.py │ │ │ └── span_tokenizer.py │ │ ├── ratios.py │ │ ├── regulations.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── test_acts.py │ │ │ ├── test_amounts.py │ │ │ ├── test_amounts_plain.py │ │ │ ├── test_citations.py │ │ │ ├── test_citations_plain.py │ │ │ ├── test_conditions.py │ │ │ ├── test_conditions_plain.py │ │ │ ├── test_constraints.py │ │ │ ├── test_constraints_plain.py │ │ │ ├── test_copyright.py │ │ │ ├── test_copyright_plain.py │ │ │ ├── test_courts.py │ │ │ ├── test_courts_plain.py │ │ │ ├── test_cusip.py │ │ │ ├── test_dates.py │ │ │ ├── test_dates_plain.py │ │ │ ├── test_definitions.py │ │ │ ├── test_definitions_template.py │ │ │ ├── test_dict_entities.py │ │ │ ├── 
test_distance.py │ │ │ ├── test_distances_plain.py │ │ │ ├── test_durations.py │ │ │ ├── test_durations_plain.py │ │ │ ├── test_geoentities.py │ │ │ ├── test_geoentities_plain.py │ │ │ ├── test_introductory_words_detector.py │ │ │ ├── test_money.py │ │ │ ├── test_money_plain.py │ │ │ ├── test_parsing_speed.py │ │ │ ├── test_percent_plain.py │ │ │ ├── test_percents.py │ │ │ ├── test_phone_plain.py │ │ │ ├── test_pii.py │ │ │ ├── test_ratios.py │ │ │ ├── test_ratios_plain.py │ │ │ ├── test_regulations.py │ │ │ ├── test_regulations_plain.py │ │ │ ├── test_span_tokenizer.py │ │ │ ├── test_ssn_plain.py │ │ │ ├── test_trademarks.py │ │ │ ├── test_trademarks_plain.py │ │ │ ├── test_urls.py │ │ │ └── test_urls_plain.py │ │ ├── trademarks.py │ │ ├── urls.py │ │ └── utils.py │ ├── es │ │ ├── __init__.py │ │ ├── copyrights.py │ │ ├── courts.py │ │ ├── dates.py │ │ ├── definitions.py │ │ ├── language_tokens.py │ │ ├── regulations.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ ├── test_copyrights.py │ │ │ ├── test_courts.py │ │ │ ├── test_dates.py │ │ │ ├── test_definitions.py │ │ │ └── test_regulations.py │ └── ml │ │ ├── __init__.py │ │ ├── classifier │ │ ├── __init__.py │ │ ├── base_token_sequence_classifier_model.py │ │ ├── data │ │ │ ├── unicode_character_categories.pickle │ │ │ ├── unicode_character_category_mapping.pickle │ │ │ └── unicode_character_top_category_mapping.pickle │ │ ├── spacy_token_sequence_model.py │ │ └── token_sequence_model.py │ │ ├── detector │ │ ├── __init__.py │ │ ├── artifact_detector.py │ │ ├── detecting_settings.py │ │ ├── phrase_constructor.py │ │ ├── sample_processor.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ └── test_phrase_constructor.py │ │ ├── en │ │ ├── __init__.py │ │ ├── data │ │ │ └── definition_model_layered.pickle.gzip │ │ └── definitions │ │ │ ├── __init__.py │ │ │ ├── definition_phrase_detector.py │ │ │ ├── definition_term_detector.py │ │ │ ├── layered_definition_detector.py │ │ │ └── tests │ │ │ ├── __init__.py │ │ │ └── 
test_layered_definition_detector.py │ │ └── environment.py ├── ml │ ├── README.md │ ├── __init__.py │ ├── catalog │ │ ├── __init__.py │ │ └── download.py │ ├── gensim_utils.py │ ├── normalizers.py │ ├── predictor.py │ ├── sklearn_transformers.py │ └── vectorizers.py ├── nlp │ ├── __init__.py │ ├── en │ │ ├── __init__.py │ │ ├── collocation_bigrams_100.pickle │ │ ├── collocation_bigrams_1000.pickle │ │ ├── collocation_bigrams_10000.pickle │ │ ├── collocation_bigrams_100000.pickle │ │ ├── collocation_bigrams_50000.pickle │ │ ├── collocation_trigrams_100.pickle │ │ ├── collocation_trigrams_1000.pickle │ │ ├── collocation_trigrams_10000.pickle │ │ ├── collocation_trigrams_100000.pickle │ │ ├── collocation_trigrams_50000.pickle │ │ ├── segments │ │ │ ├── __init__.py │ │ │ ├── heading_heuristics.py │ │ │ ├── page_segmenter.pickle │ │ │ ├── pages.py │ │ │ ├── paragraph_segmenter.pickle │ │ │ ├── paragraphs.py │ │ │ ├── section_segmenter.pickle │ │ │ ├── sections.py │ │ │ ├── sentence_segmenter.pickle │ │ │ ├── sentences.py │ │ │ ├── title_locator.pickle │ │ │ ├── titles.py │ │ │ └── utils.py │ │ ├── stanford.py │ │ ├── stopwords.pickle │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── test_heading_heuristics.py │ │ │ ├── test_pages.py │ │ │ ├── test_paragraphs.py │ │ │ ├── test_sections.py │ │ │ ├── test_sentences.py │ │ │ ├── test_stanford.py │ │ │ ├── test_stanford_missing.py │ │ │ ├── test_titles.py │ │ │ ├── test_tokens.py │ │ │ └── test_transforms.py │ │ ├── tokens.py │ │ └── transforms │ │ │ ├── __init__.py │ │ │ ├── characters.py │ │ │ └── tokens.py │ └── train │ │ ├── __init__.py │ │ ├── en │ │ ├── __init__.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── test_train_section_segmentizer.py │ │ └── train_section_segmanizer.py │ │ └── train_data_manager.py ├── tests │ ├── __init__.py │ ├── dictionary_comparer.py │ ├── lexnlp_tests.py │ ├── tests │ │ ├── test_lexnlp_tests.py │ │ └── test_upload_benchmarks.py │ ├── typed_annotations_tests.py │ ├── upload_benchmarks.py │ 
├── utility_for_testing.py │ └── values_comparer.py └── utils │ ├── __init__.py │ ├── amount_delimiting.py │ ├── decorators.py │ ├── iterating_helpers.py │ ├── lines_processing │ ├── __init__.py │ ├── line_processor.py │ ├── parsed_text_corrector.py │ ├── parsed_text_quality_estimator.py │ └── phrase_finder.py │ ├── map.py │ ├── parse_df.py │ ├── pos_adjustments.py │ ├── tests │ ├── __init__.py │ ├── test_line_processor.py │ ├── test_map.py │ ├── test_parse_df.py │ ├── test_parsed_text_corrector.py │ ├── test_parsed_text_quality_estimator.py │ └── test_phrase_finder.py │ ├── unicode │ ├── __init__.py │ ├── tests │ │ ├── __init__.py │ │ └── test_unicode_lookup.py │ ├── unicode_character_categories.pickle │ ├── unicode_character_category_mapping.pickle │ ├── unicode_character_top_category_mapping.pickle │ └── unicode_lookup.py │ └── unpickler.py ├── libs ├── download_stanford_nlp.sh └── download_wiki.sh ├── notebooks ├── classification │ ├── contract-type-classifier │ │ ├── 0_download_corpora.ipynb │ │ ├── 1_preprocess_training_data.ipynb │ │ ├── 2_train_gensim_doc2vec_model.ipynb │ │ └── 3_train_sklearn_is_contract_classifier.ipynb │ └── is-contract-classifier │ │ ├── 0_download_corpora.ipynb │ │ ├── 1_preprocess_training_data.ipynb │ │ ├── 2_train_gensim_doc2vec_model.ipynb │ │ └── 3_train_sklearn_is_contract_classifier.ipynb ├── embeddings │ ├── 10k │ │ ├── build_word2vec_model.ipynb │ │ ├── build_word2vec_model_spacy.ipynb │ │ ├── build_word2vec_model_spacy.py │ │ └── test_w2v.ipynb │ └── contracts │ │ ├── build_doc2vec_model_all.py │ │ ├── build_word2vec_model.ipynb │ │ ├── build_word2vec_model_all.py │ │ ├── build_word2vec_model_articles.py │ │ ├── build_word2vec_model_credit.py │ │ ├── build_word2vec_model_employment.py │ │ ├── build_word2vec_model_leases.py │ │ ├── build_word2vec_model_operating.py │ │ ├── build_word2vec_model_services.py │ │ ├── build_word2vec_model_underwriting.py │ │ └── contract_classifier │ │ ├── build_classifier_doc2vec.ipynb │ │ ├── 
build_classifier_doc2vec_v2.ipynb │ │ └── build_classifier_word2vec.ipynb ├── extraction │ ├── employment │ │ └── code_employment.ipynb │ └── en │ │ ├── build_date_locator.ipynb │ │ ├── build_duration_locator.ipynb │ │ ├── test_dates.ipynb │ │ └── test_durations.ipynb └── nlp │ └── en │ ├── build_collocation_pickle.py │ ├── build_stopword_pickle.py │ ├── page_segmentation.ipynb │ ├── paragraph_segmentation.ipynb │ ├── section_segmentation.ipynb │ ├── sentence_segmentation.ipynb │ ├── stopwords_collocations.ipynb │ ├── term_locator_example.ipynb │ ├── test_segmenter.ipynb │ └── test_tokens.ipynb ├── python-requirements-dev.txt ├── python-requirements-full.txt ├── python-requirements-notes.txt ├── python-requirements.txt ├── readthedocs.yml ├── scripts ├── create_release_branch.sh ├── download_contract_samples.sh ├── download_tika.sh ├── run_tika.sh └── unify_py_file_structure.py ├── setup.py └── test_data ├── 1007273_2014-03-11_2 ├── 1031296_2004-11-04 ├── 1100644_2016-11-21 ├── 1205332_2008-05-08_3 ├── 1582586_2015-08-31 ├── lexnlp ├── extract │ ├── common │ │ ├── entities │ │ │ ├── en_banlist_full.csv │ │ │ └── en_banlist_one_col.csv │ │ └── ocr_grade │ │ │ ├── lorem_ipsum.txt │ │ │ ├── pretty_en_file.txt │ │ │ └── totem_und_tabu.txt │ ├── de │ │ ├── laws │ │ │ ├── de_concept_sample.csv │ │ │ ├── gesetze_list.csv │ │ │ └── verordnungen_list.csv │ │ ├── sample_de_court_citations01.txt │ │ ├── sample_de_courts01.txt │ │ ├── sample_de_courts02.txt │ │ ├── sample_de_definitions01.txt │ │ ├── sample_de_definitions02.txt │ │ ├── sample_de_definitions03.txt │ │ └── sample_de_definitions04.txt │ ├── en │ │ ├── addresses │ │ │ └── tests │ │ │ │ └── test_addresses │ │ │ │ ├── test_bad_cases.csv │ │ │ │ └── test_get_address.csv │ │ ├── contracts │ │ │ └── tests │ │ │ │ └── test_contracts │ │ │ │ └── test_is_contract.csv │ │ ├── copyrights │ │ │ └── bigfile.txt │ │ ├── courts │ │ │ └── courts_sample_01.txt │ │ ├── definitions │ │ │ ├── definitions_fp_collections.txt │ │ │ ├── 
definitions_hit_or_miss.txt │ │ │ ├── en_definitions_sample_doc.txt │ │ │ └── pure_definitions.txt │ │ ├── entities │ │ │ └── tests │ │ │ │ ├── test_nltk_maxent │ │ │ │ ├── test_companies.csv │ │ │ │ ├── test_companies_count.csv │ │ │ │ ├── test_companies_rs.csv │ │ │ │ ├── test_gpe_in.csv │ │ │ │ ├── test_gpes.csv │ │ │ │ ├── test_gpes_rs.csv │ │ │ │ ├── test_noun_phrases.csv │ │ │ │ ├── test_person_in.csv │ │ │ │ ├── test_persons.csv │ │ │ │ └── test_persons_rs.csv │ │ │ │ ├── test_nltk_re │ │ │ │ ├── test_companies_in_article.csv │ │ │ │ ├── test_company_article_regex.csv │ │ │ │ ├── test_company_as.csv │ │ │ │ └── test_company_regex.csv │ │ │ │ └── test_stanford_ner │ │ │ │ ├── test_stanford_locations.csv │ │ │ │ ├── test_stanford_name_example_in.csv │ │ │ │ └── test_stanford_org_example_in.csv │ │ └── tests │ │ │ ├── test_amounts │ │ │ ├── test_get_amount.csv │ │ │ ├── test_get_amount_non_round_float.csv │ │ │ └── test_get_amount_source.csv │ │ │ ├── test_citations │ │ │ └── test_get_citations.csv │ │ │ ├── test_conditions │ │ │ └── test_condition_fixed_example.csv │ │ │ ├── test_constraints │ │ │ └── test_constraint_fixed_example.csv │ │ │ ├── test_copyright │ │ │ └── test_copyright.csv │ │ │ ├── test_courts │ │ │ ├── test_courts.csv │ │ │ ├── test_courts_longest_match.csv │ │ │ ├── test_courts_rs.csv │ │ │ └── us_courts.csv │ │ │ ├── test_dates │ │ │ ├── test_fixed_dates.csv │ │ │ ├── test_fixed_dates_nonstrict.csv │ │ │ ├── test_fixed_dates_source.csv │ │ │ └── test_fixed_raw_dates.csv │ │ │ ├── test_definitions │ │ │ ├── bad_def.txt │ │ │ ├── test_definition_fixed.csv │ │ │ └── test_definition_in_sentences.csv │ │ │ ├── test_dict_entities │ │ │ └── test_normalize_text.csv │ │ │ ├── test_distance │ │ │ ├── test_get_distance.csv │ │ │ └── test_get_distance_source.csv │ │ │ ├── test_durations │ │ │ ├── test_get_durations.csv │ │ │ └── test_get_durations_source.csv │ │ │ ├── test_geoentities │ │ │ ├── geoaliases.csv │ │ │ ├── geoentities.csv │ │ │ ├── 
test_geoentities.csv │ │ │ ├── test_geoentities_alias_filtering.csv │ │ │ ├── test_geoentities_en_equal_match_take_lowest_id.csv │ │ │ └── test_geoentities_en_equal_match_take_top_prio.csv │ │ │ ├── test_money │ │ │ ├── test_get_money.csv │ │ │ └── test_get_money_source.csv │ │ │ ├── test_percents │ │ │ ├── test_get_percents.csv │ │ │ └── test_get_percents_source.csv │ │ │ ├── test_pii │ │ │ ├── test_pii_list.csv │ │ │ ├── test_pii_list_source.csv │ │ │ ├── test_ssn_list.csv │ │ │ ├── test_ssn_list_source.csv │ │ │ ├── test_us_phone_list.csv │ │ │ └── test_us_phone_list_source.csv │ │ │ ├── test_ratios │ │ │ ├── test_get_ratios.csv │ │ │ └── test_get_ratios_source.csv │ │ │ ├── test_regulations │ │ │ └── test_get_regulations.csv │ │ │ ├── test_trademarks │ │ │ └── test_trademarks.csv │ │ │ └── test_urls │ │ │ └── test_urls.csv │ └── es │ │ ├── definitions │ │ └── eula.txt │ │ └── sample_es_regulations.txt ├── ml │ └── en │ │ └── layered_definitions_train_data.jsonl ├── nlp │ └── en │ │ ├── heading │ │ ├── heading_doc_paragraphs.csv │ │ ├── heading_doc_sections.txt │ │ ├── heading_doc_sentences.txt │ │ └── heading_document.txt │ │ └── tests │ │ ├── test_pages │ │ └── test_page_examples.csv │ │ ├── test_paragraphs │ │ └── test_paragraph_examples.csv │ │ ├── test_sections │ │ └── skewed_document.txt │ │ ├── test_sentences │ │ ├── test_pre_process_document.csv │ │ └── test_sentence_segmenter.csv │ │ ├── test_stanford │ │ ├── test_stanford_noun_lemmas.csv │ │ ├── test_stanford_nouns.csv │ │ ├── test_stanford_tokens.csv │ │ ├── test_stanford_tokens_lc.csv │ │ ├── test_stanford_tokens_lc_sw.csv │ │ ├── test_stanford_tokens_sw.csv │ │ ├── test_stanford_verb_lemmas.csv │ │ └── test_stanford_verbs.csv │ │ └── test_tokens │ │ ├── test_adjectives.csv │ │ ├── test_adjectives_lemma.csv │ │ ├── test_adverbs.csv │ │ ├── test_adverbs_lemma.csv │ │ ├── test_lemmas.csv │ │ ├── test_lemmas_lc.csv │ │ ├── test_lemmas_lc_sw.csv │ │ ├── test_lemmas_sw.csv │ │ ├── test_nouns.csv │ │ ├── 
test_nouns_lemma.csv │ │ ├── test_stems.csv │ │ ├── test_stems_lowercase.csv │ │ ├── test_stems_lowercase_no_stopwords.csv │ │ ├── test_verb_lemmas.csv │ │ └── test_verbs.csv ├── typed_annotations │ ├── de │ │ ├── amount │ │ │ └── amounts.txt │ │ ├── citation │ │ │ └── citations.txt │ │ ├── copyright │ │ │ └── copyrights.txt │ │ ├── court │ │ │ └── courts.txt │ │ ├── court_citation │ │ │ └── court_citations.txt │ │ ├── date │ │ │ └── dates.txt │ │ ├── definition │ │ │ └── definitions.txt │ │ ├── duration │ │ │ └── durations.txt │ │ ├── geoentity │ │ │ └── geoentities.txt │ │ ├── law │ │ │ └── laws.txt │ │ └── percent │ │ │ └── percents.txt │ ├── en │ │ ├── act │ │ │ └── acts.txt │ │ ├── amount │ │ │ └── amounts.txt │ │ ├── citation │ │ │ └── citations.txt │ │ ├── condition │ │ │ └── conditions.txt │ │ ├── constraint │ │ │ └── constraints.txt │ │ ├── copyright │ │ │ └── copyrights.txt │ │ ├── court │ │ │ └── courts.txt │ │ ├── cusip │ │ │ └── cusips.txt │ │ ├── date │ │ │ └── dates.txt │ │ ├── definition │ │ │ └── definitions.txt │ │ ├── distance │ │ │ └── distances.txt │ │ ├── duration │ │ │ └── durations.txt │ │ ├── geoentity │ │ │ └── geoentities.txt │ │ ├── money │ │ │ └── money.txt │ │ ├── percent │ │ │ └── percents.txt │ │ ├── phone │ │ │ └── phones.txt │ │ ├── ratio │ │ │ └── ratios.txt │ │ ├── regulation │ │ │ └── regulations.txt │ │ ├── ssn │ │ │ └── ssn.txt │ │ ├── trademark │ │ │ └── trademarks.txt │ │ └── url │ │ │ └── urls.txt │ └── es │ │ ├── copyright │ │ └── copyrights.txt │ │ ├── court │ │ └── courts.txt │ │ ├── date │ │ └── dates.txt │ │ ├── definition │ │ └── definitions.txt │ │ └── regulation │ │ └── regulations.txt └── utils │ ├── parsing │ ├── pdf_malformat_parsed_default.txt │ ├── pdf_malformat_parsed_stripper.txt │ └── text_abusing_headers.txt │ └── unicode_data.txt ├── long_parsed_text.txt ├── output ├── .gitkeep ├── de_definitions_01.html └── es_definitions_01.html ├── sample_es_regulations.html ├── table_sample.pdf ├── tabular02.pdf ├── 
test_get_section_spans_1.txt └── test_lexnlp_tests └── test_test_extraction_func_on_test_data.csv /.pylintrc: -------------------------------------------------------------------------------- 1 | [FORMAT] 2 | max-line-length=120 3 | ignore-long-lines=^\s*(# )??$ 4 | 5 | 6 | [MISCELLANEOUS] 7 | notes=FIXME,TODO 8 | 9 | [MESSAGES CONTROL] 10 | disable=r,c,w0511 11 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include README.rst 3 | include index.rst 4 | include Pipfile 5 | include Pipfile.lock 6 | recursive-include lexnlp *.pickle 7 | recursive-include lexnlp/extract/en/addresses *.json *.txt *.xml 8 | recursive-include lexnlp *.csv 9 | recursive-include libs * 10 | recursive-include scripts * 11 | recursive-include documentation * 12 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | beautifulsoup4 = "*" 8 | cloudpickle = "*" 9 | Cython = "*" 10 | dateparser = "*" 11 | elasticsearch = "*" 12 | gensim = "==4.1.2" 13 | importlib-metadata = "*" 14 | joblib = "*" 15 | lxml = "*" 16 | nltk = "*" 17 | num2words = "*" 18 | pandas = "*" 19 | psutil = "*" 20 | pycountry = "*" 21 | python-dateutil = "*" 22 | regex = "*" 23 | reporters-db = "*" 24 | requests = "*" 25 | scikit-learn = "==0.24" 26 | scipy = "*" 27 | tqdm = "*" 28 | unidecode = "*" 29 | us = "*" 30 | zahlwort2num = "*" 31 | numpy = "*" 32 | 33 | [dev-packages] 34 | coverage = "*" 35 | memory-profiler = "*" 36 | nose = "*" 37 | pylint = "*" 38 | pytest = "*" 39 | sphinx = "*" 40 | 41 | [requires] 42 | python_version = "3.8" 43 | -------------------------------------------------------------------------------- 
/documentation/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = LexNLP 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.config.en.rst: -------------------------------------------------------------------------------- 1 | lexnlp.config.en package 2 | ======================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.config.en.company\_types module 8 | -------------------------------------- 9 | 10 | .. automodule:: lexnlp.config.en.company_types 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | lexnlp.config.en.geoentities\_config module 16 | ------------------------------------------- 17 | 18 | .. automodule:: lexnlp.config.en.geoentities_config 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: lexnlp.config.en 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.config.rst: -------------------------------------------------------------------------------- 1 | lexnlp.config package 2 | ===================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. 
toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.config.en 11 | 12 | Submodules 13 | ---------- 14 | 15 | lexnlp.config.stanford module 16 | ----------------------------- 17 | 18 | .. automodule:: lexnlp.config.stanford 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: lexnlp.config 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.all_locales.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.all\_locales.tests package 2 | ========================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.all\_locales.tests.test\_locales module 8 | ------------------------------------------------------ 9 | 10 | .. automodule:: lexnlp.extract.all_locales.tests.test_locales 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: lexnlp.extract.all_locales.tests 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.common.date_parsing.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.common.date\_parsing package 2 | =========================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.common.date\_parsing.datefinder module 8 | ----------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.common.date_parsing.datefinder 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. 
automodule:: lexnlp.extract.common.date_parsing 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.common.definitions.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.common.definitions package 2 | ========================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.common.definitions.common\_definition\_patterns module 8 | --------------------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.common.definitions.common_definition_patterns 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | lexnlp.extract.common.definitions.definition\_match module 16 | ---------------------------------------------------------- 17 | 18 | .. automodule:: lexnlp.extract.common.definitions.definition_match 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | lexnlp.extract.common.definitions.universal\_definition\_parser module 24 | ---------------------------------------------------------------------- 25 | 26 | .. automodule:: lexnlp.extract.common.definitions.universal_definition_parser 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: lexnlp.extract.common.definitions 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.common.durations.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.common.durations package 2 | ======================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.common.durations.durations\_parser module 8 | -------------------------------------------------------- 9 | 10 | .. 
automodule:: lexnlp.extract.common.durations.durations_parser 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: lexnlp.extract.common.durations 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.common.entities.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.common.entities package 2 | ====================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.common.entities.entity\_banlist module 8 | ----------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.common.entities.entity_banlist 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: lexnlp.extract.common.entities 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.common.ocr_rating.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.common.ocr\_rating package 2 | ========================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.common.ocr\_rating.lang\_vector\_distribution\_builder module 8 | ---------------------------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.common.ocr_rating.lang_vector_distribution_builder 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | lexnlp.extract.common.ocr\_rating.ocr\_rating\_calculator module 16 | ---------------------------------------------------------------- 17 | 18 | .. 
automodule:: lexnlp.extract.common.ocr_rating.ocr_rating_calculator 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: lexnlp.extract.common.ocr_rating 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.en.addresses.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.addresses package 2 | =================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.extract.en.addresses.tests 11 | 12 | Submodules 13 | ---------- 14 | 15 | lexnlp.extract.en.addresses.address\_features module 16 | ---------------------------------------------------- 17 | 18 | .. automodule:: lexnlp.extract.en.addresses.address_features 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | lexnlp.extract.en.addresses.addresses module 24 | -------------------------------------------- 25 | 26 | .. automodule:: lexnlp.extract.en.addresses.addresses 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: lexnlp.extract.en.addresses 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.en.addresses.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.addresses.tests package 2 | ========================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.en.addresses.tests.test\_addresses module 8 | -------------------------------------------------------- 9 | 10 | .. 
automodule:: lexnlp.extract.en.addresses.tests.test_addresses 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: lexnlp.extract.en.addresses.tests 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.en.contracts.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.contracts package 2 | =================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.extract.en.contracts.tests 11 | 12 | Submodules 13 | ---------- 14 | 15 | lexnlp.extract.en.contracts.contract\_type\_detector module 16 | ----------------------------------------------------------- 17 | 18 | .. automodule:: lexnlp.extract.en.contracts.contract_type_detector 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | lexnlp.extract.en.contracts.predictors module 24 | --------------------------------------------- 25 | 26 | .. automodule:: lexnlp.extract.en.contracts.predictors 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: lexnlp.extract.en.contracts 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.en.contracts.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.contracts.tests package 2 | ========================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.en.contracts.tests.test\_contract\_type module 8 | ------------------------------------------------------------- 9 | 10 | .. 
automodule:: lexnlp.extract.en.contracts.tests.test_contract_type 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | lexnlp.extract.en.contracts.tests.test\_contracts module 16 | -------------------------------------------------------- 17 | 18 | .. automodule:: lexnlp.extract.en.contracts.tests.test_contracts 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: lexnlp.extract.en.contracts.tests 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.en.entities.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.entities.tests package 2 | ======================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.en.entities.tests.test\_get\_companies module 8 | ------------------------------------------------------------ 9 | 10 | .. automodule:: lexnlp.extract.en.entities.tests.test_get_companies 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | lexnlp.extract.en.entities.tests.test\_nltk\_maxent module 16 | ---------------------------------------------------------- 17 | 18 | .. automodule:: lexnlp.extract.en.entities.tests.test_nltk_maxent 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | lexnlp.extract.en.entities.tests.test\_stanford\_ner module 24 | ----------------------------------------------------------- 25 | 26 | .. automodule:: lexnlp.extract.en.entities.tests.test_stanford_ner 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. 
automodule:: lexnlp.extract.en.entities.tests 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.en.preprocessing.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.preprocessing package 2 | ======================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.en.preprocessing.span\_tokenizer module 8 | ------------------------------------------------------ 9 | 10 | .. automodule:: lexnlp.extract.en.preprocessing.span_tokenizer 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: lexnlp.extract.en.preprocessing 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.ml.classifier.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.ml.classifier package 2 | ==================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.ml.classifier.base\_token\_sequence\_classifier\_model module 8 | ---------------------------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.ml.classifier.base_token_sequence_classifier_model 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | lexnlp.extract.ml.classifier.spacy\_token\_sequence\_model module 16 | ----------------------------------------------------------------- 17 | 18 | .. automodule:: lexnlp.extract.ml.classifier.spacy_token_sequence_model 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | lexnlp.extract.ml.classifier.token\_sequence\_model module 24 | ---------------------------------------------------------- 25 | 26 | .. 
automodule:: lexnlp.extract.ml.classifier.token_sequence_model 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: lexnlp.extract.ml.classifier 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.ml.detector.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.ml.detector.tests package 2 | ======================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.ml.detector.tests.test\_phrase\_constructor module 8 | ----------------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.ml.detector.tests.test_phrase_constructor 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: lexnlp.extract.ml.detector.tests 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.ml.en.definitions.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.ml.en.definitions.tests package 2 | ============================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.ml.en.definitions.tests.test\_layered\_definition\_detector module 8 | --------------------------------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.ml.en.definitions.tests.test_layered_definition_detector 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. 
automodule:: lexnlp.extract.ml.en.definitions.tests 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.ml.en.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.ml.en package 2 | ============================ 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.extract.ml.en.definitions 11 | 12 | Module contents 13 | --------------- 14 | 15 | .. automodule:: lexnlp.extract.ml.en 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.ml.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.ml package 2 | ========================= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.extract.ml.classifier 11 | lexnlp.extract.ml.detector 12 | lexnlp.extract.ml.en 13 | 14 | Submodules 15 | ---------- 16 | 17 | lexnlp.extract.ml.environment module 18 | ------------------------------------ 19 | 20 | .. automodule:: lexnlp.extract.ml.environment 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | Module contents 26 | --------------- 27 | 28 | .. automodule:: lexnlp.extract.ml 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract package 2 | ====================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. 
toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.extract.all_locales 11 | lexnlp.extract.common 12 | lexnlp.extract.de 13 | lexnlp.extract.en 14 | lexnlp.extract.es 15 | lexnlp.extract.ml 16 | 17 | Module contents 18 | --------------- 19 | 20 | .. automodule:: lexnlp.extract 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.ml.catalog.rst: -------------------------------------------------------------------------------- 1 | lexnlp.ml.catalog package 2 | ========================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.ml.catalog.download module 8 | --------------------------------- 9 | 10 | .. automodule:: lexnlp.ml.catalog.download 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: lexnlp.ml.catalog 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.nlp.en.rst: -------------------------------------------------------------------------------- 1 | lexnlp.nlp.en package 2 | ===================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.nlp.en.segments 11 | lexnlp.nlp.en.tests 12 | lexnlp.nlp.en.transforms 13 | 14 | Submodules 15 | ---------- 16 | 17 | lexnlp.nlp.en.stanford module 18 | ----------------------------- 19 | 20 | .. automodule:: lexnlp.nlp.en.stanford 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | lexnlp.nlp.en.tokens module 26 | --------------------------- 27 | 28 | .. automodule:: lexnlp.nlp.en.tokens 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | Module contents 34 | --------------- 35 | 36 | .. 
automodule:: lexnlp.nlp.en 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.nlp.en.transforms.rst: -------------------------------------------------------------------------------- 1 | lexnlp.nlp.en.transforms package 2 | ================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.nlp.en.transforms.characters module 8 | ------------------------------------------ 9 | 10 | .. automodule:: lexnlp.nlp.en.transforms.characters 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | lexnlp.nlp.en.transforms.tokens module 16 | -------------------------------------- 17 | 18 | .. automodule:: lexnlp.nlp.en.transforms.tokens 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: lexnlp.nlp.en.transforms 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.nlp.rst: -------------------------------------------------------------------------------- 1 | lexnlp.nlp package 2 | ================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.nlp.en 11 | lexnlp.nlp.train 12 | 13 | Module contents 14 | --------------- 15 | 16 | .. automodule:: lexnlp.nlp 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.nlp.train.en.rst: -------------------------------------------------------------------------------- 1 | lexnlp.nlp.train.en package 2 | =========================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. 
toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.nlp.train.en.tests 11 | 12 | Submodules 13 | ---------- 14 | 15 | lexnlp.nlp.train.en.train\_section\_segmanizer module 16 | ----------------------------------------------------- 17 | 18 | .. automodule:: lexnlp.nlp.train.en.train_section_segmanizer 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: lexnlp.nlp.train.en 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.nlp.train.en.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.nlp.train.en.tests package 2 | ================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.nlp.train.en.tests.test\_train\_section\_segmentizer module 8 | ------------------------------------------------------------------ 9 | 10 | .. automodule:: lexnlp.nlp.train.en.tests.test_train_section_segmentizer 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: lexnlp.nlp.train.en.tests 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.nlp.train.rst: -------------------------------------------------------------------------------- 1 | lexnlp.nlp.train package 2 | ======================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.nlp.train.en 11 | 12 | Submodules 13 | ---------- 14 | 15 | lexnlp.nlp.train.train\_data\_manager module 16 | -------------------------------------------- 17 | 18 | .. automodule:: lexnlp.nlp.train.train_data_manager 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. 
automodule:: lexnlp.nlp.train 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.rst: -------------------------------------------------------------------------------- 1 | lexnlp package 2 | ============== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.config 11 | lexnlp.extract 12 | lexnlp.ml 13 | lexnlp.nlp 14 | lexnlp.tests 15 | lexnlp.utils 16 | 17 | Module contents 18 | --------------- 19 | 20 | .. automodule:: lexnlp 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.utils.unicode.rst: -------------------------------------------------------------------------------- 1 | lexnlp.utils.unicode package 2 | ============================ 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.utils.unicode.tests 11 | 12 | Submodules 13 | ---------- 14 | 15 | lexnlp.utils.unicode.unicode\_lookup module 16 | ------------------------------------------- 17 | 18 | .. automodule:: lexnlp.utils.unicode.unicode_lookup 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: lexnlp.utils.unicode 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.utils.unicode.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.utils.unicode.tests package 2 | ================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.utils.unicode.tests.test\_unicode\_lookup module 8 | ------------------------------------------------------- 9 | 10 | .. 
automodule:: lexnlp.utils.unicode.tests.test_unicode_lookup 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: lexnlp.utils.unicode.tests 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/modules.rst: -------------------------------------------------------------------------------- 1 | lexnlp 2 | ====== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | lexnlp 8 | -------------------------------------------------------------------------------- /documentation/docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | set SPHINXPROJ=LexNLP 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 
24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /documentation/docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx_rtd_theme 3 | sphinx-markdown-tables 4 | recommonmark 5 | pyyaml -------------------------------------------------------------------------------- /documentation/docs/source/_static/img/lexnlp_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/documentation/docs/source/_static/img/lexnlp_logo.png -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.config.en.rst: -------------------------------------------------------------------------------- 1 | lexnlp.config.en package 2 | ======================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.config.en.company\_types module 8 | -------------------------------------- 9 | 10 | .. automodule:: lexnlp.config.en.company_types 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | lexnlp.config.en.geoentities\_config module 16 | ------------------------------------------- 17 | 18 | .. automodule:: lexnlp.config.en.geoentities_config 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. 
automodule:: lexnlp.config.en 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.config.rst: -------------------------------------------------------------------------------- 1 | lexnlp.config package 2 | ===================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.config.en 10 | 11 | Submodules 12 | ---------- 13 | 14 | lexnlp.config.stanford module 15 | ----------------------------- 16 | 17 | .. automodule:: lexnlp.config.stanford 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: lexnlp.config 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.common.date_parsing.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.common.date\_parsing package 2 | =========================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.common.date\_parsing.datefinder module 8 | ----------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.common.date_parsing.datefinder 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. 
automodule:: lexnlp.extract.common.date_parsing 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.common.durations.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.common.durations package 2 | ======================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.common.durations.durations\_parser module 8 | -------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.common.durations.durations_parser 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: lexnlp.extract.common.durations 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.addresses.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.addresses package 2 | =================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.extract.en.addresses.tests 10 | 11 | Submodules 12 | ---------- 13 | 14 | lexnlp.extract.en.addresses.address\_features module 15 | ---------------------------------------------------- 16 | 17 | .. automodule:: lexnlp.extract.en.addresses.address_features 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | lexnlp.extract.en.addresses.addresses module 23 | -------------------------------------------- 24 | 25 | .. automodule:: lexnlp.extract.en.addresses.addresses 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. 
automodule:: lexnlp.extract.en.addresses 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.addresses.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.addresses.tests package 2 | ========================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.en.addresses.tests.test\_addresses module 8 | -------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.en.addresses.tests.test_addresses 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: lexnlp.extract.en.addresses.tests 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.amounts.get_amounts.rst: -------------------------------------------------------------------------------- 1 | get_amounts 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.extract.en.amounts 5 | 6 | .. autofunction:: get_amounts 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.amounts.get_np.rst: -------------------------------------------------------------------------------- 1 | get_np 2 | ====== 3 | 4 | .. currentmodule:: lexnlp.extract.en.amounts 5 | 6 | .. autofunction:: get_np 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.amounts.text2num.rst: -------------------------------------------------------------------------------- 1 | text2num 2 | ======== 3 | 4 | .. currentmodule:: lexnlp.extract.en.amounts 5 | 6 | .. 
autofunction:: text2num 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.citations.get_citations.rst: -------------------------------------------------------------------------------- 1 | get_citations 2 | ============= 3 | 4 | .. currentmodule:: lexnlp.extract.en.citations 5 | 6 | .. autofunction:: get_citations 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.conditions.create_condition_pattern.rst: -------------------------------------------------------------------------------- 1 | create_condition_pattern 2 | ======================== 3 | 4 | .. currentmodule:: lexnlp.extract.en.conditions 5 | 6 | .. autofunction:: create_condition_pattern 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.conditions.get_conditions.rst: -------------------------------------------------------------------------------- 1 | get_conditions 2 | ============== 3 | 4 | .. currentmodule:: lexnlp.extract.en.conditions 5 | 6 | .. autofunction:: get_conditions 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.constraints.create_constraint_pattern.rst: -------------------------------------------------------------------------------- 1 | create_constraint_pattern 2 | ========================= 3 | 4 | .. currentmodule:: lexnlp.extract.en.constraints 5 | 6 | .. autofunction:: create_constraint_pattern 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.constraints.get_constraints.rst: -------------------------------------------------------------------------------- 1 | get_constraints 2 | =============== 3 | 4 | .. currentmodule:: lexnlp.extract.en.constraints 5 | 6 | .. 
autofunction:: get_constraints 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.contracts.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.contracts package 2 | =================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.extract.en.contracts.tests 10 | 11 | Submodules 12 | ---------- 13 | 14 | lexnlp.extract.en.contracts.detector module 15 | ------------------------------------------- 16 | 17 | .. automodule:: lexnlp.extract.en.contracts.detector 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: lexnlp.extract.en.contracts 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.contracts.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.contracts.tests package 2 | ========================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.en.contracts.tests.test\_contracts module 8 | -------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.en.contracts.tests.test_contracts 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: lexnlp.extract.en.contracts.tests 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.copyright.CopyrightNPExtractor.rst: -------------------------------------------------------------------------------- 1 | CopyrightNPExtractor 2 | ==================== 3 | 4 | .. 
currentmodule:: lexnlp.extract.en.copyright 5 | 6 | .. autoclass:: CopyrightNPExtractor 7 | :show-inheritance: 8 | 9 | .. rubric:: Attributes Summary 10 | 11 | .. autosummary:: 12 | 13 | ~CopyrightNPExtractor.allowed_pos 14 | ~CopyrightNPExtractor.allowed_sym 15 | 16 | .. rubric:: Methods Summary 17 | 18 | .. autosummary:: 19 | 20 | ~CopyrightNPExtractor.strip_np 21 | 22 | .. rubric:: Attributes Documentation 23 | 24 | .. autoattribute:: allowed_pos 25 | .. autoattribute:: allowed_sym 26 | 27 | .. rubric:: Methods Documentation 28 | 29 | .. automethod:: strip_np 30 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.copyright.get_copyright.rst: -------------------------------------------------------------------------------- 1 | get_copyright 2 | ============= 3 | 4 | .. currentmodule:: lexnlp.extract.en.copyright 5 | 6 | .. autofunction:: get_copyright 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dates.build_date_model.rst: -------------------------------------------------------------------------------- 1 | build_date_model 2 | ================ 3 | 4 | .. currentmodule:: lexnlp.extract.en.dates 5 | 6 | .. autofunction:: build_date_model 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dates.get_date_features.rst: -------------------------------------------------------------------------------- 1 | get_date_features 2 | ================= 3 | 4 | .. currentmodule:: lexnlp.extract.en.dates 5 | 6 | .. autofunction:: get_date_features 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dates.get_dates.rst: -------------------------------------------------------------------------------- 1 | get_dates 2 | ========= 3 | 4 | .. 
currentmodule:: lexnlp.extract.en.dates 5 | 6 | .. autofunction:: get_dates 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dates.get_dates_list.rst: -------------------------------------------------------------------------------- 1 | get_dates_list 2 | ============== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dates 5 | 6 | .. autofunction:: get_dates_list 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dates.get_raw_date_list.rst: -------------------------------------------------------------------------------- 1 | get_raw_date_list 2 | ================= 3 | 4 | .. currentmodule:: lexnlp.extract.en.dates 5 | 6 | .. autofunction:: get_raw_date_list 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dates.get_raw_dates.rst: -------------------------------------------------------------------------------- 1 | get_raw_dates 2 | ============= 3 | 4 | .. currentmodule:: lexnlp.extract.en.dates 5 | 6 | .. autofunction:: get_raw_dates 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dates.train_default_model.rst: -------------------------------------------------------------------------------- 1 | train_default_model 2 | =================== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dates 5 | 6 | .. autofunction:: train_default_model 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.definitions.get_definitions.rst: -------------------------------------------------------------------------------- 1 | get_definitions 2 | =============== 3 | 4 | .. currentmodule:: lexnlp.extract.en.definitions 5 | 6 | .. 
autofunction:: get_definitions 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.SearchResultPosition.rst: -------------------------------------------------------------------------------- 1 | SearchResultPosition 2 | ==================== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autoclass:: SearchResultPosition 7 | :show-inheritance: 8 | 9 | .. rubric:: Attributes Summary 10 | 11 | .. autosummary:: 12 | 13 | ~SearchResultPosition.alias_text 14 | ~SearchResultPosition.entities_dict 15 | ~SearchResultPosition.start 16 | 17 | .. rubric:: Attributes Documentation 18 | 19 | .. autoattribute:: alias_text 20 | .. autoattribute:: entities_dict 21 | .. autoattribute:: start 22 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.add_alias_to_entity.rst: -------------------------------------------------------------------------------- 1 | add_alias_to_entity 2 | =================== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: add_alias_to_entity 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.add_aliases_to_entity.rst: -------------------------------------------------------------------------------- 1 | add_aliases_to_entity 2 | ===================== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: add_aliases_to_entity 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.alias_is_blacklisted.rst: -------------------------------------------------------------------------------- 1 | alias_is_blacklisted 2 | ==================== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. 
autofunction:: alias_is_blacklisted 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.conflicts_take_first_by_id.rst: -------------------------------------------------------------------------------- 1 | conflicts_take_first_by_id 2 | ========================== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: conflicts_take_first_by_id 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.conflicts_top_by_priority.rst: -------------------------------------------------------------------------------- 1 | conflicts_top_by_priority 2 | ========================= 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: conflicts_top_by_priority 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.entity_alias.rst: -------------------------------------------------------------------------------- 1 | entity_alias 2 | ============ 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: entity_alias 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.entity_config.rst: -------------------------------------------------------------------------------- 1 | entity_config 2 | ============= 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: entity_config 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.find_dict_entities.rst: -------------------------------------------------------------------------------- 1 | find_dict_entities 2 | ================== 3 | 4 | .. 
currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: find_dict_entities 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.get_alias_id.rst: -------------------------------------------------------------------------------- 1 | get_alias_id 2 | ============ 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: get_alias_id 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.get_alias_text.rst: -------------------------------------------------------------------------------- 1 | get_alias_text 2 | ============== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: get_alias_text 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.get_entity_aliases.rst: -------------------------------------------------------------------------------- 1 | get_entity_aliases 2 | ================== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: get_entity_aliases 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.get_entity_id.rst: -------------------------------------------------------------------------------- 1 | get_entity_id 2 | ============= 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: get_entity_id 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.get_entity_name.rst: -------------------------------------------------------------------------------- 1 | get_entity_name 2 | =============== 3 | 4 | .. 
currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: get_entity_name 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.get_entity_priority.rst: -------------------------------------------------------------------------------- 1 | get_entity_priority 2 | =================== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: get_entity_priority 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.normalize_text.rst: -------------------------------------------------------------------------------- 1 | normalize_text 2 | ============== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: normalize_text 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.prepare_alias_blacklist_dict.rst: -------------------------------------------------------------------------------- 1 | prepare_alias_blacklist_dict 2 | ============================ 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: prepare_alias_blacklist_dict 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.distances.get_distances.rst: -------------------------------------------------------------------------------- 1 | get_distances 2 | ============= 3 | 4 | .. currentmodule:: lexnlp.extract.en.distances 5 | 6 | .. autofunction:: get_distances 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.durations.get_durations.rst: -------------------------------------------------------------------------------- 1 | get_durations 2 | ============= 3 | 4 | .. 
currentmodule:: lexnlp.extract.en.durations 5 | 6 | .. autofunction:: get_durations 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.geoentities.get_geoentities.rst: -------------------------------------------------------------------------------- 1 | get_geoentities 2 | =============== 3 | 4 | .. currentmodule:: lexnlp.extract.en.geoentities 5 | 6 | .. autofunction:: get_geoentities 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.money.get_money.rst: -------------------------------------------------------------------------------- 1 | get_money 2 | ========= 3 | 4 | .. currentmodule:: lexnlp.extract.en.money 5 | 6 | .. autofunction:: get_money 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.percents.get_percents.rst: -------------------------------------------------------------------------------- 1 | get_percents 2 | ============ 3 | 4 | .. currentmodule:: lexnlp.extract.en.percents 5 | 6 | .. autofunction:: get_percents 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.pii.get_pii.rst: -------------------------------------------------------------------------------- 1 | get_pii 2 | ======= 3 | 4 | .. currentmodule:: lexnlp.extract.en.pii 5 | 6 | .. autofunction:: get_pii 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.pii.get_ssns.rst: -------------------------------------------------------------------------------- 1 | get_ssns 2 | ======== 3 | 4 | .. currentmodule:: lexnlp.extract.en.pii 5 | 6 | .. 
autofunction:: get_ssns 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.pii.get_us_phones.rst: -------------------------------------------------------------------------------- 1 | get_us_phones 2 | ============= 3 | 4 | .. currentmodule:: lexnlp.extract.en.pii 5 | 6 | .. autofunction:: get_us_phones 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.preprocessing.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.preprocessing package 2 | ======================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.en.preprocessing.span\_tokenizer module 8 | ------------------------------------------------------ 9 | 10 | .. automodule:: lexnlp.extract.en.preprocessing.span_tokenizer 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: lexnlp.extract.en.preprocessing 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.ratios.get_ratios.rst: -------------------------------------------------------------------------------- 1 | get_ratios 2 | ========== 3 | 4 | .. currentmodule:: lexnlp.extract.en.ratios 5 | 6 | .. autofunction:: get_ratios 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.regulations.get_regulations.rst: -------------------------------------------------------------------------------- 1 | get_regulations 2 | =============== 3 | 4 | .. currentmodule:: lexnlp.extract.en.regulations 5 | 6 | .. 
autofunction:: get_regulations 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.trademarks.get_trademarks.rst: -------------------------------------------------------------------------------- 1 | get_trademarks 2 | ============== 3 | 4 | .. currentmodule:: lexnlp.extract.en.trademarks 5 | 6 | .. autofunction:: get_trademarks 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.urls.get_urls.rst: -------------------------------------------------------------------------------- 1 | get_urls 2 | ======== 3 | 4 | .. currentmodule:: lexnlp.extract.en.urls 5 | 6 | .. autofunction:: get_urls 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.utils.NPExtractor.rst: -------------------------------------------------------------------------------- 1 | NPExtractor 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.extract.en.utils 5 | 6 | .. autoclass:: NPExtractor 7 | :show-inheritance: 8 | 9 | .. rubric:: Attributes Summary 10 | 11 | .. autosummary:: 12 | 13 | ~NPExtractor.exception_pos 14 | ~NPExtractor.exception_sym 15 | ~NPExtractor.sym_with_space 16 | ~NPExtractor.sym_without_space 17 | 18 | .. rubric:: Methods Summary 19 | 20 | .. autosummary:: 21 | 22 | ~NPExtractor.cleanup_leaves 23 | ~NPExtractor.get_np 24 | ~NPExtractor.get_tokenizer 25 | ~NPExtractor.join 26 | ~NPExtractor.sep 27 | ~NPExtractor.strip_np 28 | 29 | .. rubric:: Attributes Documentation 30 | 31 | .. autoattribute:: exception_pos 32 | .. autoattribute:: exception_sym 33 | .. autoattribute:: sym_with_space 34 | .. autoattribute:: sym_without_space 35 | 36 | .. rubric:: Methods Documentation 37 | 38 | .. automethod:: cleanup_leaves 39 | .. automethod:: get_np 40 | .. automethod:: get_tokenizer 41 | .. automethod:: join 42 | .. automethod:: sep 43 | .. 
automethod:: strip_np 44 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.utils.strip_unicode_punctuation.rst: -------------------------------------------------------------------------------- 1 | strip_unicode_punctuation 2 | ========================= 3 | 4 | .. currentmodule:: lexnlp.extract.en.utils 5 | 6 | .. autofunction:: strip_unicode_punctuation 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.ml.classifier.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.ml.classifier package 2 | ==================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.ml.classifier.base\_token\_sequence\_classifier\_model module 8 | ---------------------------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.ml.classifier.base_token_sequence_classifier_model 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | lexnlp.extract.ml.classifier.spacy\_token\_sequence\_model module 16 | ----------------------------------------------------------------- 17 | 18 | .. automodule:: lexnlp.extract.ml.classifier.spacy_token_sequence_model 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | lexnlp.extract.ml.classifier.token\_sequence\_model module 24 | ---------------------------------------------------------- 25 | 26 | .. automodule:: lexnlp.extract.ml.classifier.token_sequence_model 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. 
automodule:: lexnlp.extract.ml.classifier 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.ml.detector.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.ml.detector.tests package 2 | ======================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.ml.detector.tests.test\_phrase\_constructor module 8 | ----------------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.ml.detector.tests.test_phrase_constructor 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: lexnlp.extract.ml.detector.tests 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.ml.en.definitions.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.ml.en.definitions.tests package 2 | ============================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.ml.en.definitions.tests.test\_layered\_definition\_detector module 8 | --------------------------------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.ml.en.definitions.tests.test_layered_definition_detector 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. 
automodule:: lexnlp.extract.ml.en.definitions.tests 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.ml.en.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.ml.en package 2 | ============================ 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.extract.ml.en.definitions 10 | 11 | Module contents 12 | --------------- 13 | 14 | .. automodule:: lexnlp.extract.ml.en 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.ml.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.ml package 2 | ========================= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.extract.ml.classifier 10 | lexnlp.extract.ml.detector 11 | lexnlp.extract.ml.en 12 | 13 | Submodules 14 | ---------- 15 | 16 | lexnlp.extract.ml.environment module 17 | ------------------------------------ 18 | 19 | .. automodule:: lexnlp.extract.ml.environment 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | 25 | Module contents 26 | --------------- 27 | 28 | .. automodule:: lexnlp.extract.ml 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract package 2 | ====================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.extract.common 10 | lexnlp.extract.de 11 | lexnlp.extract.en 12 | lexnlp.extract.es 13 | lexnlp.extract.ml 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. 
automodule:: lexnlp.extract 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.rst: -------------------------------------------------------------------------------- 1 | lexnlp.nlp.en package 2 | ===================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.nlp.en.segments 10 | lexnlp.nlp.en.tests 11 | lexnlp.nlp.en.transforms 12 | 13 | Submodules 14 | ---------- 15 | 16 | lexnlp.nlp.en.stanford module 17 | ----------------------------- 18 | 19 | .. automodule:: lexnlp.nlp.en.stanford 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | lexnlp.nlp.en.tokens module 25 | --------------------------- 26 | 27 | .. automodule:: lexnlp.nlp.en.tokens 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | 32 | 33 | Module contents 34 | --------------- 35 | 36 | .. automodule:: lexnlp.nlp.en 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.pages.MODULE_PATH.rst: -------------------------------------------------------------------------------- 1 | MODULE_PATH 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.pages 5 | 6 | .. autodata:: MODULE_PATH 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.pages.PAGE_SEGMENTER_MODEL.rst: -------------------------------------------------------------------------------- 1 | PAGE_SEGMENTER_MODEL 2 | ==================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.pages 5 | 6 | .. 
autodata:: PAGE_SEGMENTER_MODEL 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.pages.build_page_break_features.rst: -------------------------------------------------------------------------------- 1 | build_page_break_features 2 | ========================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.pages 5 | 6 | .. autofunction:: build_page_break_features 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.pages.get_pages.rst: -------------------------------------------------------------------------------- 1 | get_pages 2 | ========= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.pages 5 | 6 | .. autofunction:: get_pages 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.paragraphs.MODULE_PATH.rst: -------------------------------------------------------------------------------- 1 | MODULE_PATH 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.paragraphs 5 | 6 | .. autodata:: MODULE_PATH 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.paragraphs.Optional.rst: -------------------------------------------------------------------------------- 1 | Optional 2 | ======== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.paragraphs 5 | 6 | .. autodata:: Optional 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.paragraphs.PARAGRAPH_SEGMENTER_MODEL.rst: -------------------------------------------------------------------------------- 1 | PARAGRAPH_SEGMENTER_MODEL 2 | ========================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.paragraphs 5 | 6 | .. 
autodata:: PARAGRAPH_SEGMENTER_MODEL 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.paragraphs.RE_NEW_LINE.rst: -------------------------------------------------------------------------------- 1 | RE_NEW_LINE 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.paragraphs 5 | 6 | .. autodata:: RE_NEW_LINE 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.paragraphs.Union.rst: -------------------------------------------------------------------------------- 1 | Union 2 | ===== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.paragraphs 5 | 6 | .. autodata:: Union 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.paragraphs.build_paragraph_break_features.rst: -------------------------------------------------------------------------------- 1 | build_paragraph_break_features 2 | ============================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.paragraphs 5 | 6 | .. autofunction:: build_paragraph_break_features 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.paragraphs.get_paragraphs.rst: -------------------------------------------------------------------------------- 1 | get_paragraphs 2 | ============== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.paragraphs 5 | 6 | .. autofunction:: get_paragraphs 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.paragraphs.splitlines_with_spans.rst: -------------------------------------------------------------------------------- 1 | splitlines_with_spans 2 | ===================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.paragraphs 5 | 6 | .. 
autofunction:: splitlines_with_spans 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sections.MODULE_PATH.rst: -------------------------------------------------------------------------------- 1 | MODULE_PATH 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sections 5 | 6 | .. autodata:: MODULE_PATH 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sections.SECTION_SEGMENTER_MODEL.rst: -------------------------------------------------------------------------------- 1 | SECTION_SEGMENTER_MODEL 2 | ======================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sections 5 | 6 | .. autodata:: SECTION_SEGMENTER_MODEL 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sections.build_section_break_features.rst: -------------------------------------------------------------------------------- 1 | build_section_break_features 2 | ============================ 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sections 5 | 6 | .. autofunction:: build_section_break_features 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sections.get_sections.rst: -------------------------------------------------------------------------------- 1 | get_sections 2 | ============ 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sections 5 | 6 | .. autofunction:: get_sections 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.Any.rst: -------------------------------------------------------------------------------- 1 | Any 2 | === 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. 
autodata:: Any 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.MODULE_PATH.rst: -------------------------------------------------------------------------------- 1 | MODULE_PATH 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autodata:: MODULE_PATH 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.PRE_PROCESS_TEXT_REMOVE.rst: -------------------------------------------------------------------------------- 1 | PRE_PROCESS_TEXT_REMOVE 2 | ======================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autodata:: PRE_PROCESS_TEXT_REMOVE 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.SENTENCE_SEGMENTER_MODEL.rst: -------------------------------------------------------------------------------- 1 | SENTENCE_SEGMENTER_MODEL 2 | ======================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autodata:: SENTENCE_SEGMENTER_MODEL 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.SENTENCE_SPLITTERS.rst: -------------------------------------------------------------------------------- 1 | SENTENCE_SPLITTERS 2 | ================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autodata:: SENTENCE_SPLITTERS 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.SENTENCE_SPLITTERS_LOWER_EXCLUDE.rst: -------------------------------------------------------------------------------- 1 | SENTENCE_SPLITTERS_LOWER_EXCLUDE 2 | ================================ 3 | 4 | .. 
currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autodata:: SENTENCE_SPLITTERS_LOWER_EXCLUDE 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.STRIP_GROUP.rst: -------------------------------------------------------------------------------- 1 | STRIP_GROUP 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autodata:: STRIP_GROUP 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.Union.rst: -------------------------------------------------------------------------------- 1 | Union 2 | ===== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autodata:: Union 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.build_sentence_model.rst: -------------------------------------------------------------------------------- 1 | build_sentence_model 2 | ==================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autofunction:: build_sentence_model 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.extra_abbreviations.rst: -------------------------------------------------------------------------------- 1 | extra_abbreviations 2 | =================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autodata:: extra_abbreviations 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.get_sentence__with_coords_list.rst: -------------------------------------------------------------------------------- 1 | get_sentence__with_coords_list 2 | ============================== 3 | 4 | .. 
currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autofunction:: get_sentence__with_coords_list 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.get_sentence_list.rst: -------------------------------------------------------------------------------- 1 | get_sentence_list 2 | ================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autofunction:: get_sentence_list 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.get_sentence_span.rst: -------------------------------------------------------------------------------- 1 | get_sentence_span 2 | ================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autofunction:: get_sentence_span 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.get_sentence_span_list.rst: -------------------------------------------------------------------------------- 1 | get_sentence_span_list 2 | ====================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autofunction:: get_sentence_span_list 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.post_process_sentence.rst: -------------------------------------------------------------------------------- 1 | post_process_sentence 2 | ===================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. 
autofunction:: post_process_sentence 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.pre_process_document.rst: -------------------------------------------------------------------------------- 1 | pre_process_document 2 | ==================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autofunction:: pre_process_document 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.titles.MODULE_PATH.rst: -------------------------------------------------------------------------------- 1 | MODULE_PATH 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.titles 5 | 6 | .. autodata:: MODULE_PATH 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.titles.SECTION_SEGMENTER_MODEL.rst: -------------------------------------------------------------------------------- 1 | SECTION_SEGMENTER_MODEL 2 | ======================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.titles 5 | 6 | .. autodata:: SECTION_SEGMENTER_MODEL 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.titles.UNICODE_CHAR_TOP_CATEGORY_MAPPING.rst: -------------------------------------------------------------------------------- 1 | UNICODE_CHAR_TOP_CATEGORY_MAPPING 2 | ================================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.titles 5 | 6 | .. 
autodata:: UNICODE_CHAR_TOP_CATEGORY_MAPPING 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.titles.build_document_title_features.rst: -------------------------------------------------------------------------------- 1 | build_document_title_features 2 | ============================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.titles 5 | 6 | .. autofunction:: build_document_title_features 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.titles.build_model.rst: -------------------------------------------------------------------------------- 1 | build_model 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.titles 5 | 6 | .. autofunction:: build_model 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.titles.build_title_features.rst: -------------------------------------------------------------------------------- 1 | build_title_features 2 | ==================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.titles 5 | 6 | .. autofunction:: build_title_features 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.titles.get_titles.rst: -------------------------------------------------------------------------------- 1 | get_titles 2 | ========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.titles 5 | 6 | .. autofunction:: get_titles 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.utils.build_document_distribution.rst: -------------------------------------------------------------------------------- 1 | build_document_distribution 2 | =========================== 3 | 4 | .. 
currentmodule:: lexnlp.nlp.en.segments.utils 5 | 6 | .. autofunction:: build_document_distribution 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.utils.build_document_line_distribution.rst: -------------------------------------------------------------------------------- 1 | build_document_line_distribution 2 | ================================ 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.utils 5 | 6 | .. autofunction:: build_document_line_distribution 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.BIGRAM_COLLOCATIONS.rst: -------------------------------------------------------------------------------- 1 | BIGRAM_COLLOCATIONS 2 | =================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autodata:: BIGRAM_COLLOCATIONS 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.COLLOCATION_SIZE.rst: -------------------------------------------------------------------------------- 1 | COLLOCATION_SIZE 2 | ================ 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autodata:: COLLOCATION_SIZE 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.DEFAULT_LEMMATIZER.rst: -------------------------------------------------------------------------------- 1 | DEFAULT_LEMMATIZER 2 | ================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autodata:: DEFAULT_LEMMATIZER 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.DEFAULT_STEMMER.rst: -------------------------------------------------------------------------------- 1 | DEFAULT_STEMMER 2 | =============== 3 | 4 | .. 
currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autodata:: DEFAULT_STEMMER 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.MODULE_PATH.rst: -------------------------------------------------------------------------------- 1 | MODULE_PATH 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autodata:: MODULE_PATH 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.STOPWORDS.rst: -------------------------------------------------------------------------------- 1 | STOPWORDS 2 | ========= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autodata:: STOPWORDS 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.TRIGRAM_COLLOCATIONS.rst: -------------------------------------------------------------------------------- 1 | TRIGRAM_COLLOCATIONS 2 | ==================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autodata:: TRIGRAM_COLLOCATIONS 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_adjectives.rst: -------------------------------------------------------------------------------- 1 | get_adjectives 2 | ============== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autofunction:: get_adjectives 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_adverbs.rst: -------------------------------------------------------------------------------- 1 | get_adverbs 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. 
autofunction:: get_adverbs 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_lemma_list.rst: -------------------------------------------------------------------------------- 1 | get_lemma_list 2 | ============== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autofunction:: get_lemma_list 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_lemmas.rst: -------------------------------------------------------------------------------- 1 | get_lemmas 2 | ========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autofunction:: get_lemmas 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_nouns.rst: -------------------------------------------------------------------------------- 1 | get_nouns 2 | ========= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autofunction:: get_nouns 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_stem_list.rst: -------------------------------------------------------------------------------- 1 | get_stem_list 2 | ============= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autofunction:: get_stem_list 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_stems.rst: -------------------------------------------------------------------------------- 1 | get_stems 2 | ========= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. 
autofunction:: get_stems 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_token_list.rst: -------------------------------------------------------------------------------- 1 | get_token_list 2 | ============== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autofunction:: get_token_list 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_tokens.rst: -------------------------------------------------------------------------------- 1 | get_tokens 2 | ========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autofunction:: get_tokens 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_verbs.rst: -------------------------------------------------------------------------------- 1 | get_verbs 2 | ========= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autofunction:: get_verbs 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_wordnet_pos.rst: -------------------------------------------------------------------------------- 1 | get_wordnet_pos 2 | =============== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autofunction:: get_wordnet_pos 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.transforms.characters.MODULE_PATH.rst: -------------------------------------------------------------------------------- 1 | MODULE_PATH 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.transforms.characters 5 | 6 | .. 
autodata:: MODULE_PATH 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.transforms.characters.get_character_distribution.rst: -------------------------------------------------------------------------------- 1 | get_character_distribution 2 | ========================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.transforms.characters 5 | 6 | .. autofunction:: get_character_distribution 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.transforms.characters.get_character_ngram_distribution.rst: -------------------------------------------------------------------------------- 1 | get_character_ngram_distribution 2 | ================================ 3 | 4 | .. currentmodule:: lexnlp.nlp.en.transforms.characters 5 | 6 | .. autofunction:: get_character_ngram_distribution 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.transforms.rst: -------------------------------------------------------------------------------- 1 | lexnlp.nlp.en.transforms package 2 | ================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.nlp.en.transforms.characters module 8 | ------------------------------------------ 9 | 10 | .. automodule:: lexnlp.nlp.en.transforms.characters 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | lexnlp.nlp.en.transforms.tokens module 16 | -------------------------------------- 17 | 18 | .. automodule:: lexnlp.nlp.en.transforms.tokens 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. 
automodule:: lexnlp.nlp.en.transforms 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.transforms.tokens.MODULE_PATH.rst: -------------------------------------------------------------------------------- 1 | MODULE_PATH 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.transforms.tokens 5 | 6 | .. autodata:: MODULE_PATH 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.transforms.tokens.get_bigram_distribution.rst: -------------------------------------------------------------------------------- 1 | get_bigram_distribution 2 | ======================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.transforms.tokens 5 | 6 | .. autofunction:: get_bigram_distribution 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.transforms.tokens.get_ngram_distribution.rst: -------------------------------------------------------------------------------- 1 | get_ngram_distribution 2 | ====================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.transforms.tokens 5 | 6 | .. autofunction:: get_ngram_distribution 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.transforms.tokens.get_skipgram_distribution.rst: -------------------------------------------------------------------------------- 1 | get_skipgram_distribution 2 | ========================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.transforms.tokens 5 | 6 | .. 
autofunction:: get_skipgram_distribution 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.transforms.tokens.get_token_distribution.rst: -------------------------------------------------------------------------------- 1 | get_token_distribution 2 | ====================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.transforms.tokens 5 | 6 | .. autofunction:: get_token_distribution 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.transforms.tokens.get_trigram_distribution.rst: -------------------------------------------------------------------------------- 1 | get_trigram_distribution 2 | ======================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.transforms.tokens 5 | 6 | .. autofunction:: get_trigram_distribution 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.rst: -------------------------------------------------------------------------------- 1 | lexnlp.nlp package 2 | ================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.nlp.en 10 | 11 | Module contents 12 | --------------- 13 | 14 | .. automodule:: lexnlp.nlp 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.rst: -------------------------------------------------------------------------------- 1 | lexnlp package 2 | ============== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.config 10 | lexnlp.extract 11 | lexnlp.nlp 12 | lexnlp.tests 13 | lexnlp.utils 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. 
automodule:: lexnlp 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.utils.rst: -------------------------------------------------------------------------------- 1 | lexnlp.utils package 2 | ==================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.utils.lines_processing 10 | lexnlp.utils.tests 11 | lexnlp.utils.unicode 12 | 13 | Submodules 14 | ---------- 15 | 16 | lexnlp.utils.decorators module 17 | ------------------------------ 18 | 19 | .. automodule:: lexnlp.utils.decorators 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | lexnlp.utils.iterating\_helpers module 25 | -------------------------------------- 26 | 27 | .. automodule:: lexnlp.utils.iterating_helpers 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | 32 | lexnlp.utils.map module 33 | ----------------------- 34 | 35 | .. automodule:: lexnlp.utils.map 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | 40 | lexnlp.utils.parse\_df module 41 | ----------------------------- 42 | 43 | .. automodule:: lexnlp.utils.parse_df 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | 48 | 49 | Module contents 50 | --------------- 51 | 52 | .. automodule:: lexnlp.utils 53 | :members: 54 | :undoc-members: 55 | :show-inheritance: 56 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.utils.unicode.rst: -------------------------------------------------------------------------------- 1 | lexnlp.utils.unicode package 2 | ============================ 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.utils.unicode.tests 10 | 11 | Submodules 12 | ---------- 13 | 14 | lexnlp.utils.unicode.unicode\_lookup module 15 | ------------------------------------------- 16 | 17 | .. 
automodule:: lexnlp.utils.unicode.unicode_lookup 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: lexnlp.utils.unicode 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.utils.unicode.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.utils.unicode.tests package 2 | ================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.utils.unicode.tests.test\_unicode\_lookup module 8 | ------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.utils.unicode.tests.test_unicode_lookup 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: lexnlp.utils.unicode.tests 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlpprivate.extract.en.addresses.rst: -------------------------------------------------------------------------------- 1 | lexnlpprivate.extract.en.addresses package 2 | ========================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlpprivate.extract.en.addresses.tests 10 | 11 | Submodules 12 | ---------- 13 | 14 | lexnlpprivate.extract.en.addresses.addresses\_train module 15 | ---------------------------------------------------------- 16 | 17 | .. automodule:: lexnlpprivate.extract.en.addresses.addresses_train 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | lexnlpprivate.extract.en.addresses.convert\_geonames\_cities\_to\_word\_set module 23 | ---------------------------------------------------------------------------------- 24 | 25 | .. 
automodule:: lexnlpprivate.extract.en.addresses.convert_geonames_cities_to_word_set 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: lexnlpprivate.extract.en.addresses 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlpprivate.extract.en.addresses.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlpprivate.extract.en.addresses.tests package 2 | ================================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlpprivate.extract.en.addresses.tests.test\_addresses\_train module 8 | ---------------------------------------------------------------------- 9 | 10 | .. automodule:: lexnlpprivate.extract.en.addresses.tests.test_addresses_train 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: lexnlpprivate.extract.en.addresses.tests 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlpprivate.extract.en.rst: -------------------------------------------------------------------------------- 1 | lexnlpprivate.extract.en package 2 | ================================ 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlpprivate.extract.en.addresses 10 | 11 | Module contents 12 | --------------- 13 | 14 | .. 
automodule:: lexnlpprivate.extract.en 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlpprivate.extract.rst: -------------------------------------------------------------------------------- 1 | lexnlpprivate.extract package 2 | ============================= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlpprivate.extract.en 10 | 11 | Module contents 12 | --------------- 13 | 14 | .. automodule:: lexnlpprivate.extract 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlpprivate.rst: -------------------------------------------------------------------------------- 1 | lexnlpprivate package 2 | ===================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlpprivate.extract 10 | 11 | Module contents 12 | --------------- 13 | 14 | .. automodule:: lexnlpprivate 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | -------------------------------------------------------------------------------- /documentation/docs/source/api/modules.rst: -------------------------------------------------------------------------------- 1 | lexpredict-contraxsuite-core 2 | ============================ 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | lexnlp 8 | lexnlpprivate 9 | setup 10 | -------------------------------------------------------------------------------- /documentation/docs/source/api/setup.rst: -------------------------------------------------------------------------------- 1 | setup module 2 | ============ 3 | 4 | .. 
automodule:: setup 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /documentation/docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to the LexNLP documentation! 2 | ================================== 3 | 4 | .. image:: https://s3.amazonaws.com/lexpredict.com-marketing/graphics/lexpredict_lexnlp_logo_horizontal_1.png 5 | :width: 200px 6 | :alt: LexNLP 7 | :align: center 8 | 9 | 10 | | 11 | 12 | 13 | Table of Contents 14 | ------------ 15 | .. toctree:: 16 | :maxdepth: 4 17 | 18 | about 19 | lexnlp 20 | changes 21 | license 22 | 23 | 24 | Indices and tables 25 | ================== 26 | * :ref:`genindex` 27 | * :ref:`modindex` 28 | * :ref:`search` 29 | -------------------------------------------------------------------------------- /documentation/docs/source/lexnlp.rst: -------------------------------------------------------------------------------- 1 | LexNLP package 2 | ============== 3 | 4 | 5 | .. image:: https://s3.amazonaws.com/lexpredict.com-marketing/graphics/lexpredict_lexnlp_logo_horizontal_1.png 6 | :width: 200px 7 | :alt: LexNLP 8 | :align: left 9 | 10 | 11 | .. toctree:: 12 | :maxdepth: 4 13 | :caption: Contents: 14 | 15 | modules/extract/extract 16 | modules/nlp/nlp 17 | 18 | -------------------------------------------------------------------------------- /documentation/docs/source/license.rst: -------------------------------------------------------------------------------- 1 | .. _license: 2 | 3 | ============ 4 | License 5 | ============ 6 | 7 | AGPL License 8 | ---------------- 9 | LexNLP is available by default under the terms of the GNU Affero General Public License v3.0. 
10 | https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE 11 | 12 | 13 | License Release 14 | ---------------- 15 | If you would like to request a release from the terms of the default AGPLv3 license, please contact us at: 16 | ContraxSuite Licensing . 17 | 18 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/extract/de/dates.rst: -------------------------------------------------------------------------------- 1 | .. _extract_de_dates: 2 | 3 | ============ 4 | :mod:`lexnlp.extract.de.dates`: Extracting date references 5 | ============ 6 | 7 | The :mod:`lexnlp.extract.de.dates` module contains methods that allow for the extraction 8 | of dates from text. Sample formats that are handled by this module include: 9 | 10 | * vom 29. März 2017 11 | * 16.5.2002 12 | 13 | The full list of current unit test cases can be found here: 14 | https://github.com/LexPredict/lexpredict-lexnlp/tree/master/lexnlp/extract/common/tests/test_dates 15 | 16 | 17 | .. currentmodule:: lexnlp.extract.de.dates 18 | 19 | 20 | Extracting dates 21 | ---------------- 22 | .. autofunction:: get_date_list 23 | 24 | Example :: 25 | 26 | >>> import lexnlp.extract.de.dates 27 | >>> text = " Artikel 39 des Gesetzes vom 29. März 2017 (BGBl. I S. 626) geändert worden ist" 28 | >>> print((lexnlp.extract.de.dates.get_date_list(text)) 29 | [{'location_start': 29, 30 | 'location_end': 42, 31 | 'value': datetime.datetime(2017, 3, 29, 0, 0), 32 | 'source': '29. März 2017'}] 33 | 34 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/extract/en/copyright.rst: -------------------------------------------------------------------------------- 1 | .. 
_extract_en_copyright: 2 | 3 | ============ 4 | :mod:`lexnlp.extract.en.copyright`: Extracting copyright references 5 | ============ 6 | 7 | The :mod:`lexnlp.extract.en.copyright` module contains methods that allow for the extraction 8 | of copyright references from text. 9 | 10 | 11 | The full list of current unit test cases can be found here: 12 | https://github.com/LexPredict/lexpredict-lexnlp/tree/master/test_data/lexnlp/extract/en/tests/test_copyright 13 | 14 | 15 | .. currentmodule:: lexnlp.extract.en.copyright 16 | 17 | 18 | Extracting copyrights 19 | ---------------- 20 | .. autofunction:: get_copyright 21 | 22 | Example :: 23 | 24 | >>> import lexnlp.extract.en.copyright 25 | >>> text = "(C) Copyright 1993-1996 Hughes Information Systems Company" 26 | >>> print(list(lexnlp.extract.en.copyright.get_copyright(text))) 27 | [('Copyright', '1993-1996', 'Hughes Information Systems Company')] 28 | 29 | >>> text = "Test copyrigh symbol © 2017, SIGN LLC" 30 | >>> print(list(lexnlp.extract.en.conditions.get_conditions(text))) 31 | print(list(lexnlp.extract.en.copyright.get_copyright(text))) 32 | [('©', '2017', 'SIGN LLC')] 33 | 34 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/extract/en/percents.rst: -------------------------------------------------------------------------------- 1 | .. _extract_en_percents: 2 | 3 | ============ 4 | :mod:`lexnlp.extract.en.percents`: Extracting percents and rates 5 | ============ 6 | 7 | The :mod:`lexnlp.extract.en.percents` module contains methods that allow for the extraction 8 | of percent and rate statements from text. Example statements that are covered by default in this module include: 9 | 10 | * one percent 11 | * 1% 12 | * 50 bps 13 | * fifty basis points 14 | 15 | The full list of current unit test cases can be found here: 16 | https://github.com/LexPredict/lexpredict-lexnlp/tree/master/test_data/lexnlp/extract/en/tests/test_percents 17 | 18 | 19 | .. 
currentmodule:: lexnlp.extract.en.percents 20 | 21 | 22 | Extracting conditions 23 | ---------------- 24 | .. autofunction:: get_percents 25 | 26 | Example :: 27 | 28 | >>> import lexnlp.extract.en.percents 29 | >>> text = "At a discount of 1%" 30 | >>> print(list(lexnlp.extract.en.percents.get_percents(text))) 31 | [('%', 1.0, 0.01)] 32 | >>> text = "At a discount of 10 basis points" 33 | >>> print(list(lexnlp.extract.en.percents.get_percents(text))) 34 | [('basis points', 10.0, 0.001)] 35 | 36 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/extract/en/pii.rst: -------------------------------------------------------------------------------- 1 | .. _extract_en_pii: 2 | 3 | ============ 4 | :mod:`lexnlp.extract.en.pii`: Extracting personally-identifiable information (PII) 5 | ============ 6 | 7 | The :mod:`lexnlp.extract.en.pii` module contains methods that allow for the extraction 8 | of personally identifying information from text. Examples include: 9 | 10 | * phone numbers 11 | * US social security numbers 12 | * names 13 | 14 | The full list of current unit test cases can be found here: 15 | https://github.com/LexPredict/lexpredict-lexnlp/tree/master/test_data/lexnlp/extract/en/tests/test_pii 16 | 17 | 18 | .. currentmodule:: lexnlp.extract.en.pii 19 | 20 | 21 | Extracting PII 22 | ---------------- 23 | .. autofunction:: get_pii 24 | 25 | Example :: 26 | 27 | >>> import lexnlp.extract.en.pii 28 | >>> text = "John Doe (999-12-3456)" 29 | >>> print(list(lexnlp.extract.en.pii.get_pii(text))) 30 | [('ssn', '999-12-3456')] 31 | >>> text = "Mary Doe (212-123-4567)" 32 | >>> print(list(lexnlp.extract.en.pii.get_pii(text))) 33 | [('us_phone', '(212) 123-4567')] 34 | 35 | 36 | .. autofunction:: get_ssns 37 | 38 | .. 
autofunction:: get_us_phones 39 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/extract/en/ratios.rst: -------------------------------------------------------------------------------- 1 | .. _extract_en_ratios: 2 | 3 | ============ 4 | :mod:`lexnlp.extract.en.ratios`: Extracting ratios 5 | ============ 6 | 7 | The :mod:`lexnlp.extract.en.ratios` module contains methods that allow for the extraction 8 | of ratio statements from text. Example statements include: 9 | 10 | * 3:1 11 | * 3.0:1.0 12 | * three to one 13 | 14 | 15 | The full list of current unit test cases can be found here: 16 | https://github.com/LexPredict/lexpredict-lexnlp/tree/master/test_data/lexnlp/extract/en/tests/test_ratios 17 | 18 | 19 | .. currentmodule:: lexnlp.extract.en.ratios 20 | 21 | 22 | Extracting conditions 23 | ---------------- 24 | .. autofunction:: get_ratios 25 | 26 | Example :: 27 | 28 | >>> import lexnlp.extract.en.ratios 29 | >>> text = "At a leverage ratio of no more than ten to one." 30 | >>> print(list(lexnlp.extract.en.ratios.get_ratios(text))) 31 | [(10, 1, 10.0)] 32 | >>> text = "At a leverage ratio of no more than 2.5:1." 33 | >>> print(list(lexnlp.extract.en.ratios.get_ratios(text))) 34 | [(2.5, 1.0, 2.5)] 35 | 36 | 37 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/extract/en/trademarks.rst: -------------------------------------------------------------------------------- 1 | .. _extract_en_trademarks: 2 | 3 | ============ 4 | :mod:`lexnlp.extract.en.trademarks`: Extracting trademark references 5 | ============ 6 | 7 | The :mod:`lexnlp.extract.en.trademarks` module contains methods that allow for the extraction 8 | of trademarks references from text. 
Examples include: 9 | 10 | * Widget™ 11 | * Widget(TM) 12 | * Widget® 13 | * Widget(R) 14 | 15 | The full list of current unit test cases can be found here: 16 | https://github.com/LexPredict/lexpredict-lexnlp/tree/master/test_data/lexnlp/extract/en/tests/test_trademarks 17 | 18 | 19 | .. currentmodule:: lexnlp.extract.en.trademarks 20 | 21 | 22 | Extracting conditions 23 | ---------------- 24 | .. autofunction:: get_trademarks 25 | 26 | Example :: 27 | 28 | >>> import lexnlp.extract.en.trademarks 29 | >>> text = "Customer agrees to license HAL(TM)" 30 | >>> print(list(lexnlp.extract.en.trademarks.get_trademarks(text))) 31 | ['HAL (TM)'] 32 | >>> text = "Customer agrees to purchase a minimum quantity of 1000 Widget® units" 33 | >>> print(list(lexnlp.extract.en.trademarks.get_trademarks(text))) 34 | ['Widget®'] 35 | 36 | 37 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/extract/en/urls.rst: -------------------------------------------------------------------------------- 1 | .. _extract_en_urls: 2 | 3 | ============ 4 | :mod:`lexnlp.extract.en.url`: Extracting URLs 5 | ============ 6 | 7 | The :mod:`lexnlp.extract.en.urls` module contains methods that allow for the extraction 8 | of URLs from text. 9 | 10 | The full list of current unit test cases can be found here: 11 | https://github.com/LexPredict/lexpredict-lexnlp/tree/master/test_data/lexnlp/extract/en/tests/test_urls 12 | 13 | .. currentmodule:: lexnlp.extract.en.urls 14 | 15 | Extracting constraints 16 | ---------------- 17 | .. 
autofunction:: get_urls 18 | 19 | Example :: 20 | 21 | >>> import lexnlp.extract.en.urls 22 | >>> text = "A copy of the terms can be found at www.acme.com/terms" 23 | >>> print(list(lexnlp.extract.en.urls.get_urls(text))) 24 | ['www.acme.com/terms'] 25 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/nlp/en/segments_pages.rst: -------------------------------------------------------------------------------- 1 | .. _nlp_en_segments_pages: 2 | 3 | ============ 4 | :mod:`lexnlp.nlp.en.segments.pages`: Segmenting pages in text 5 | ============ 6 | 7 | The :mod:`lexnlp.nlp.en.segments.pages` module contains methods for segmenting text 8 | into zero or more pages. 9 | 10 | 11 | .. attention:: 12 | The sections below are a work in progress. Thank you for your patience 13 | while we continue to expand and improve our documentation coverage. 14 | 15 | If you have any questions in the meantime, please feel free to log issues on 16 | GitHub at the URL below or contact us at the email below: 17 | 18 | - GitHub issues: https://github.com/LexPredict/lexpredict-lexnlp 19 | - Email: support@contraxsuite.com 20 | 21 | 22 | .. automodapi:: lexnlp.nlp.en.segments.pages 23 | :include-all-objects: 24 | :members: 25 | 26 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/nlp/en/segments_paragraphs.rst: -------------------------------------------------------------------------------- 1 | .. _nlp_en_segments_paragraphs: 2 | 3 | ============ 4 | :mod:`lexnlp.nlp.en.segments.pages`: Segmenting paragraphs in text 5 | ============ 6 | 7 | The :mod:`lexnlp.nlp.en.segments.paragraphs` module contains methods for segmenting text 8 | into zero or more paragraphs. 9 | 10 | .. attention:: 11 | The sections below are a work in progress. Thank you for your patience 12 | while we continue to expand and improve our documentation coverage. 
13 | 14 | If you have any questions in the meantime, please feel free to log issues on 15 | GitHub at the URL below or contact us at the email below: 16 | 17 | - GitHub issues: https://github.com/LexPredict/lexpredict-lexnlp 18 | - Email: support@contraxsuite.com 19 | 20 | .. automodapi:: lexnlp.nlp.en.segments.paragraphs 21 | :include-all-objects: 22 | :members: 23 | 24 | 25 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/nlp/en/segments_sections.rst: -------------------------------------------------------------------------------- 1 | .. _nlp_en_segments_sections: 2 | 3 | ============ 4 | :mod:`lexnlp.nlp.en.segments.sections`: Segmenting sections in text 5 | ============ 6 | 7 | The :mod:`lexnlp.nlp.en.segments.sections` module contains methods for segmenting text 8 | into zero or more sections. 9 | 10 | .. attention:: 11 | The sections below are a work in progress. Thank you for your patience 12 | while we continue to expand and improve our documentation coverage. 13 | 14 | If you have any questions in the meantime, please feel free to log issues on 15 | GitHub at the URL below or contact us at the email below: 16 | 17 | - GitHub issues: https://github.com/LexPredict/lexpredict-lexnlp 18 | - Email: support@contraxsuite.com 19 | 20 | .. automodapi:: lexnlp.nlp.en.segments.sections 21 | :include-all-objects: 22 | :members: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/nlp/en/segments_sentences.rst: -------------------------------------------------------------------------------- 1 | .. _nlp_en_segments_sentences: 2 | 3 | ============ 4 | :mod:`lexnlp.nlp.en.segments.sections`: Segmenting sentences in text 5 | ============ 6 | 7 | The :mod:`lexnlp.nlp.en.segments.sentences` module contains methods for segmenting text 8 | into zero or more sentences. 9 | 10 | .. attention:: 11 | The sections below are a work in progress. 
Thank you for your patience 12 | while we continue to expand and improve our documentation coverage. 13 | 14 | If you have any questions in the meantime, please feel free to log issues on 15 | GitHub at the URL below or contact us at the email below: 16 | 17 | - GitHub issues: https://github.com/LexPredict/lexpredict-lexnlp 18 | - Email: support@contraxsuite.com 19 | 20 | .. automodapi:: lexnlp.nlp.en.segments.sentences 21 | :include-all-objects: 22 | :members: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/nlp/en/segments_titles.rst: -------------------------------------------------------------------------------- 1 | .. _nlp_en_segments_titles: 2 | 3 | ============ 4 | :mod:`lexnlp.nlp.en.segments.titles`: Segmenting and identifying titles in text 5 | ============ 6 | 7 | The :mod:`lexnlp.nlp.en.segments.titles` module contains methods for identifying titles and 8 | segmenting text between zero or more titles. 9 | 10 | .. attention:: 11 | The sections below are a work in progress. Thank you for your patience 12 | while we continue to expand and improve our documentation coverage. 13 | 14 | If you have any questions in the meantime, please feel free to log issues on 15 | GitHub at the URL below or contact us at the email below: 16 | 17 | - GitHub issues: https://github.com/LexPredict/lexpredict-lexnlp 18 | - Email: support@contraxsuite.com 19 | 20 | .. automodapi:: lexnlp.nlp.en.segments.titles 21 | :include-all-objects: 22 | :members: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/nlp/en/segments_utils.rst: -------------------------------------------------------------------------------- 1 | .. 
_nlp_en_segments_utils: 2 | 3 | ============ 4 | :mod:`lexnlp.nlp.en.segments.utils`: Utilities for segmenting 5 | ============ 6 | 7 | The :mod:`lexnlp.nlp.en.segments.utils` module contains utility methods for 8 | segmenting text. 9 | 10 | .. attention:: 11 | The sections below are a work in progress. Thank you for your patience 12 | while we continue to expand and improve our documentation coverage. 13 | 14 | If you have any questions in the meantime, please feel free to log issues on 15 | GitHub at the URL below or contact us at the email below: 16 | 17 | - GitHub issues: https://github.com/LexPredict/lexpredict-lexnlp 18 | - Email: support@contraxsuite.com 19 | 20 | .. automodapi:: lexnlp.nlp.en.segments.utils 21 | :include-all-objects: 22 | :members: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/nlp/en/transforms_character.rst: -------------------------------------------------------------------------------- 1 | .. _nlp_en_transforms_characters: 2 | 3 | ============ 4 | :mod:`lexnlp.nlp.en.transforms.characters`: Transforming text into character-oriented features 5 | ============ 6 | 7 | The :mod:`lexnlp.nlp.en.transforms.characters` module contains methods 8 | that transform text into character distributions or related feature vectors. 9 | 10 | .. attention:: 11 | The sections below are a work in progress. Thank you for your patience 12 | while we continue to expand and improve our documentation coverage. 13 | 14 | If you have any questions in the meantime, please feel free to log issues on 15 | GitHub at the URL below or contact us at the email below: 16 | 17 | - GitHub issues: https://github.com/LexPredict/lexpredict-lexnlp 18 | - Email: support@contraxsuite.com 19 | 20 | .. 
automodapi:: lexnlp.nlp.en.transforms.characters 21 | :include-all-objects: 22 | :members: 23 | 24 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/nlp/en/transforms_tokens.rst: -------------------------------------------------------------------------------- 1 | .. _nlp_en_transforms_tokens: 2 | 3 | ============ 4 | :mod:`lexnlp.nlp.en.transforms.tokens`: Transforming text into token-oriented features 5 | ============ 6 | 7 | The :mod:`lexnlp.nlp.en.transforms.tokens` module contains methods 8 | that transform text into token distributions or related feature vectors. 9 | 10 | .. attention:: 11 | The sections below are a work in progress. Thank you for your patience 12 | while we continue to expand and improve our documentation coverage. 13 | 14 | If you have any questions in the meantime, please feel free to log issues on 15 | GitHub at the URL below or contact us at the email below: 16 | 17 | - GitHub issues: https://github.com/LexPredict/lexpredict-lexnlp 18 | - Email: support@contraxsuite.com 19 | 20 | .. 
automodapi:: lexnlp.nlp.en.transforms.tokens 21 | :include-all-objects: 22 | :members: 23 | 24 | -------------------------------------------------------------------------------- /lexnlp/config/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/config/en/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/config/en/geoentities_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Geo Entities extraction configuration. 3 | """ 4 | 5 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 6 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 7 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 8 | __version__ = "2.3.0" 9 | __maintainer__ = "LexPredict, LLC" 10 | __email__ = "support@contraxsuite.com" 11 | 12 | 13 | # Minimal length of geo entity aliases to search for. 14 | # Allows avoiding false-positives on first and last names abbreviations (A.M. Best) e.t.c. 15 | 16 | 17 | MIN_ALIAS_LEN = 2 18 | 19 | # List of aliases to exclude from search: [(alias:str, language:str, is_abbrev:bool), ...] 
20 | ALIAS_BLACK_LIST = [] 21 | -------------------------------------------------------------------------------- /lexnlp/config/es/es_regulations.csv: -------------------------------------------------------------------------------- 1 | trigger,position 2 | junta de,start 3 | Administración,start 4 | Apartado \p{Lu} de,start 5 | Auditoría Superior de,start 6 | Comisión,start 7 | Comisiones,start 8 | Comité de,start 9 | Congreso de,start 10 | Cuenta de,start 11 | Ejecutivo,start 12 | Código Fiscal,start 13 | Gobierno,start 14 | Hacienda Pública,start 15 | INSTITUCIONES DE,start 16 | Instituto para,start 17 | ley de,start 18 | Nacional Financiera,start 19 | Plan Nacional de,start 20 | Programa Nacional de,start 21 | Registro Nacional,start 22 | Reglamento,start 23 | Secretaría de,start 24 | Secretarío,start 25 | Finanzas Públicas,start 26 | Subsecretario de,start 27 | Tesorería de,start -------------------------------------------------------------------------------- /lexnlp/extract/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/all_locales/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | # -*- coding: utf-8 -*- 10 | 
-------------------------------------------------------------------------------- /lexnlp/extract/all_locales/citations.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 4 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 5 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 6 | __version__ = "2.3.0" 7 | __maintainer__ = "LexPredict, LLC" 8 | __email__ = "support@contraxsuite.com" 9 | 10 | 11 | from typing import Generator 12 | 13 | from lexnlp.extract.all_locales.languages import LANG_EN, LANG_DE, DEFAULT_LANGUAGE, Locale 14 | from lexnlp.extract.common.annotations.citation_annotation import CitationAnnotation 15 | from lexnlp.extract.en.citations import get_citation_annotations as get_citation_annotations_en 16 | from lexnlp.extract.de.citations import get_citation_annotations as get_citation_annotations_de 17 | 18 | 19 | ROUTINE_BY_LOCALE = { 20 | LANG_EN.code: get_citation_annotations_en, 21 | LANG_DE.code: get_citation_annotations_de 22 | } 23 | 24 | 25 | def get_citation_annotations( 26 | locale: str, 27 | text: str) -> Generator[CitationAnnotation, None, None]: 28 | routine = ROUTINE_BY_LOCALE.get(Locale(locale).language, ROUTINE_BY_LOCALE[DEFAULT_LANGUAGE.code]) 29 | yield from routine(text) 30 | -------------------------------------------------------------------------------- /lexnlp/extract/all_locales/court_citations.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 4 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 5 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 6 | __version__ = "2.3.0" 7 | __maintainer__ = "LexPredict, LLC" 8 | __email__ = "support@contraxsuite.com" 9 | 10 | 11 | from typing import Generator 12 | 13 | from 
lexnlp.extract.all_locales.languages import LANG_DE, Locale 14 | from lexnlp.extract.common.annotations.court_citation_annotation import CourtCitationAnnotation 15 | from lexnlp.extract.de.court_citations import get_court_citation_annotations as get_court_citation_annotations_de 16 | 17 | 18 | ROUTINE_BY_LOCALE = { 19 | LANG_DE.code: get_court_citation_annotations_de 20 | } 21 | 22 | 23 | def get_court_citation_annotations(locale: str, text: str, language: str = None) -> \ 24 | Generator[CourtCitationAnnotation, None, None]: 25 | routine = ROUTINE_BY_LOCALE.get(Locale(locale).language, ROUTINE_BY_LOCALE[LANG_DE.code]) 26 | yield from routine(text, language) 27 | -------------------------------------------------------------------------------- /lexnlp/extract/all_locales/money.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 4 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 5 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 6 | __version__ = "2.3.0" 7 | __maintainer__ = "LexPredict, LLC" 8 | __email__ = "support@contraxsuite.com" 9 | 10 | 11 | from typing import Generator 12 | 13 | from lexnlp.extract.all_locales.languages import LANG_EN, LANG_DE, DEFAULT_LANGUAGE, Locale 14 | from lexnlp.extract.common.annotations.money_annotation import MoneyAnnotation 15 | from lexnlp.extract.en.money import get_money_annotations as get_money_annotations_en 16 | from lexnlp.extract.de.money import get_money_annotations as get_money_annotations_de 17 | 18 | 19 | ROUTINE_BY_LOCALE = { 20 | LANG_EN.code: get_money_annotations_en, 21 | LANG_DE.code: get_money_annotations_de 22 | } 23 | 24 | 25 | def get_money_annotations( 26 | locale: str, 27 | text: str, 28 | float_digits: int = 4, 29 | ) -> Generator[MoneyAnnotation, None, None]: 30 | routine = ROUTINE_BY_LOCALE.get(Locale(locale).language, 
ROUTINE_BY_LOCALE[DEFAULT_LANGUAGE.code]) 31 | yield from routine(text, float_digits) 32 | -------------------------------------------------------------------------------- /lexnlp/extract/all_locales/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/all_locales/tests/test_locales.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | 4 | """ 5 | Languages unit tests. 6 | """ 7 | 8 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 9 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 10 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 11 | __version__ = "2.3.0" 12 | __maintainer__ = "LexPredict, LLC" 13 | __email__ = "support@contraxsuite.com" 14 | 15 | 16 | from unittest import TestCase 17 | 18 | from lexnlp.extract.all_locales.languages import Locale 19 | 20 | 21 | class TestLocales(TestCase): 22 | 23 | def test_locales_convert(self): 24 | data = [ 25 | {'input': 'en', 'output_locale_code': 'EN'}, 26 | {'input': 'en-US', 'output_locale_code': 'US'}, 27 | {'input': 'en/Gb', 'output_locale_code': 'GB'}, 28 | {'input': 'En_us', 'output_locale_code': 'US'}, 29 | ] 30 | output_language_code = 'en' 31 | for item in data: 32 | locale_obj = Locale(item['input']) 33 | self.assertEqual(locale_obj.language, output_language_code) 34 | self.assertEqual(locale_obj.locale_code, item['output_locale_code']) 35 | -------------------------------------------------------------------------------- 
/lexnlp/extract/common/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/common/annotation_locator_type.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | from enum import Enum 10 | 11 | 12 | class AnnotationLocatorType(Enum): 13 | RegexpBased = 1 14 | MlWordVectorBased = 2 15 | -------------------------------------------------------------------------------- /lexnlp/extract/common/annotation_type.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | from enum import Enum 10 | 11 | 12 | class AnnotationType(Enum): 13 | act = 1 14 | amount = 2 15 | citation = 3 16 | condition = 4 17 | constraint = 5 18 | copyright = 6 19 | court = 7 20 | court_citation = 8 21 | cusip = 9 22 | date = 10 23 | definition = 11 24 | distance = 12 25 | duration = 13 26 | geoentity = 14 27 | money = 15 28 | percent = 16 29 | pii = 17 30 | phone = 18 
31 | ssn = 19 32 | ratio = 20 33 | regulation = 21 34 | trademark = 22 35 | url = 23 36 | laws = 24 37 | -------------------------------------------------------------------------------- /lexnlp/extract/common/annotations/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/common/base_path.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | import os 10 | 11 | 12 | lexnlp_base_path = os.path.abspath(os.path.dirname(__file__) + '/../../../') 13 | 14 | lexnlp_test_path = os.path.join(lexnlp_base_path, 'test_data/') 15 | -------------------------------------------------------------------------------- /lexnlp/extract/common/copyrights/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/common/date_parsing/__init__.py: 
-------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/common/definitions/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/common/definitions/definition_match.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | class DefinitionMatch: 10 | """ 11 | used inside EsDefinitionsParser and SpanishParsingMethods 12 | to store intermediate parsing results 13 | """ 14 | def __init__(self): 15 | self.name = None # type: str 16 | self.start = 0 17 | self.end = 0 18 | self.probability = 0 19 | -------------------------------------------------------------------------------- /lexnlp/extract/common/durations/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | 
__copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/common/entities/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | # -*- coding: utf-8 -*- 10 | -------------------------------------------------------------------------------- /lexnlp/extract/common/ocr_rating/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | # -*- coding: utf-8 -*- 10 | -------------------------------------------------------------------------------- /lexnlp/extract/common/ocr_rating/reference_vectors/de.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/common/ocr_rating/reference_vectors/de.pickle -------------------------------------------------------------------------------- /lexnlp/extract/common/ocr_rating/reference_vectors/en.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/common/ocr_rating/reference_vectors/en.pickle -------------------------------------------------------------------------------- /lexnlp/extract/common/pattern_found.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | class PatternFound: 10 | """ 11 | used inside EsDefinitionsParser and SpanishParsingMethods 12 | to store intermediate parsing results 13 | """ 14 | def __init__(self): 15 | self.name = None # type: str 16 | self.start = 0 17 | self.end = 0 18 | self.probability = 0 19 | 20 | # pylint: disable=unused-argument 21 | def pattern_worse_than_target(self, p, text: str) -> bool: # p: PatternFound 22 | """ 23 | check what pattern is better then 2 patterns are considered duplicated 24 | "text" may be used in derived classes 25 | """ 26 | spans = self.start <= p.start <= self.end and \ 27 | self.start <= p.end <= self.end 28 | if not spans: 29 | return False 30 | return self.name.find(p.name) >= 0 31 | -------------------------------------------------------------------------------- /lexnlp/extract/common/special_characters.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | class SpecialCharacters: 10 | punctuation = {'.', ',', ':', '-', ';', ')', 
'(', ']', '{', '}' 11 | '[', '*', '/', '\\', '"', '\'', '!', '?', '%', 12 | '$', '^', '&', '@'} 13 | -------------------------------------------------------------------------------- /lexnlp/extract/common/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/common/tests/definitions_text_annotator.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | from typing import List 10 | from lexnlp.extract.common.annotations.definition_annotation import DefinitionAnnotation 11 | from lexnlp.tests.utility_for_testing import save_test_document, annotate_text 12 | 13 | 14 | def annotate_definitions_text(text: str, 15 | definitions: List[DefinitionAnnotation], 16 | save_path: str) -> None: 17 | markup = annotate_text(text, definitions) 18 | save_test_document(save_path, markup) 19 | -------------------------------------------------------------------------------- /lexnlp/extract/de/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | 
__version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/de/date_model.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/de/date_model.pickle -------------------------------------------------------------------------------- /lexnlp/extract/de/date_model.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | import string 10 | 11 | 12 | DE_UNICODE_ALPHAS = 'äöüẞ' 13 | DE_ALPHA_CHAR_SET = set(string.ascii_letters + DE_UNICODE_ALPHAS + DE_UNICODE_ALPHAS.upper()) 14 | 15 | DE_ALPHABET = DE_UNICODE_ALPHAS + DE_UNICODE_ALPHAS.upper() 16 | DATE_MODEL_CHARS = [] 17 | DATE_MODEL_CHARS.extend(DE_ALPHABET + string.ascii_letters) 18 | DATE_MODEL_CHARS.extend(string.digits) 19 | DATE_MODEL_CHARS.extend(['-', '/', ' ', '%', '#', '$', '.', ',']) 20 | MONTH_NAMES = ['Januar', 'Februar', 'März', 'April', 'Mai', 'Juni', 21 | 'Juli', 'August', 'September', 'Oktober', 'November', 'Dezember'] 22 | -------------------------------------------------------------------------------- /lexnlp/extract/de/language_tokens.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = 
"LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | import os 10 | 11 | from lexnlp.extract.common.language_dictionary_reader import LanguageDictionaryReader 12 | 13 | 14 | class DeLanguageTokens: 15 | abbreviations = {'nr.', 'abs.', 'no.', 'act.', 'inc.', 'p.', 'Inc.'} 16 | articles = ['der', 'die', 'das', 'des', 'dem', 'den', 17 | 'ein', 'eine', 'eines', 'einer', 'einem', 'einen'] 18 | conjunctions = ['und', 'oder'] 19 | 20 | @staticmethod 21 | def init(): 22 | abr_file_path = os.path.join(os.path.dirname(__file__), 23 | 'data/abbreviations.txt') 24 | if os.path.isfile(abr_file_path): 25 | file_set = LanguageDictionaryReader.read_str_set(abr_file_path) 26 | DeLanguageTokens.abbreviations = \ 27 | DeLanguageTokens.abbreviations.union(file_set) 28 | 29 | 30 | DeLanguageTokens.init() 31 | -------------------------------------------------------------------------------- /lexnlp/extract/de/model.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/de/model.pickle -------------------------------------------------------------------------------- /lexnlp/extract/de/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/en/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = 
"https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/en/addresses/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/en/addresses/addresses_clf.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/en/addresses/addresses_clf.pickle -------------------------------------------------------------------------------- /lexnlp/extract/en/addresses/data/city_name_words.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/en/addresses/data/city_name_words.pickle -------------------------------------------------------------------------------- /lexnlp/extract/en/addresses/data/nltk_pos_tag_indexes.json: -------------------------------------------------------------------------------- 1 | {"VBD": 36, "PDT": 24, "TO": 33, "WP": 42, "SYM": 32, "NNS": 23, "EX": 12, "(": 3, "VBP": 39, ",": 5, "VBZ": 40, "MD": 19, "JJ": 15, "NNP": 21, "WRB": 44, "DT": 11, "--": 6, "RB": 28, "FW": 13, "PRP": 26, "RBR": 29, "LS": 18, "JJS": 17, "CD": 10, "JJR": 16, "IN": 14, "WP$": 43, "''": 2, "RBS": 30, "UH": 34, "``": 45, "VBG": 
37, "RP": 31, "PRP$": 27, "VB": 35, "$": 1, ".": 7, ")": 4, "WDT": 41, "NNPS": 22, "NN": 20, "CC": 9, "POS": 25, ":": 8, "VBN": 38} -------------------------------------------------------------------------------- /lexnlp/extract/en/addresses/data/street_directions.csv: -------------------------------------------------------------------------------- 1 | "CENTRAL" 2 | "NORTH" 3 | "SOUTH" 4 | "EAST" 5 | "WEST" 6 | "NORTH-EAST" 7 | "NORTH-WEST" 8 | "SOUTH-EAST" 9 | "SOUTH-WEST" 10 | "NE" 11 | "NW" 12 | "SE" 13 | "SW" 14 | "N" 15 | "S" 16 | "E" 17 | "W" 18 | -------------------------------------------------------------------------------- /lexnlp/extract/en/addresses/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/en/contracts/README.md: -------------------------------------------------------------------------------- 1 | # Contract Classification 2 | 3 | *Date (ISO 8601): 2022-04-19* 4 | 5 | --- 6 | 7 | ## `Is-Contract?` Classifier 8 | 9 | ### Usage 10 | 11 | Download the default Scikit-Learn pipeline: 12 | 13 | ```python 14 | from lexnlp.ml.catalog.download import download_github_release 15 | download_github_release('pipeline/is-contract/') 16 | ``` 17 | 18 | Instantiate the classifier: 19 | 20 | ```python 21 | 22 | from lexnlp.extract.en.contracts.predictors import ProbabilityPredictorIsContract 23 | probability_predictor_is_contract: ProbabilityPredictorIsContract = ProbabilityPredictorIsContract() 24 | ``` 25 | 26 | Use the `ProbabilityPredictorIsContract` 27 | 28 | ```python 29 | 
probability_predictor_is_contract.is_contract( 30 | text='...', 31 | min_probability=0.5, 32 | return_probability=True, 33 | ) 34 | ``` 35 | 36 | ### Training 37 | 38 | Training processes can be found under `notebooks/classification/contracts/` 39 | 40 | --- 41 | 42 | ## Contract Type Classifier 43 | 44 | *Not yet implemented* 45 | 46 | -------------------------------------------------------------------------------- /lexnlp/extract/en/contracts/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/en/contracts/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/en/contracts/tests/test_contract_type.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | import codecs 10 | from lexnlp.extract.en.contracts.contract_type_detector 
import ContractTypeDetector 11 | 12 | 13 | def non_test_contract_type(): 14 | model_folder = '' 15 | d2v_path = f'{model_folder}/d2v_size100_window10.json' 16 | rf_path = f'{model_folder}/rf_size100_window10_depth64' 17 | d = ContractTypeDetector(rf_path, d2v_path) 18 | 19 | with codecs.open( 20 | '/home/andrey/Downloads/src_files/text/src_txt_files/1274055_2010-03-23_4.txt', 21 | 'r', encoding='utf-8') as fr: 22 | doc_text = fr.read() 23 | v = d.detect_contract_type_vector(doc_text) 24 | print(d.detect_contract_type(v, 0.15, 99, '?')) 25 | print(d.detect_contract_type(v, 0.15, 75, '?')) 26 | print(d.detect_contract_type(v, 0.19, 99, '?')) 27 | -------------------------------------------------------------------------------- /lexnlp/extract/en/contracts/tests/test_contracts.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | from lexnlp.extract.en.contracts.predictors import ProbabilityPredictorIsContract 10 | from lexnlp.tests import lexnlp_tests 11 | 12 | 13 | def actual_data_converter(val): 14 | return [str(val)] 15 | 16 | 17 | def test_is_contract(): 18 | 19 | probability_predictor_is_contract: ProbabilityPredictorIsContract = \ 20 | ProbabilityPredictorIsContract(pipeline=ProbabilityPredictorIsContract.get_default_pipeline()) 21 | 22 | lexnlp_tests.test_extraction_func_on_test_data( 23 | probability_predictor_is_contract.is_contract, 24 | actual_data_converter=actual_data_converter, 25 | min_probability=0.3) 26 | 27 | # def test_bad_cases(): 28 | # lexnlp_tests.test_extraction_func_on_test_data(get_addresses) 29 | -------------------------------------------------------------------------------- 
/lexnlp/extract/en/date_model.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/en/date_model.pickle -------------------------------------------------------------------------------- /lexnlp/extract/en/date_model.py: -------------------------------------------------------------------------------- 1 | """Date extraction for English. 2 | 3 | This module implements date extraction functionality in English. 4 | """ 5 | 6 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 7 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 8 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 9 | __version__ = "2.3.0" 10 | __maintainer__ = "LexPredict, LLC" 11 | __email__ = "support@contraxsuite.com" 12 | 13 | 14 | # pylint: disable=bare-except 15 | 16 | # Standard imports 17 | import os 18 | import string 19 | import joblib 20 | 21 | 22 | # Setup path 23 | 24 | 25 | MODULE_PATH = os.path.dirname(os.path.abspath(__file__)) 26 | 27 | # Load model 28 | MODEL_DATE = joblib.load(os.path.join(MODULE_PATH, "./date_model.pickle")) 29 | 30 | ALPHA_CHAR_SET = set(string.ascii_letters) 31 | DATE_MODEL_CHARS = [] 32 | DATE_MODEL_CHARS.extend(string.ascii_letters) 33 | DATE_MODEL_CHARS.extend(string.digits) 34 | DATE_MODEL_CHARS.extend(["-", "/", " ", "%", "#", "$"]) 35 | -------------------------------------------------------------------------------- /lexnlp/extract/en/entities/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 
-------------------------------------------------------------------------------- /lexnlp/extract/en/entities/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/en/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/en/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/en/tests/test_conditions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | 4 | """Condition unit tests for English. 5 | 6 | This module implements unit tests for the condition extraction functionality in English. 
7 | 8 | Todo: 9 | * Better testing for exact test in return sources 10 | * More pathological and difficult cases 11 | """ 12 | 13 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 14 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 15 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 16 | __version__ = "2.3.0" 17 | __maintainer__ = "LexPredict, LLC" 18 | __email__ = "support@contraxsuite.com" 19 | 20 | 21 | from lexnlp.extract.en.conditions import get_conditions 22 | from lexnlp.tests import lexnlp_tests 23 | 24 | 25 | def test_condition_fixed_example(): 26 | lexnlp_tests.test_extraction_func_on_test_data(get_conditions, 27 | actual_data_converter=lambda t: [elem[0] for elem in t], 28 | test_only_expected_in=True) 29 | -------------------------------------------------------------------------------- /lexnlp/extract/en/tests/test_conditions_plain.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | from unittest import TestCase 10 | 11 | from lexnlp.extract.common.annotations.condition_annotation import ConditionAnnotation 12 | from lexnlp.extract.en.conditions import get_condition_annotations 13 | from lexnlp.tests.typed_annotations_tests import TypedAnnotationsTester 14 | 15 | 16 | class TestConditionsPlain(TestCase): 17 | 18 | def test_file_samples(self): 19 | tester = TypedAnnotationsTester() 20 | tester.test_and_raise_errors( 21 | get_condition_annotations, 22 | 'lexnlp/typed_annotations/en/condition/conditions.txt', 23 | ConditionAnnotation) 24 | -------------------------------------------------------------------------------- 
/lexnlp/extract/en/tests/test_constraints.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | 4 | """Constraints unit tests for English. 5 | 6 | This module implements unit tests for the constraint extraction functionality in English. 7 | 8 | Todo: 9 | * Better testing for exact test in return sources 10 | * More pathological and difficult cases 11 | """ 12 | 13 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 14 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 15 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 16 | __version__ = "2.3.0" 17 | __maintainer__ = "LexPredict, LLC" 18 | __email__ = "support@contraxsuite.com" 19 | 20 | 21 | from lexnlp.extract.en.constraints import get_constraints 22 | from lexnlp.tests import lexnlp_tests 23 | 24 | 25 | def test_constraint_fixed_example(): 26 | lexnlp_tests.test_extraction_func_on_test_data(get_constraints, 27 | actual_data_converter=lambda t: [elem[0] for elem in t], 28 | test_only_expected_in=True) 29 | -------------------------------------------------------------------------------- /lexnlp/extract/en/tests/test_copyright.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | 4 | """Copyright unit tests for English. 5 | 6 | This module implements unit tests for the copyright extraction functionality in English. 
7 | 8 | Todo: 9 | * Better testing for exact test in return sources 10 | * More pathological and difficult cases 11 | """ 12 | 13 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 14 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 15 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 16 | __version__ = "2.3.0" 17 | __maintainer__ = "LexPredict, LLC" 18 | __email__ = "support@contraxsuite.com" 19 | 20 | 21 | from lexnlp.extract.en.copyright import get_copyrights 22 | from lexnlp.tests import lexnlp_tests 23 | 24 | 25 | def test_copyright(): 26 | lexnlp_tests.test_extraction_func_on_test_data(get_copyrights, return_sources=True) 27 | -------------------------------------------------------------------------------- /lexnlp/extract/en/tests/test_definitions_template.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | from unittest import TestCase 10 | 11 | from lexnlp.extract.common.annotations.definition_annotation import DefinitionAnnotation 12 | from lexnlp.extract.en.definitions import get_definition_annotations 13 | from lexnlp.tests.typed_annotations_tests import TypedAnnotationsTester 14 | 15 | 16 | class TestDefinitionsTemplate(TestCase): 17 | 18 | def test_file_samples(self): 19 | tester = TypedAnnotationsTester() 20 | tester.test_and_raise_errors( 21 | get_definitions_sorted, 22 | 'lexnlp/typed_annotations/en/definition/definitions.txt', 23 | DefinitionAnnotation) 24 | 25 | 26 | def get_definitions_sorted(text: str): 27 | annotations = list(get_definition_annotations(text)) 28 | annotations.sort(key=lambda a: a.coords[0]) 29 | return annotations 30 | 
-------------------------------------------------------------------------------- /lexnlp/extract/en/tests/test_trademarks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | 4 | """Trademark unit tests for English. 5 | 6 | This module implements unit tests for the Trademark extraction functionality in English. 7 | 8 | Todo: 9 | * Better testing for exact test in return sources 10 | * More pathological and difficult cases 11 | """ 12 | 13 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 14 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 15 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 16 | __version__ = "2.3.0" 17 | __maintainer__ = "LexPredict, LLC" 18 | __email__ = "support@contraxsuite.com" 19 | 20 | 21 | # Project imports 22 | from lexnlp.extract.en.trademarks import get_trademarks 23 | from lexnlp.tests import lexnlp_tests 24 | 25 | 26 | def test_trademarks(): 27 | lexnlp_tests.test_extraction_func_on_test_data(get_trademarks) 28 | -------------------------------------------------------------------------------- /lexnlp/extract/en/tests/test_urls.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | 4 | """Urls unit tests for English. 5 | 6 | This module implements unit tests for the urls extraction functionality in English. 
7 | 8 | Todo: 9 | * Better testing for exact test in return sources 10 | * More pathological and difficult cases 11 | """ 12 | 13 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 14 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 15 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 16 | __version__ = "2.3.0" 17 | __maintainer__ = "LexPredict, LLC" 18 | __email__ = "support@contraxsuite.com" 19 | 20 | 21 | # Project imports 22 | from lexnlp.extract.en.urls import get_urls 23 | from lexnlp.tests import lexnlp_tests 24 | 25 | 26 | def test_urls(): 27 | lexnlp_tests.test_extraction_func_on_test_data(get_urls) 28 | -------------------------------------------------------------------------------- /lexnlp/extract/es/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/es/language_tokens.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | class EsLanguageTokens: 10 | """ 11 | Spanish parts of speech, used in a number of parsing methods 12 | """ 13 | abbreviations = {'nr.', 'abs.', 'no.', 'act.', 'inc.', 'p.'} 14 | articles = ['el', 'la', 'los', 'las'] 15 | conjunctions = ['und', 'oder'] 16 | 
-------------------------------------------------------------------------------- /lexnlp/extract/es/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/ml/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/ml/classifier/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/ml/classifier/data/unicode_character_categories.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/ml/classifier/data/unicode_character_categories.pickle 
-------------------------------------------------------------------------------- /lexnlp/extract/ml/classifier/data/unicode_character_category_mapping.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/ml/classifier/data/unicode_character_category_mapping.pickle -------------------------------------------------------------------------------- /lexnlp/extract/ml/classifier/data/unicode_character_top_category_mapping.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/ml/classifier/data/unicode_character_top_category_mapping.pickle -------------------------------------------------------------------------------- /lexnlp/extract/ml/detector/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/ml/detector/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- 
/lexnlp/extract/ml/en/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/ml/en/data/definition_model_layered.pickle.gzip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/ml/en/data/definition_model_layered.pickle.gzip -------------------------------------------------------------------------------- /lexnlp/extract/ml/en/definitions/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/ml/en/definitions/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/ml/environment.py: 
import os


# Absolute directory containing this module (symlinks resolved); used as
# the root for data files bundled with lexnlp.extract.ml.
ENV_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
# English-language data directory (the pickled definition models are
# stored under en/data).
ENV_EN_DATA_DIRECTORY = os.path.join(ENV_DIRECTORY, 'en/data')
= "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/nlp/en/collocation_bigrams_100.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/collocation_bigrams_100.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/collocation_bigrams_1000.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/collocation_bigrams_1000.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/collocation_bigrams_10000.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/collocation_bigrams_10000.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/collocation_bigrams_100000.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/collocation_bigrams_100000.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/collocation_bigrams_50000.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/collocation_bigrams_50000.pickle -------------------------------------------------------------------------------- 
/lexnlp/nlp/en/collocation_trigrams_100.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/collocation_trigrams_100.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/collocation_trigrams_1000.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/collocation_trigrams_1000.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/collocation_trigrams_10000.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/collocation_trigrams_10000.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/collocation_trigrams_100000.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/collocation_trigrams_100000.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/collocation_trigrams_50000.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/collocation_trigrams_50000.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/segments/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | 
__copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/nlp/en/segments/page_segmenter.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/segments/page_segmenter.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/segments/paragraph_segmenter.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/segments/paragraph_segmenter.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/segments/section_segmenter.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/segments/section_segmenter.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/segments/sentence_segmenter.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/segments/sentence_segmenter.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/segments/title_locator.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/segments/title_locator.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/stopwords.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/stopwords.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/nlp/en/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/nlp/train/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = 
"support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/nlp/train/en/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/nlp/train/en/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/tests/values_comparer.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | 
def values_look_equal(a, b) -> bool:
    """
    Loosely compare two values for test purposes.

    Values are considered equal when they compare equal directly, when one
    is an empty string and the other is falsy, when both are numbers within
    a 0.001% relative tolerance, or when their string representations match.

    :param a: first value
    :param b: second value
    :return: True when the values "look" equal as described above
    """
    if a == b:
        return True

    # An empty string is treated as equal to any falsy value (None, 0, ...).
    a_is_blank_str = isinstance(a, str) and not a
    b_is_blank_str = isinstance(b, str) and not b
    if (a_is_blank_str and not b) or (b_is_blank_str and not a):
        return True

    if isinstance(a, numbers.Number) and isinstance(b, numbers.Number):
        fa = float(a)
        fb = float(b)
        diff = abs(fa - fb)
        # Percentage difference relative to each operand; zero operands
        # contribute 0 so we never divide by zero.
        rel_a = 0 if fa == 0 else 100 * diff / abs(fa)
        rel_b = 0 if fb == 0 else 100 * diff / abs(fb)
        # Equal when the larger relative difference is below 0.001%.
        return max(rel_a, rel_b) < 0.001

    # Fall back to comparing string representations; any failure while
    # stringifying means "not equal" rather than an error.
    try:
        same_repr = str(a) == str(b)
    except:  # pylint:disable=bare-except
        same_repr = False
    return same_repr
def collapse_sequence(sequence: Iterable,
                      predicate: Callable[[Any, Any], Any],
                      accumulator: Any = 0.0) -> Any:
    """
    Left-fold ``sequence`` into a single value.

    :param sequence: iterable of items to fold
    :param predicate: callable (item, accumulator) -> new accumulator
    :param accumulator: initial accumulator value (default 0.0)
    :return: the final accumulator after visiting every item
    """
    result = accumulator
    for element in sequence:
        result = predicate(element, result)
    return result


def count_sequence_matches(sequence: Iterable,
                           predicate: Callable[[Any], bool]) -> int:
    """
    Count the items of ``sequence`` for which ``predicate`` is truthy.

    :param sequence: iterable of items to inspect
    :param predicate: callable item -> bool
    :return: number of matching items
    """
    return collapse_sequence(
        sequence,
        lambda element, total: total + 1 if predicate(element) else total,
        0)
class TestMap(TestCase):
    """Unit tests for the attribute-access dict wrapper ``Map``."""

    def test_map(self):
        # Flat mapping: subscript and attribute access return the same value.
        simple = Map({'name': 'Siemens', 'age': 108})
        self.assertEqual('Siemens', simple['name'])
        self.assertEqual('Siemens', simple.name)

        # Nested dict values also support both access styles.
        nested = Map({'name': {'company': 'Siemens', 'trademark': '(c)Siemens'}})
        self.assertEqual('Siemens', nested.name['company'])
        self.assertEqual('Siemens', nested.name.company)

        # New attributes can be assigned on the fly, arbitrarily deep.
        nested.name.specie = Map()
        nested.name.specie.legal = 'xXx'
        self.assertEqual('xXx', nested.name.specie.legal)
Ill.', 'sh', 'should', 'find']) 22 | rst = finder.find_word(text, True) 23 | self.assertEqual(3, len(rst)) 24 | -------------------------------------------------------------------------------- /lexnlp/utils/unicode/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/utils/unicode/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/utils/unicode/unicode_character_categories.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/utils/unicode/unicode_character_categories.pickle -------------------------------------------------------------------------------- /lexnlp/utils/unicode/unicode_character_category_mapping.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/utils/unicode/unicode_character_category_mapping.pickle -------------------------------------------------------------------------------- 
import pickle


class RenameUnpickler(pickle.Unpickler):
    """Unpickler that transparently maps renamed sklearn module paths.

    Old pickles reference private sklearn modules that moved in newer
    sklearn releases; this remaps them so such pickles still load.
    """

    # legacy module path -> current module path
    _MODULE_RENAMES = {
        "sklearn.tree.tree": "sklearn.tree",
        "sklearn.ensemble.forest": "sklearn.ensemble._forest",
    }

    def find_class(self, module, name):
        # Substitute the renamed module (if any) before the normal lookup.
        return super().find_class(self._MODULE_RENAMES.get(module, module), name)


def renamed_load(file_obj):
    """Load a pickle from ``file_obj``, applying the module renames above."""
    return RenameUnpickler(file_obj).load()
cloudpickle==2.1.0 3 | dateparser==1.1.1 4 | docutils==0.17.1 5 | gensim==4.1.2 6 | joblib==1.1.0 7 | elasticsearch==7.8.0 8 | ipdb==0.13.9 9 | lxml==4.9.0 10 | memory-profiler==0.60.0 11 | nltk==3.7 12 | nose==1.3.7 13 | num2words==0.5.10 14 | numpy==1.22.3 15 | pandas==1.4.2 16 | psutil==5.9.1 17 | pycountry==22.3.5 18 | pytest-cov==3.0.0 19 | pytest-pep8==1.0.6 20 | pytest-pylint==0.18.0 21 | pytest-xdist==1.33.1 22 | python-dateutil==2.8.2 23 | regex==2022.3.2 24 | reporters-db==3.2.18 25 | requests==2.27.1 26 | scikit-learn==0.23.1 27 | scipy==1.8.1 28 | sphinx==5.0.1 29 | sphinx-rtd-theme==1.0.0 30 | tika==1.24 31 | twine==4.0.1 32 | Unidecode==1.3.4 33 | us==2.0.2 34 | zahlwort2num==0.3.0 -------------------------------------------------------------------------------- /python-requirements-full.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.11.1 2 | cloudpickle==2.1.0 3 | coverage==6.4.1 4 | https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz 5 | dateparser==1.1.1 6 | docutils==0.17.1 7 | elasticsearch==7.8.0 8 | gensim==4.1.2 9 | joblib==1.1.0 10 | ipdb==0.13.9 11 | lxml==4.9.0 12 | memory-profiler==0.60.0 13 | nltk==3.7 14 | nose==1.3.7 15 | num2words==0.5.10 16 | numpy==1.22.3 17 | pandas==1.4.2 18 | psutil==5.9.1 19 | pycountry==22.3.5 20 | pylint==2.14.1 21 | pytest==7.1.2 22 | pytest-cache==1.0 23 | pytest-cov==3.0.0 24 | pytest-pep8==1.0.6 25 | pytest-pylint==0.18.0 26 | pytest-xdist==1.33.0 27 | python-coveralls==2.9.3 28 | python-dateutil==2.8.1 29 | regex==2022.3.2 30 | reporters-db==3.2.18 31 | requests==2.27.1 32 | scikit-learn==0.23.1 33 | scipy==1.8.1 34 | sphinx==5.0.1 35 | sphinx-rtd-theme==1.0.0 36 | tika==1.24 37 | twine==4.0.1 38 | Unidecode==1.3.4 39 | us==2.0.2 40 | zahlwort2num==0.3.0 41 | -------------------------------------------------------------------------------- /python-requirements-notes.txt: 
1. Used dateparser==0.7.2 instead of 0.7.6 because 0.7.6 incorrectly parses "one 10-11-2017" as "01-10-2017".
2. Used pandas 0.24.2 instead of 1.0.5 because 0.25.0 and later versions break the feature DataFrames for the
   page/paragraph/sentence/section pickled models, causing them to predict wrong results.
3. Sphinx and twine install the newest docutils==0.16, but it is incompatible with gensim==3.8.3 (via botocore). Installed docutils==0.15.2 instead.
#!/usr/bin/env bash
# Based on https://github.com/vaites/php-apache-tika
#
# Starts a local Apache Tika server (used for document text extraction)
# unless one for the same version is already running. The whole script is
# a no-op unless the LEXNLP_USE_TIKA environment variable equals "true".

if [ "$LEXNLP_USE_TIKA" = true ]; then

    # Port the Tika server listens on.
    PORT=9998
    # Directory holding the downloaded Tika jars (see scripts/download_tika.sh);
    # overridable via APACHE_TIKA_BINARIES.
    BINARIES=${APACHE_TIKA_BINARIES:-bin}
    # Tika version to run; overridable via APACHE_TIKA_VERSION.
    VERSION=${APACHE_TIKA_VERSION:-"1.16"}

    # The grep process itself usually appears in the ps output, hence the
    # threshold of 2 below to detect a real server process.
    RUNNING=`ps aux | grep -c tika-server-$VERSION`

    if [ $RUNNING -lt 2 ]; then
        java -version
        echo "Starting Tika Server $VERSION"
        java -jar "$BINARIES/tika-server-$VERSION.jar" -p $PORT 2> /tmp/tika-server-$VERSION.log &
        # NOTE(review): PORT is incremented but never read afterwards —
        # confirm whether launching servers on successive ports was intended.
        ((PORT++))
        # Give the server a moment to come up before callers use it.
        sleep 5
    else
        echo "Tika Server $VERSION already running"
    fi

fi
Abkürzung,Kurztitel,Titel 2 | AABG,,Gesetz zur Begrenzung der Arzneimittelausgaben der gesetzlichen Krankenversicherung 3 | AAG,Aufwendungsausgleichsgesetz,Gesetz über den Ausgleich der Arbeitgeberaufwendungen für Entgeltfortzahlung 4 | ÄArbVtrG,,Gesetz über befristete Arbeitsverträge mit Ärzten in der Weiterbildung 5 | AAÜG,Anspruchs- und Anwartschaftsüberführungsgesetz,Gesetz zur Überführung der Ansprüche und Anwartschaften aus Zusatz- und Sonderversorgungssystemen des Beitrittsgebiets 6 | AAÜG-ÄndG,,Gesetz zur Änderung und Ergänzung des Anspruchs- und Anwartschaftsüberführungsgesetzes -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/entities/tests/test_nltk_maxent/test_companies_count.csv: -------------------------------------------------------------------------------- 1 | "Text","Company Name","Company Type","Count" 2 | "This Amendment to Employment Agreement (“Amendment”) is made and entered into this 18th day of July, 3 | 2005, by and between OSI SYSTEMS, INC. (“Company”), a California corporation, and Anuj Wadhawan (“Employee”).","OSI SYSTEMS","CORP",1 4 | "Fox Factory Holding Corp and Fox Factory Holding Corporation are the same company.","Fox Factory Holding","CORP",2 5 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/entities/tests/test_nltk_maxent/test_companies_rs.csv: -------------------------------------------------------------------------------- 1 | Text,Company Name,Company Type 2 | "This Amendment to Employment Agreement (“Amendment”) is made and entered into this 18th day of July, 3 | 2005, by and between OSI SYSTEMS, INC. (“Company”), a California corporation, and Anuj Wadhawan (“Employee”).",OSI SYSTEMS,INC 4 | "AMERICAN RESIDENTIAL GAP LLC (ARG), a Michigan Limited Liability Company 5 | with address at 380, N. Old Woodward Avenue, Ste. 300, Birmingham, MI 48009. 
6 | And 7 | PROGREEN CONSTRUCTION LLC (PGC), a Michigan Limited Liability Company 8 | with address at 380 N. Old Woodward Avenue, Ste. 226, Birmingham, MI 48009. 9 | ",AMERICAN RESIDENTIAL GAP,LLC 10 | ,PROGREEN CONSTRUCTION,LLC 11 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/entities/tests/test_nltk_re/test_companies_in_article.csv: -------------------------------------------------------------------------------- 1 | Text,Company Name,Company Type,Company Description 2 | "The Amendment, dated as of May 31, 3 | 1999, between California Federal Bank, A Federal Savings Bank, (the ""Company"") 4 | successor by merger to First Nationwide Bank, A Federal Savings Bank, (""FNB"") 5 | and Christie S. Flanagan (the ""Executive"").",California Federal,,Bank 6 | ,Federal Savings,,Bank 7 | "This Amendment to Employment Agreement (“Amendment”) is made and entered into this 18th day of July, 8 | 2005, by and between OSI SYSTEMS, INC. (“Company”), a California corporation, and Anuj Wadhawan (“Employee”).",OSI SYSTEMS,INC, 9 | " By and between American Residential Gap LLC (ARG), a Michigan Limited Liability Company with 10 | address at 380, N. Old Woodward Avenue, Ste. 300, Birmingham, MI 48009, and Progreen Construction LLC 11 | (PGC), a Michigan Limited Liability Company with address at 380 N. Old Woodward Avenue, Ste. 
226, 12 | Birmingham, MI 48009.",American Residential Gap,LLC, 13 | ,Progreen Construction,LLC, 14 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/entities/tests/test_nltk_re/test_company_article_regex.csv: -------------------------------------------------------------------------------- 1 | "Text","Company Name","Company Type","Company Type Abbr","Company Type Label","Company Description" 2 | "MK GOLD EXPLORATION B.V., a Dutch private company with limited liability (“Borrower”), and LEUCADIA NATIONAL CORPORATION, a New York corporation (“Lender”)","MK GOLD EXPLORATION","B.V.","BV","Besloten vennootschap", 3 | ,"LEUCADIA NATIONAL","CORPORATION","CORP","Corporation", 4 | "CREDIT AGREEMENT 5 | 6 | Dated as of April 20, 2011 7 | Among 8 | THE HANOVER INSURANCE GROUP, INC. 9 | 10 | as Borrower 11 | THE 12 | LENDERS NAMED HEREIN 13 | as Lenders 14 | GOLDMAN SACHS BANK USA 15 | as Sole Arranger and Bookrunner 16 | 17 | MORGAN STANLEY SENIOR FUNDING, INC 18 | as Syndication Agent 19 | WELLS FARGO BANK, NATIONAL ASSOCIATION 20 | 21 | as Documentation Agent 22 | and 23 | GOLDMAN SACHS BANK USA 24 | 25 | as Administrative Agent","THE HANOVER INSURANCE GROUP","INC","CORP","Corporation", 26 | ,"GOLDMAN SACHS",,,,"BANK" 27 | ,"MORGAN STANLEY SENIOR FUNDING","INC","CORP","Corporation", 28 | ,"WELLS FARGO","NATIONAL ASSOCIATION","NA","National Association","BANK" 29 | ,"GOLDMAN SACHS",,,,"BANK" 30 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/entities/tests/test_nltk_re/test_company_as.csv: -------------------------------------------------------------------------------- 1 | Text,Company Name,Company Type,Company Description,Party Type 2 | "Acme, Inc. 
as Lead Borrower","Acme",Inc,,Lead Borrower 3 | "HF Logistics-SKX T1, LLC, as Borrower","HF Logistics-SKX T1",LLC,,Borrower 4 | "dated as of 5 May, 2017" 5 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/entities/tests/test_nltk_re/test_company_regex.csv: -------------------------------------------------------------------------------- 1 | Text,Company Name,Company Type,Company Description 2 | "ACME, INC.",ACME,INC, 3 | "MK GOLD EXPLORATION B.V., a Dutch private company with limited liability (“Borrower”), and LEUCADIA NATIONAL CORPORATION, a New York corporation (“Lender”)",MK GOLD EXPLORATION,B.V., 4 | ,LEUCADIA NATIONAL,CORPORATION, 5 | "Wells Fargo Bank Minnesota, National Association",Wells Fargo Bank Minnesota,National Association,Bank 6 | "Deutsche Bank Securities Inc.",Deutsche Bank Securities,Inc,Bank 7 | "This is The Depository Trust & Clearing Corporation (“DTCC“)","The Depository Trust & Clearing","Corporation","Trust" 8 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/entities/tests/test_stanford_ner/test_stanford_org_example_in.csv: -------------------------------------------------------------------------------- 1 | Text,Organization 2 | "This Amendment to Executive Employment Agreement, dated effective as of February 22, 3 | 2011, is between Allis-Chalmers Energy Inc. (the “Company”) and Theodore F. Pound III 4 | (“Executive”).",Allis-Chalmers Energy Inc 5 | "The following agreement effective 1 January 2006 is hereby entered into between Art 6 | Hicks (hereinafter known as Executive) and Cybex International (together with its 7 | affiliated corporations hereinafter known as the “Company”) and having its principal 8 | offices at 10 Trotter Drive, Medway, MA. 
02053.",Cybex International 9 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_citations/test_get_citations.csv: -------------------------------------------------------------------------------- 1 | Text,Volume,Reporter,Reporter Full Name,Page,Page2,Court,Year,Source Text 2 | "bob lissner v. test 1 F.2d 1, 2-5 (2d Cir., 1982)",1,F.2d,Federal Reporter,1,2-5,2d Cir.,1982,"1 F.2d 1, 2-5 (2d Cir., 1982)" 3 | "bob lissner v. test 1 F.2d 1, 2-5 (1982)",1,F.2d,Federal Reporter,1,2-5,,1982,"1 F.2d 1, 2-5 (1982)" 4 | "bob lissner v. test 1 F.2d 1, 25 (1982)",1,F.2d,Federal Reporter,1,25,,1982,"1 F.2d 1, 25 (1982)" 5 | bob lissner v. test 1 F.2d 1 (1982),1,F.2d,Federal Reporter,1,,,1982,1 F.2d 1 (1982) 6 | bob lissner v. test 1 F.2d 1,1,F.2d,Federal Reporter,1,,,,1 F.2d 1 7 | "bob lissner v. test 1 F.2d 1, 2-5 (25 Fed. Cl. 20)",1,F.2d,Federal Reporter,1,2-5,,,"1 F.2d 1, 2-5" 8 | ,25,Fed. Cl.,United States Claims Court Reporter,20,,,,25 Fed. Cl. 20 9 | "green cow v. boy 1 Wash. 1, 25 (1795)",1,Wash.,"Virginia Reports, Washington",1,25,,1795,"1 Wash. 1, 25 (1795)" 10 | "green cow v. boy 1 Wash. 1, 25 (1900)",1,Wash.,Washington Reports,1,25,,1900,"1 Wash. 1, 25 (1900)" 11 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_copyright/test_copyright.csv: -------------------------------------------------------------------------------- 1 | Text,Symbol,Year,Name,Text 2 | (C)Copyright 1993-1996 Hughes Information Systems Company,Copyright,1993-1996,Hughes Information Systems Company,(C) Copyright 1993-1996 Hughes Information Systems Company 3 | "(C)Maverick(R) International Processing Services, Inc. 1999",(C),1999,"Maverick (R) International Processing Services, Inc","(C) Maverick (R) International Processing Services, Inc. 1999" 4 | "Copyright (C) 1998, Avid Technology, Inc. and its licensors. 
All rights 5 | reserved.",(C),1998,"Avid Technology, Inc","Copyright (C) 1998, Avid Technology, Inc" 6 | "Test copyrigh symbol © 2017, SIGN LLC",©,2017,SIGN LLC,"© 2017, SIGN LLC" -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_courts/test_courts_longest_match.csv: -------------------------------------------------------------------------------- 1 | "Text","Court Type","Court Name" 2 | "One one Bankr. E.D.N.C. two two two.","Bankruptcy Court","Eastern District of North Carolina" 3 | "One Bankr. E.D.N.C. E.D.N.C. two E.D.N.C. three","Bankruptcy Court","Eastern District of North Carolina" 4 | ,"Federal District Court","Eastern District of North Carolina" 5 | "E.D.N.C. ","Federal District Court","Eastern District of North Carolina" 6 | "One Bankr. E.D.N.C. Northern District of Mississippi two three 7 | Northern District of New York 8 | ","Bankruptcy Court","Eastern District of North Carolina" 9 | ,"Bankruptcy Court","Northern District of Mississippi" 10 | ,"Federal District Court","Northern District of Mississippi" 11 | ,"Federal District Court","Northern District of New York" 12 | ,"Bankruptcy Court","Northern District of New York" 13 | "One Bankr.E.D.N.C. E.D. N.C. two E.D.N.C. 
three","Bankruptcy Court","Eastern District of North Carolina" 14 | ,"Federal District Court","Eastern District of North Carolina" 15 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_dates/test_fixed_dates_nonstrict.csv: -------------------------------------------------------------------------------- 1 | Text,Date 2 | "The term of this lease shall 3 | be for a period of five years, commencing 4 | on the 1st day of April, 1995, and terminating on the 31st day of 5 | March, 6 | 2000 with an option for an additional five years at the same terms and 7 | conditions in this lease, provided that TENANT shall have given the 8 | LANDLORD written notice of TENANT’s intention to do so six (6) months prior 9 | to the expiration of this lease and that the Tenant is not in default 10 | of the Lease.",1995-04-01 11 | ,2000-03-31 12 | t may 13 | Lockheed Martin Corporation 14 | he Decided to make a break 15 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_dates/test_fixed_raw_dates.csv: -------------------------------------------------------------------------------- 1 | Text,Date 2 | No later than 2017-06-01.,2017-06-01 3 | "Dated as of June 1, 2017",2017-06-01 4 | Will be completed by June 2017,2017-06-01 5 | Will be completed by June,06-01 6 | "Will be completed by the 1st day of June, 2017",2017-06-01 7 | Commencement Date: 07/01/2004.,2004-07-01 8 | "From 12:01 a.m. on March 1, 1999 (the 'Commencement Date') 9 | through 1l:59 p.m. 
on November 30, 2002 10 | (the 'Expiration Date')",2002-11-30 11 | ,1999-03-01 00:01:00 12 | "Commencement Date: August 1, 2013.",2013-08-01 13 | "No date here", -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_dict_entities/test_normalize_text.csv: -------------------------------------------------------------------------------- 1 | "Text","Normalized Text" 2 | "Bankr. E.D.N.C."," bankr . e . d . n . c . " 3 | "Bankr.E.D.N.C."," bankr . e . d . n . c . " 4 | "Something Bankr. E.D.N.C. else."," something bankr . e . d . n . c . else . " 5 | "SomethingBankr.E.D.N.C. else."," somethingbankr . e . d . n . c . else . " 6 | "Something/Bankr. E.D.N.C. else."," something/bankr . e . d . n . c . else . " 7 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_distance/test_get_distance.csv: -------------------------------------------------------------------------------- 1 | Text,Distance,Units 2 | That is at least 10 miles away.,10,mile 3 | That is at least 10mi away.,10,mile 4 | That is at least 10 kilometers away.,10,kilometer 5 | That is at least 10km away.,10,kilometer 6 | That is somewhere between 5 miles and 10km from here.,5,mile 7 | ,10,kilometer 8 | There are 10 dogs. 9 | That is a 20Hz oscillation. 10 | This is a 5khz test. 
11 | ", 500 miles to go",500,mile 12 | ",500.5 miles to go",500.5,mile 13 | ", fifty miles to the 5khz test.",50,mile 14 | .5 miles to go,0.5,mile 15 | "There is no , distance here" 16 | There are many miles to go 17 | There are ten miles to go,10,mile 18 | There are 50 thousand miles to go,50000,mile 19 | There are fifty thousand miles to go,50000,mile 20 | This is not a 5.4.3.2.1 mi distance 21 | There are 5.4.3 thousand mi reasons 22 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_distance/test_get_distance_source.csv: -------------------------------------------------------------------------------- 1 | "Text","Distance","Unit","Source" 2 | "That is at least 10 miles away.",10,"mile","10 miles" 3 | "That is at least 10mi away.",10,"mile","10mi" 4 | "That is at least 10 kilometers away.",10,"kilometer","10 kilometers" 5 | "That is at least 10km away.",10,"kilometer","10km" 6 | "That is somewhere between 5 miles and 10km from here.",5,"mile","5 miles" 7 | ,10,"kilometer","10km" 8 | "There are 10 dogs.",,, 9 | "That is a 20Hz oscillation.",,, 10 | "This is a 5khz test.",,, 11 | ", 500 miles to go",500,"mile","500 miles" 12 | ",500.5 miles to go",500.5,"mile","500.5 miles" 13 | ", fifty miles to the 5khz test.",50,"mile","fifty miles" 14 | " .5 miles to go",0.5,"mile",".5 miles" 15 | "There is no , distance here",,, 16 | "There are many miles to go",,, 17 | "There are ten miles to go",10,"mile","ten miles" 18 | "There are 50 thousand miles to go",50000,"mile","50 thousand miles" 19 | "There are fifty thousand miles to go",50000,"mile","fifty thousand miles" 20 | "This is not a 5.4.3.2.1 mi distance",,, 21 | "There are 5.4.3 thousand mi reasons",,, 22 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_geoentities/test_geoentities_alias_filtering.csv: 
-------------------------------------------------------------------------------- 1 | "Text","input_text_languages_str","input_min_alias_len_int","Geo Entity" 2 | "Community of Mmaaddrid should not be detected for single letter Mm especially in name A.M. Best but New York should be too.",,2,"New York" 3 | "Community of Mmaaddrid should be detected for single letter Mm especially in name A.M. Best and New York should be too.",,1,"New York" 4 | ,,,"Community of Madrid" 5 | "There should be no Afghanistan in this sentence because “Afghanistan” and . Only USA.",,,"United States" 6 | "There should not be Aallbbania here – AL is blacklisted for English.","en",, 7 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_geoentities/test_geoentities_en_equal_match_take_lowest_id.csv: -------------------------------------------------------------------------------- 1 | "Text","Geo Entity","Source Text" 2 | "MS abbrev can be either MMMississippi or MMMonserrat. But for this test conflict resolving is enabled and it should detect MS as the entity having the first id in config (geoaliases.csv, entity_id). 
And this is Monserrat.","Montserrat","MS" 3 | "Here we expect CcaaNNaaddaa to be returned as having lower id than CcaaLLifornia for alias CA.","Canada","CA" 4 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_geoentities/test_geoentities_en_equal_match_take_top_prio.csv: -------------------------------------------------------------------------------- 1 | "Text","Geo Entity","Source Text" 2 | "Here we expect CcaaLLifornia to be returned as having greater priority than CcaaNNaaddaa for alias CA.","California","CA" 3 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_pii/test_pii_list.csv: -------------------------------------------------------------------------------- 1 | "Text","Type","Value" 2 | "Employee ID: 078-05-1120","ssn","078-05-1120" 3 | "My ID is 078-05-1120 and my phone number is 212-212-2121","ssn","078-05-1120" 4 | ,"us_phone","(212) 212-2121" 5 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_pii/test_pii_list_source.csv: -------------------------------------------------------------------------------- 1 | "Text","Type","Value","Source" 2 | "Employee ID: 078-05-1120","ssn","078-05-1120","078-05-1120" 3 | "My ID is 078-05-1120 and my phone number is 212-212-2121","ssn","078-05-1120","078-05-1120" 4 | ,"us_phone","(212) 212-2121","212-212-2121" 5 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_pii/test_ssn_list.csv: -------------------------------------------------------------------------------- 1 | Text,SSN 2 | Employee ID: 123-45-6789,123-45-6789 3 | There is no 12-34-45 SSN here. 
4 | Some poor soul had 078-05-1120 once upon a time.,078-05-1120 5 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_pii/test_ssn_list_source.csv: -------------------------------------------------------------------------------- 1 | "Text","SSN","Source" 2 | "Employee ID: 123-45-6789","123-45-6789","123-45-6789" 3 | "There is no 12-34-45 SSN here.",, 4 | "Some poor soul had 078-05-1120 once upon a time.","078-05-1120","078-05-1120" 5 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_pii/test_us_phone_list.csv: -------------------------------------------------------------------------------- 1 | Text,Phone 2 | Home Phone: (212) 212-2121,(212) 212-2121 3 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_pii/test_us_phone_list_source.csv: -------------------------------------------------------------------------------- 1 | "Text","Phone","Source" 2 | "Home Phone: (212) 212-2121","(212) 212-2121","(212) 212-2121" 3 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_ratios/test_get_ratios.csv: -------------------------------------------------------------------------------- 1 | Text,Numerator,Consequent,Decimal 2 | Ratio of not greater than 3.0:1.0.,3.0,1.0,3.0 3 | Ratio of no more than four to one,4.0,1.0,4.0 4 | Ratio of no more than four t one 5 | Ratio of no more than four ot one 6 | Ratio of no more than 4..0:1.0 7 | Ratio of no more than 4.0:1..0 8 | "Level I ---- 1.0:1.0 .18% 9 | Level II 1.0:1.0 2.0:1.0 .21% 10 | Level III 2.0:1.0 ------- .24%",1.0,1.0,1.0 11 | ,1.0,1.0,1.0 12 | ,2.0,1.0,2.0 13 | ,2.0,1.0,2.0 14 | Ratio of 2.0::1.0 15 | Don't catch time 8:30 a.m. 16 | Don't catch time 8:30 am 17 | Don't catch time 8:30 AM 18 | Don't catch time 8:30 p.m. 
19 | Don't catch 0:30 pseudo ratio 20 | Don't catch 30:0 pseudo ratio 21 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_ratios/test_get_ratios_source.csv: -------------------------------------------------------------------------------- 1 | Text,Numerator,Consequent,Decimal,Source 2 | Ratio of not greater than 3.0:1.0.,3.0,1.0,3.0,3.0:1.0. 3 | Ratio of no more than four to one,4,1,4.0,four to one 4 | Ratio of no more than four t one 5 | Ratio of no more than four ot one 6 | Ratio of no more than 4..0:1.0 7 | Ratio of no more than 4.0:1..0 8 | "Level I ---- 1.0:1.0 .18% 9 | Level II 1.0:1.0 2.0:1.0 .21% 10 | Level III 2.0:1.0 ------- .24%",1.0,1.0,1.0,1.0:1.0 11 | ,1.0,1.0,1.0,1.0:1.0 12 | ,2.0,1.0,2.0,2.0:1.0 13 | ,2.0,1.0,2.0,2.0:1.0 14 | Ratio of 2.0::1.0 15 | Don't catch time 8:30 a.m. 16 | Don't catch time 8:30 am 17 | Don't catch time 8:30 AM 18 | Don't catch time 8:30 p.m. 19 | Don't catch 0:30 pseudo ratio 20 | Don't catch 30:0 pseudo ratio 21 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_regulations/test_get_regulations.csv: -------------------------------------------------------------------------------- 1 | Text,Regulation Type,Regulation Code,Regulation Str 2 | test 55 C.F.R. 77 code,Code of Federal Regulations,55 CFR 77,55 C.F.R. 77 3 | test 55 CFR 77a-22B code,Code of Federal Regulations,55 CFR 77a-22B,55 CFR 77a-22B 4 | "test 123 U.S.C 5 | 456, code",United States Code,123 USC 456,"123 U.S.C 6 | 456" 7 | "test 123 U.S.C § 456, code",United States Code,123 USC § 456,123 U.S.C § 456 8 | "test 123 U.S.C Section 456, code",United States Code,123 USC Section 456,123 U.S.C Section 456 9 | "test 123 U.S.C Sec. 456, code",United States Code,123 USC Section 456,123 U.S.C Sec. 456 10 | test Public Law No. 123-456 code,Public Law,Public Law No. 123-456,Public Law No. 
123-456 11 | test Public Law 123-456 code,Public Law,Public Law No. 123-456,Public Law 123-456 12 | test Pub. Law 123-456 code,Public Law,Public Law No. 123-456,Pub. Law 123-456 13 | test Pub. L. 123-456 code,Public Law,Public Law No. 123-456,Pub. L. 123-456 14 | test 123 Stat. 456 code,Public Law,123 Stat. 456,123 Stat. 456 15 | test Stat. 456 code 16 | test AB USC 456 code 17 | test 678 USC UPD code 18 | test 678 USC UPD code 19 | test 10 Public Law codes 20 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_urls/test_urls.csv: -------------------------------------------------------------------------------- 1 | "test http://www.demo.com/2/some-url/document.txt in the text","http://www.demo.com/2/some-url/document.txt" 2 | "test https - https://alpha.demo.com/document.txt in the text","https://alpha.demo.com/document.txt" 3 | "test get arguments - https://alpha.demo.com/document.txt?arg1=1&arg2=2 in the text","https://alpha.demo.com/document.txt?arg1=1&arg2=2" 4 | "test hash - https://alpha.demo.com#hash in the text","https://alpha.demo.com#hash" 5 | "test 2 urls http://alpha.demo.com/ and http://beta.demo.com/","http://alpha.demo.com/" 6 | ,"http://beta.demo.com/" 7 | "test UPPERCASE HTTP://WWW.DEMO.COM in the text","HTTP://WWW.DEMO.COM" -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_stanford/test_stanford_noun_lemmas.csv: -------------------------------------------------------------------------------- 1 | Text,Noun Lemma 2 | It has been approved and endorsed by The Associated General Contractors of America.,Associated 3 | ,General 4 | ,Contractors 5 | ,America 6 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_stanford/test_stanford_nouns.csv: -------------------------------------------------------------------------------- 1 | Text,Noun 2 | It has been 
approved and endorsed by The Associated General Contractors of America.,Associated 3 | ,General 4 | ,Contractors 5 | ,America 6 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_stanford/test_stanford_tokens.csv: -------------------------------------------------------------------------------- 1 | Text,Token 2 | It has been approved and endorsed by The Associated General Contractors of America.,It 3 | ,has 4 | ,been 5 | ,approved 6 | ,and 7 | ,endorsed 8 | ,by 9 | ,The 10 | ,Associated 11 | ,General 12 | ,Contractors 13 | ,of 14 | ,America 15 | ,. 16 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_stanford/test_stanford_tokens_lc.csv: -------------------------------------------------------------------------------- 1 | Text,Token Lowercase 2 | It has been approved and endorsed by The Associated General Contractors of America.,it 3 | ,has 4 | ,been 5 | ,approved 6 | ,and 7 | ,endorsed 8 | ,by 9 | ,the 10 | ,associated 11 | ,general 12 | ,contractors 13 | ,of 14 | ,america 15 | ,. 16 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_stanford/test_stanford_tokens_lc_sw.csv: -------------------------------------------------------------------------------- 1 | Text,"Token (Lowercase, No Stop Word)" 2 | It has been approved and endorsed by The Associated General Contractors of America.,approved 3 | ,endorsed 4 | ,associated 5 | ,general 6 | ,contractors 7 | ,america 8 | ,. 
9 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_stanford/test_stanford_tokens_sw.csv: -------------------------------------------------------------------------------- 1 | Text,Token (No Stop Words) 2 | It has been approved and endorsed by The Associated General Contractors of America.,approved 3 | ,endorsed 4 | ,Associated 5 | ,General 6 | ,Contractors 7 | ,America 8 | ,. 9 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_stanford/test_stanford_verb_lemmas.csv: -------------------------------------------------------------------------------- 1 | Text,Verb Lemma 2 | It has been approved and endorsed by The Associated General Contractors of America.,have 3 | ,be 4 | ,approve 5 | ,endorse 6 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_stanford/test_stanford_verbs.csv: -------------------------------------------------------------------------------- 1 | Text,Verb 2 | It has been approved and endorsed by The Associated General Contractors of America.,has 3 | ,been 4 | ,approved 5 | ,endorsed 6 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_adjectives.csv: -------------------------------------------------------------------------------- 1 | Text,Adjective 2 | "Builder shall comply with laws, rules, regulations and requirements of any Regulatory Authorities 3 | that are applicable and existing at the time of the execution of this Agreement that are in effect or which shall 4 | become effective as to any vessels built during the Project Schedule and which affect the construction of works, plants 5 | and vessels, in or on navigable waters and the shores thereof, and all other waters subject to the control of the United 6 | States as set forth in the Contract Documents and shall 
procure at its own expense such permits from the United States 7 | and from state and local authorities in the jurisdiction in which Builder is constructing the Vessels as may be 8 | necessary in connection with beginning or carrying on the completion of the Work, and shall at times comply with all 9 | United States, state and local laws in the jurisdiction in which Builder is constructing the Vessels in any way 10 | affecting the Work and affecting any documentation of such work with the U.S. Coast Guard.",applicable 11 | ,effective 12 | ,navigable 13 | ,other 14 | ,own 15 | ,such 16 | ,local 17 | ,necessary 18 | ,local 19 | ,such 20 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_adverbs.csv: -------------------------------------------------------------------------------- 1 | Text,Adverb 2 | shall promptly provide notice,promptly 3 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_adverbs_lemma.csv: -------------------------------------------------------------------------------- 1 | Text,Adverb Lemma 2 | shall promptly provide notice,promptly 3 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_lemmas.csv: -------------------------------------------------------------------------------- 1 | Text,Lemma 2 | It has been approved and endorsed by The Associated General Contractors of America.,It 3 | ,have 4 | ,be 5 | ,approve 6 | ,and 7 | ,endorse 8 | ,by 9 | ,The 10 | ,Associated 11 | ,General 12 | ,Contractors 13 | ,of 14 | ,America 15 | ,. 
16 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_lemmas_lc.csv: -------------------------------------------------------------------------------- 1 | Text,Lemma (Lowercase) 2 | It has been approved and endorsed by The Associated General Contractors of America.,it 3 | ,have 4 | ,be 5 | ,approve 6 | ,and 7 | ,endorse 8 | ,by 9 | ,the 10 | ,associated 11 | ,general 12 | ,contractors 13 | ,of 14 | ,america 15 | ,. 16 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_lemmas_lc_sw.csv: -------------------------------------------------------------------------------- 1 | Text,Lemma (Lowercase and No Stop Words) 2 | It has been approved and endorsed by The Associated General Contractors of America.,approve 3 | ,endorse 4 | ,associated 5 | ,general 6 | ,contractors 7 | ,america 8 | ,. 9 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_lemmas_sw.csv: -------------------------------------------------------------------------------- 1 | Text,Lemma (No Stop Words) 2 | It has been approved and endorsed by The Associated General Contractors of America.,approve 3 | ,endorse 4 | ,Associated 5 | ,General 6 | ,Contractors 7 | ,America 8 | ,. 
9 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_nouns.csv: -------------------------------------------------------------------------------- 1 | Text,Noun 2 | It has been approved and endorsed by The Associated General Contractors of America.,Associated 3 | ,General 4 | ,Contractors 5 | ,America 6 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_nouns_lemma.csv: -------------------------------------------------------------------------------- 1 | Text,Noun Lemma 2 | It has been approved and endorsed by The Associated General Contractors of America.,Associated 3 | ,General 4 | ,Contractors 5 | ,America 6 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_verb_lemmas.csv: -------------------------------------------------------------------------------- 1 | Text,Verb Lemma 2 | It has been approved and endorsed by The Associated General Contractors of America.,have 3 | ,be 4 | ,approve 5 | ,endorse 6 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_verbs.csv: -------------------------------------------------------------------------------- 1 | Text,Verb 2 | It has been approved and endorsed by The Associated General Contractors of America.,has 3 | ,been 4 | ,approved 5 | ,endorsed 6 | -------------------------------------------------------------------------------- /test_data/lexnlp/typed_annotations/de/copyright/copyrights.txt: -------------------------------------------------------------------------------- 1 | siemens.com globale Website Siemens © 1996 – 2019 2 | ------------------------------------------------------------------------------- 3 | total=1 4 | 0)locale=de 5 | 0)company=Website Siemens 6 | 0)year_start=1996 7 | 0)year_end= 8 | 
0)coords=(36, 49) 9 | 0)get_cite()=/de/copyright/Website Siemens/1996 10 | 11 | 12 | ------------------------------------------------------------------------------- 13 | Copyright 2019, Siemens 14 | ------------------------------------------------------------------------------- 15 | total=1 16 | 0)company=Siemens 17 | 0)year_start=2019 18 | 0)year_end= 19 | 0)coords=(0, 23) 20 | 0)get_cite()=/de/copyright/Siemens/2019 21 | 22 | 23 | ------------------------------------------------------------------------------- 24 | Eigentumsrecht 2019, Siemens 25 | ------------------------------------------------------------------------------- 26 | total=1 27 | 0)company=Siemens 28 | 0)year_start=2019 29 | 0)year_end= 30 | 0)coords=(0, 28) 31 | 0)get_cite()=/de/copyright/Siemens/2019 32 | 33 | -------------------------------------------------------------------------------- /test_data/lexnlp/typed_annotations/de/geoentity/geoentities.txt: -------------------------------------------------------------------------------- 1 | ein seltsamer Text und Georgien darin erwähnt 2 | ------------------------------------------------------------------------------- 3 | total=1 4 | 0)locale=de 5 | 0)coords=(23, 31) 6 | 0)entity_id=83 7 | 0)name=Georgien 8 | 0)name_en=Georgia 9 | 0)alias=Georgien 10 | 0)iso_3166_2=GE 11 | 0)iso_3166_3=GEO 12 | 0)get_cite()=/de/geoentity/Georgien 13 | 14 | 15 | ------------------------------------------------------------------------------- 16 | ein seltsamer Text und Geeorgia darin erwähnt 17 | ------------------------------------------------------------------------------- 18 | total=0 -------------------------------------------------------------------------------- /test_data/lexnlp/typed_annotations/en/act/acts.txt: -------------------------------------------------------------------------------- 1 | accordance with sections 751(a)(1) and 777(i)(1) of the Act, and 19 CFR 351 2 | ------------------------------------------------------------------------------- 3 | 
total=1 4 | 0)act_name=Act 5 | 0)coords=(16, 61) 6 | 0)section=751(a)(1) and 777(i)(1) 7 | 0)year= 8 | 0)ambiguous=True 9 | ### '#s#' replaces ' ' 10 | 0)text=sections 751(a)(1) and 777(i)(1) of the Act,#s# 11 | -------------------------------------------------------------------------------- /test_data/lexnlp/typed_annotations/en/phone/phones.txt: -------------------------------------------------------------------------------- 1 | Dial +1-541-754-3010 in case of murder 2 | ------------------------------------------------------------------------------- 3 | total=1 4 | 0)locale=en 5 | 0)coords=(8, 20) 6 | 0)phone=(541) 754-3010 7 | 0)get_cite()=/en/phone/(541) 754-3010 8 | 9 | 10 | ------------------------------------------------------------------------------- 11 | Dial 3.141592564, +1-541 754 3010 in case of murder 12 | ------------------------------------------------------------------------------- 13 | total=1 14 | 0)locale=en 15 | 0)coords=(21, 33) 16 | 0)phone=(541) 754-3010 17 | 0)get_cite()=/en/phone/(541) 754-3010 18 | 19 | 20 | ------------------------------------------------------------------------------- 21 | Dial +1-5417543010 in case of murder 22 | ------------------------------------------------------------------------------- 23 | total=0 24 | -------------------------------------------------------------------------------- /test_data/lexnlp/typed_annotations/en/ssn/ssn.txt: -------------------------------------------------------------------------------- 1 | Somewhere in the form I filled out my SSN (123-45-6789) number 2 | ------------------------------------------------------------------------------- 3 | total=1 4 | 0)locale=en 5 | 0)coords=(43, 54) 6 | 0)get_cite()=/en/ssn/123-45-6789 7 | 0)number=123-45-6789 8 | 9 | 10 | ------------------------------------------------------------------------------- 11 | Here's an invalid SSN number: 123-00-6789 12 | ------------------------------------------------------------------------------- 13 | total=0 
-------------------------------------------------------------------------------- /test_data/lexnlp/typed_annotations/es/court/courts.txt: -------------------------------------------------------------------------------- 1 | Sembré una flor sin interés. Yo la sembré para ver el Tribunal Superior, 2 | al volver ya estaba seca y ya no quizo retoñar. 3 | ------------------------------------------------------------------------------- 4 | total=1 5 | 0)locale=es 6 | 0)coords=(28, 71) 7 | 0)jurisdiction=Andalucía 8 | 0)court_type=Tribunal Superior 9 | 0)get_cite()=/es/court/Tribunal Superior/Andalucía/Tribunal Superior 10 | 11 | 12 | 13 | ------------------------------------------------------------------------------- 14 | El actual Tribunal Superior de Justicia de Madrid fue creado en 1985 a partir 15 | del artículo 26 de la Ley Orgánica del Poder Judicial, constituyéndose el 23 16 | de mayo de 1989. 17 | ------------------------------------------------------------------------------- 18 | total=1 19 | 0)locale=es 20 | 0)coords=(9, 49) 21 | 0)jurisdiction=Comunidad de Madrid 22 | 0)court_type=Tribunal Superior 23 | 0)get_cite()=/es/court/Tribunal Superior de Justicia de Madrid/Comunidad de Madrid/Tribunal Superior -------------------------------------------------------------------------------- /test_data/lexnlp/typed_annotations/es/date/dates.txt: -------------------------------------------------------------------------------- 1 | Some dummy sample with Spanish date like 15 de febrero, 28 de abril y 17 de 2 | noviembre de 1995, 1ºde enero de 1999 3 | ------------------------------------------------------------------------------- 4 | total=4 5 | ..)locale=es 6 | 0)date=1995-11-17 00:00:00 7 | 0)coords=(70, 93) 8 | 0)get_cite()=/es/date/1995-11-17 00:00:00 9 | 10 | 1)date=1999-01-01 00:00:00 11 | 1)coords=(95, 113) 12 | 1)get_cite()=/es/date/1999-01-01 00:00:00 13 | 14 | 2)date=1995-02-15 00:00:00 15 | 2)coords=(41, 54) 16 | 2)get_cite()=/es/date/1995-02-15 00:00:00 17 | 18 | 
3)date=1995-04-28 00:00:00 19 | 3)coords=(56, 67) 20 | 3)get_cite()=/es/date/1995-04-28 00:00:00 21 | 22 | 23 | 24 | ------------------------------------------------------------------------------- 25 | Esto sucedió el 4 de julio. 26 | ------------------------------------------------------------------------------- 27 | total=1 28 | 0)date.month=7 29 | 0)date.day=4 30 | 0)coords=(16, 26) 31 | 0)get_cite()=/es/date/$YEAR$-07-04 00:00:00 32 | -------------------------------------------------------------------------------- /test_data/output/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/test_data/output/.gitkeep -------------------------------------------------------------------------------- /test_data/table_sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/test_data/table_sample.pdf -------------------------------------------------------------------------------- /test_data/tabular02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/test_data/tabular02.pdf -------------------------------------------------------------------------------- /test_data/test_lexnlp_tests/test_test_extraction_func_on_test_data.csv: -------------------------------------------------------------------------------- 1 | text,expected 2 | qqq,qqq! 3 | www,www! 4 | eee,eee? 5 | rrr,rrr! 6 | --------------------------------------------------------------------------------