├── .gitignore ├── .pylintrc ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── Pipfile ├── Pipfile.lock ├── README.md ├── README.rst ├── documentation └── docs │ ├── Makefile │ ├── api │ ├── lexnlp.config.en.rst │ ├── lexnlp.config.rst │ ├── lexnlp.extract.all_locales.rst │ ├── lexnlp.extract.all_locales.tests.rst │ ├── lexnlp.extract.common.annotations.rst │ ├── lexnlp.extract.common.copyrights.rst │ ├── lexnlp.extract.common.date_parsing.rst │ ├── lexnlp.extract.common.definitions.rst │ ├── lexnlp.extract.common.durations.rst │ ├── lexnlp.extract.common.entities.rst │ ├── lexnlp.extract.common.ocr_rating.rst │ ├── lexnlp.extract.common.rst │ ├── lexnlp.extract.common.tests.rst │ ├── lexnlp.extract.de.rst │ ├── lexnlp.extract.de.tests.rst │ ├── lexnlp.extract.en.addresses.rst │ ├── lexnlp.extract.en.addresses.tests.rst │ ├── lexnlp.extract.en.contracts.rst │ ├── lexnlp.extract.en.contracts.tests.rst │ ├── lexnlp.extract.en.entities.rst │ ├── lexnlp.extract.en.entities.tests.rst │ ├── lexnlp.extract.en.preprocessing.rst │ ├── lexnlp.extract.en.rst │ ├── lexnlp.extract.en.tests.rst │ ├── lexnlp.extract.es.rst │ ├── lexnlp.extract.es.tests.rst │ ├── lexnlp.extract.ml.classifier.rst │ ├── lexnlp.extract.ml.detector.rst │ ├── lexnlp.extract.ml.detector.tests.rst │ ├── lexnlp.extract.ml.en.definitions.rst │ ├── lexnlp.extract.ml.en.definitions.tests.rst │ ├── lexnlp.extract.ml.en.rst │ ├── lexnlp.extract.ml.rst │ ├── lexnlp.extract.rst │ ├── lexnlp.ml.catalog.rst │ ├── lexnlp.ml.rst │ ├── lexnlp.nlp.en.rst │ ├── lexnlp.nlp.en.segments.rst │ ├── lexnlp.nlp.en.tests.rst │ ├── lexnlp.nlp.en.transforms.rst │ ├── lexnlp.nlp.rst │ ├── lexnlp.nlp.train.en.rst │ ├── lexnlp.nlp.train.en.tests.rst │ ├── lexnlp.nlp.train.rst │ ├── lexnlp.rst │ ├── lexnlp.tests.rst │ ├── lexnlp.utils.lines_processing.rst │ ├── lexnlp.utils.rst │ ├── lexnlp.utils.tests.rst │ ├── lexnlp.utils.unicode.rst │ ├── lexnlp.utils.unicode.tests.rst │ └── modules.rst │ ├── make.bat │ ├── requirements.txt │ └── 
source │ ├── _static │ ├── css │ │ └── custom_styles.css │ └── img │ │ └── lexnlp_logo.png │ ├── about.rst │ ├── api │ ├── lexnlp.config.en.rst │ ├── lexnlp.config.rst │ ├── lexnlp.extract.common.annotations.rst │ ├── lexnlp.extract.common.copyrights.rst │ ├── lexnlp.extract.common.date_parsing.rst │ ├── lexnlp.extract.common.definitions.rst │ ├── lexnlp.extract.common.durations.rst │ ├── lexnlp.extract.common.rst │ ├── lexnlp.extract.common.tests.rst │ ├── lexnlp.extract.de.rst │ ├── lexnlp.extract.de.tests.rst │ ├── lexnlp.extract.en.addresses.rst │ ├── lexnlp.extract.en.addresses.tests.rst │ ├── lexnlp.extract.en.amounts.get_amounts.rst │ ├── lexnlp.extract.en.amounts.get_np.rst │ ├── lexnlp.extract.en.amounts.text2num.rst │ ├── lexnlp.extract.en.citations.get_citations.rst │ ├── lexnlp.extract.en.conditions.create_condition_pattern.rst │ ├── lexnlp.extract.en.conditions.get_conditions.rst │ ├── lexnlp.extract.en.constraints.create_constraint_pattern.rst │ ├── lexnlp.extract.en.constraints.get_constraints.rst │ ├── lexnlp.extract.en.contracts.rst │ ├── lexnlp.extract.en.contracts.tests.rst │ ├── lexnlp.extract.en.copyright.CopyrightNPExtractor.rst │ ├── lexnlp.extract.en.copyright.get_copyright.rst │ ├── lexnlp.extract.en.dates.build_date_model.rst │ ├── lexnlp.extract.en.dates.get_date_features.rst │ ├── lexnlp.extract.en.dates.get_dates.rst │ ├── lexnlp.extract.en.dates.get_dates_list.rst │ ├── lexnlp.extract.en.dates.get_raw_date_list.rst │ ├── lexnlp.extract.en.dates.get_raw_dates.rst │ ├── lexnlp.extract.en.dates.train_default_model.rst │ ├── lexnlp.extract.en.definitions.get_definitions.rst │ ├── lexnlp.extract.en.dict_entities.SearchResultPosition.rst │ ├── lexnlp.extract.en.dict_entities.add_alias_to_entity.rst │ ├── lexnlp.extract.en.dict_entities.add_aliases_to_entity.rst │ ├── lexnlp.extract.en.dict_entities.alias_is_blacklisted.rst │ ├── lexnlp.extract.en.dict_entities.conflicts_take_first_by_id.rst │ ├── 
lexnlp.extract.en.dict_entities.conflicts_top_by_priority.rst │ ├── lexnlp.extract.en.dict_entities.entity_alias.rst │ ├── lexnlp.extract.en.dict_entities.entity_config.rst │ ├── lexnlp.extract.en.dict_entities.find_dict_entities.rst │ ├── lexnlp.extract.en.dict_entities.get_alias_id.rst │ ├── lexnlp.extract.en.dict_entities.get_alias_text.rst │ ├── lexnlp.extract.en.dict_entities.get_entity_aliases.rst │ ├── lexnlp.extract.en.dict_entities.get_entity_id.rst │ ├── lexnlp.extract.en.dict_entities.get_entity_name.rst │ ├── lexnlp.extract.en.dict_entities.get_entity_priority.rst │ ├── lexnlp.extract.en.dict_entities.normalize_text.rst │ ├── lexnlp.extract.en.dict_entities.prepare_alias_blacklist_dict.rst │ ├── lexnlp.extract.en.distances.get_distances.rst │ ├── lexnlp.extract.en.durations.get_durations.rst │ ├── lexnlp.extract.en.entities.rst │ ├── lexnlp.extract.en.entities.tests.rst │ ├── lexnlp.extract.en.geoentities.get_geoentities.rst │ ├── lexnlp.extract.en.money.get_money.rst │ ├── lexnlp.extract.en.percents.get_percents.rst │ ├── lexnlp.extract.en.pii.get_pii.rst │ ├── lexnlp.extract.en.pii.get_ssns.rst │ ├── lexnlp.extract.en.pii.get_us_phones.rst │ ├── lexnlp.extract.en.preprocessing.rst │ ├── lexnlp.extract.en.ratios.get_ratios.rst │ ├── lexnlp.extract.en.regulations.get_regulations.rst │ ├── lexnlp.extract.en.rst │ ├── lexnlp.extract.en.tests.rst │ ├── lexnlp.extract.en.trademarks.get_trademarks.rst │ ├── lexnlp.extract.en.urls.get_urls.rst │ ├── lexnlp.extract.en.utils.NPExtractor.rst │ ├── lexnlp.extract.en.utils.strip_unicode_punctuation.rst │ ├── lexnlp.extract.es.rst │ ├── lexnlp.extract.es.tests.rst │ ├── lexnlp.extract.ml.classifier.rst │ ├── lexnlp.extract.ml.detector.rst │ ├── lexnlp.extract.ml.detector.tests.rst │ ├── lexnlp.extract.ml.en.definitions.rst │ ├── lexnlp.extract.ml.en.definitions.tests.rst │ ├── lexnlp.extract.ml.en.rst │ ├── lexnlp.extract.ml.rst │ ├── lexnlp.extract.rst │ ├── lexnlp.nlp.en.rst │ ├── 
lexnlp.nlp.en.segments.pages.MODULE_PATH.rst │ ├── lexnlp.nlp.en.segments.pages.PAGE_SEGMENTER_MODEL.rst │ ├── lexnlp.nlp.en.segments.pages.build_page_break_features.rst │ ├── lexnlp.nlp.en.segments.pages.get_pages.rst │ ├── lexnlp.nlp.en.segments.paragraphs.MODULE_PATH.rst │ ├── lexnlp.nlp.en.segments.paragraphs.Optional.rst │ ├── lexnlp.nlp.en.segments.paragraphs.PARAGRAPH_SEGMENTER_MODEL.rst │ ├── lexnlp.nlp.en.segments.paragraphs.RE_NEW_LINE.rst │ ├── lexnlp.nlp.en.segments.paragraphs.Union.rst │ ├── lexnlp.nlp.en.segments.paragraphs.build_paragraph_break_features.rst │ ├── lexnlp.nlp.en.segments.paragraphs.get_paragraphs.rst │ ├── lexnlp.nlp.en.segments.paragraphs.splitlines_with_spans.rst │ ├── lexnlp.nlp.en.segments.rst │ ├── lexnlp.nlp.en.segments.sections.MODULE_PATH.rst │ ├── lexnlp.nlp.en.segments.sections.SECTION_SEGMENTER_MODEL.rst │ ├── lexnlp.nlp.en.segments.sections.build_section_break_features.rst │ ├── lexnlp.nlp.en.segments.sections.get_sections.rst │ ├── lexnlp.nlp.en.segments.sentences.Any.rst │ ├── lexnlp.nlp.en.segments.sentences.MODULE_PATH.rst │ ├── lexnlp.nlp.en.segments.sentences.PRE_PROCESS_TEXT_REMOVE.rst │ ├── lexnlp.nlp.en.segments.sentences.SENTENCE_SEGMENTER_MODEL.rst │ ├── lexnlp.nlp.en.segments.sentences.SENTENCE_SPLITTERS.rst │ ├── lexnlp.nlp.en.segments.sentences.SENTENCE_SPLITTERS_LOWER_EXCLUDE.rst │ ├── lexnlp.nlp.en.segments.sentences.STRIP_GROUP.rst │ ├── lexnlp.nlp.en.segments.sentences.Union.rst │ ├── lexnlp.nlp.en.segments.sentences.build_sentence_model.rst │ ├── lexnlp.nlp.en.segments.sentences.extra_abbreviations.rst │ ├── lexnlp.nlp.en.segments.sentences.get_sentence__with_coords_list.rst │ ├── lexnlp.nlp.en.segments.sentences.get_sentence_list.rst │ ├── lexnlp.nlp.en.segments.sentences.get_sentence_span.rst │ ├── lexnlp.nlp.en.segments.sentences.get_sentence_span_list.rst │ ├── lexnlp.nlp.en.segments.sentences.post_process_sentence.rst │ ├── lexnlp.nlp.en.segments.sentences.pre_process_document.rst │ ├── 
lexnlp.nlp.en.segments.titles.MODULE_PATH.rst │ ├── lexnlp.nlp.en.segments.titles.SECTION_SEGMENTER_MODEL.rst │ ├── lexnlp.nlp.en.segments.titles.UNICODE_CHAR_TOP_CATEGORY_MAPPING.rst │ ├── lexnlp.nlp.en.segments.titles.build_document_title_features.rst │ ├── lexnlp.nlp.en.segments.titles.build_model.rst │ ├── lexnlp.nlp.en.segments.titles.build_title_features.rst │ ├── lexnlp.nlp.en.segments.titles.get_titles.rst │ ├── lexnlp.nlp.en.segments.utils.build_document_distribution.rst │ ├── lexnlp.nlp.en.segments.utils.build_document_line_distribution.rst │ ├── lexnlp.nlp.en.tests.rst │ ├── lexnlp.nlp.en.tokens.BIGRAM_COLLOCATIONS.rst │ ├── lexnlp.nlp.en.tokens.COLLOCATION_SIZE.rst │ ├── lexnlp.nlp.en.tokens.DEFAULT_LEMMATIZER.rst │ ├── lexnlp.nlp.en.tokens.DEFAULT_STEMMER.rst │ ├── lexnlp.nlp.en.tokens.MODULE_PATH.rst │ ├── lexnlp.nlp.en.tokens.STOPWORDS.rst │ ├── lexnlp.nlp.en.tokens.TRIGRAM_COLLOCATIONS.rst │ ├── lexnlp.nlp.en.tokens.get_adjectives.rst │ ├── lexnlp.nlp.en.tokens.get_adverbs.rst │ ├── lexnlp.nlp.en.tokens.get_lemma_list.rst │ ├── lexnlp.nlp.en.tokens.get_lemmas.rst │ ├── lexnlp.nlp.en.tokens.get_nouns.rst │ ├── lexnlp.nlp.en.tokens.get_stem_list.rst │ ├── lexnlp.nlp.en.tokens.get_stems.rst │ ├── lexnlp.nlp.en.tokens.get_token_list.rst │ ├── lexnlp.nlp.en.tokens.get_tokens.rst │ ├── lexnlp.nlp.en.tokens.get_verbs.rst │ ├── lexnlp.nlp.en.tokens.get_wordnet_pos.rst │ ├── lexnlp.nlp.en.transforms.characters.MODULE_PATH.rst │ ├── lexnlp.nlp.en.transforms.characters.get_character_distribution.rst │ ├── lexnlp.nlp.en.transforms.characters.get_character_ngram_distribution.rst │ ├── lexnlp.nlp.en.transforms.rst │ ├── lexnlp.nlp.en.transforms.tokens.MODULE_PATH.rst │ ├── lexnlp.nlp.en.transforms.tokens.get_bigram_distribution.rst │ ├── lexnlp.nlp.en.transforms.tokens.get_ngram_distribution.rst │ ├── lexnlp.nlp.en.transforms.tokens.get_skipgram_distribution.rst │ ├── lexnlp.nlp.en.transforms.tokens.get_token_distribution.rst │ ├── 
lexnlp.nlp.en.transforms.tokens.get_trigram_distribution.rst │ ├── lexnlp.nlp.rst │ ├── lexnlp.rst │ ├── lexnlp.tests.rst │ ├── lexnlp.utils.lines_processing.rst │ ├── lexnlp.utils.rst │ ├── lexnlp.utils.tests.rst │ ├── lexnlp.utils.unicode.rst │ ├── lexnlp.utils.unicode.tests.rst │ ├── lexnlpprivate.extract.en.addresses.rst │ ├── lexnlpprivate.extract.en.addresses.tests.rst │ ├── lexnlpprivate.extract.en.rst │ ├── lexnlpprivate.extract.rst │ ├── lexnlpprivate.rst │ ├── modules.rst │ └── setup.rst │ ├── changes.rst │ ├── conf.py │ ├── index.rst │ ├── lexnlp.rst │ ├── license.rst │ └── modules │ ├── extract │ ├── de │ │ ├── amounts.rst │ │ ├── citations.rst │ │ ├── dates.rst │ │ ├── durations.rst │ │ └── percents.rst │ ├── en │ │ ├── acts.rst │ │ ├── amounts.rst │ │ ├── citations.rst │ │ ├── companies.rst │ │ ├── conditions.rst │ │ ├── constraints.rst │ │ ├── copyright.rst │ │ ├── courts.rst │ │ ├── cusip.rst │ │ ├── dates.rst │ │ ├── definitions.rst │ │ ├── distances.rst │ │ ├── durations.rst │ │ ├── geoentities.rst │ │ ├── money.rst │ │ ├── percents.rst │ │ ├── pii.rst │ │ ├── ratios.rst │ │ ├── regulations.rst │ │ ├── trademarks.rst │ │ └── urls.rst │ ├── es │ │ └── dates.rst │ └── extract.rst │ └── nlp │ ├── en │ ├── segments_pages.rst │ ├── segments_paragraphs.rst │ ├── segments_sections.rst │ ├── segments_sentences.rst │ ├── segments_titles.rst │ ├── segments_utils.rst │ ├── tokens.rst │ ├── transforms_character.rst │ └── transforms_tokens.rst │ └── nlp.rst ├── index.rst ├── lexnlp ├── __init__.py ├── config │ ├── __init__.py │ ├── de │ │ └── de_courts.csv │ ├── en │ │ ├── __init__.py │ │ ├── au_courts.csv │ │ ├── ca_courts.csv │ │ ├── company_types.csv │ │ ├── company_types.py │ │ ├── geoentities_config.py │ │ ├── us_courts.csv │ │ └── us_state_courts.csv │ ├── es │ │ ├── es_courts.csv │ │ └── es_regulations.csv │ └── stanford.py ├── extract │ ├── __init__.py │ ├── all_locales │ │ ├── __init__.py │ │ ├── amounts.py │ │ ├── citations.py │ │ ├── copyrights.py │ 
│ ├── court_citations.py │ │ ├── courts.py │ │ ├── dates.py │ │ ├── definitions.py │ │ ├── durations.py │ │ ├── geoentities.py │ │ ├── languages.py │ │ ├── money.py │ │ ├── percents.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ └── test_locales.py │ ├── common │ │ ├── __init__.py │ │ ├── annotation_locator_type.py │ │ ├── annotation_type.py │ │ ├── annotations │ │ │ ├── __init__.py │ │ │ ├── act_annotation.py │ │ │ ├── address_annotation.py │ │ │ ├── amount_annotation.py │ │ │ ├── citation_annotation.py │ │ │ ├── company_annotation.py │ │ │ ├── condition_annotation.py │ │ │ ├── constraint_annotation.py │ │ │ ├── copyright_annotation.py │ │ │ ├── court_annotation.py │ │ │ ├── court_citation_annotation.py │ │ │ ├── cusip_annotation.py │ │ │ ├── date_annotation.py │ │ │ ├── definition_annotation.py │ │ │ ├── distance_annotation.py │ │ │ ├── duration_annotation.py │ │ │ ├── geo_annotation.py │ │ │ ├── law_annotation.py │ │ │ ├── money_annotation.py │ │ │ ├── percent_annotation.py │ │ │ ├── phone_annotation.py │ │ │ ├── phrase_position_finder.py │ │ │ ├── ratio_annotation.py │ │ │ ├── regulation_annotation.py │ │ │ ├── ssn_annotation.py │ │ │ ├── text_annotation.py │ │ │ ├── trademark_annotation.py │ │ │ └── url_annotation.py │ │ ├── base_path.py │ │ ├── copyrights │ │ │ ├── __init__.py │ │ │ ├── copyright_en_style_parser.py │ │ │ ├── copyright_parser.py │ │ │ ├── copyright_parsing_methods.py │ │ │ └── copyright_pattern_found.py │ │ ├── date_parsing │ │ │ ├── __init__.py │ │ │ └── datefinder.py │ │ ├── dates.py │ │ ├── dates_classifier_model.py │ │ ├── definitions │ │ │ ├── __init__.py │ │ │ ├── common_definition_patterns.py │ │ │ ├── definition_match.py │ │ │ └── universal_definition_parser.py │ │ ├── durations │ │ │ ├── __init__.py │ │ │ └── durations_parser.py │ │ ├── entities │ │ │ ├── __init__.py │ │ │ └── entity_banlist.py │ │ ├── fact_extracting.py │ │ ├── geoentity_detector.py │ │ ├── language_dictionary_reader.py │ │ ├── money_detector.py │ │ ├── ocr_rating │ │ 
│ ├── __init__.py │ │ │ ├── lang_vector_distribution_builder.py │ │ │ ├── ocr_rating_calculator.py │ │ │ └── reference_vectors │ │ │ │ ├── de.pickle │ │ │ │ └── en.pickle │ │ ├── pattern_found.py │ │ ├── special_characters.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── definitions_text_annotator.py │ │ │ ├── test_annotation.py │ │ │ ├── test_date_classifier_model.py │ │ │ ├── test_datefinder.py │ │ │ ├── test_datefinder_tokenizer.py │ │ │ ├── test_entity_banlist.py │ │ │ ├── test_fact_extractor.py │ │ │ ├── test_lang_vector_distribution_builder.py │ │ │ ├── test_ocr_rating.py │ │ │ ├── test_phrase_position_finder.py │ │ │ ├── test_text_beautifier.py │ │ │ └── test_universal_courts_parser.py │ │ ├── text_beautifier.py │ │ ├── text_pattern_collector.py │ │ ├── universal_court_parser.py │ │ └── year_parser.py │ ├── de │ │ ├── __init__.py │ │ ├── amounts.py │ │ ├── citations.py │ │ ├── copyrights.py │ │ ├── court_citations.py │ │ ├── courts.py │ │ ├── data │ │ │ └── abbreviations.txt │ │ ├── date_model.pickle │ │ ├── date_model.py │ │ ├── dates.py │ │ ├── dates_de_classifier.py │ │ ├── de_date_parser.py │ │ ├── definitions.py │ │ ├── durations.py │ │ ├── geoentities.py │ │ ├── language_tokens.py │ │ ├── laws.py │ │ ├── model.pickle │ │ ├── money.py │ │ ├── percents.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ ├── test_amounts.py │ │ │ ├── test_citations.py │ │ │ ├── test_copyrights.py │ │ │ ├── test_court_citations.py │ │ │ ├── test_courts.py │ │ │ ├── test_dates.py │ │ │ ├── test_definitions.py │ │ │ ├── test_durations.py │ │ │ ├── test_geoentities.py │ │ │ ├── test_laws.py │ │ │ ├── test_money.py │ │ │ └── test_percents.py │ ├── en │ │ ├── __init__.py │ │ ├── acts.py │ │ ├── addresses │ │ │ ├── __init__.py │ │ │ ├── address_features.py │ │ │ ├── addresses.py │ │ │ ├── addresses_clf.pickle │ │ │ ├── data │ │ │ │ ├── building_suffixes.csv │ │ │ │ ├── city_name_words.pickle │ │ │ │ ├── nltk_pos_tag_indexes.json │ │ │ │ ├── provinces.txt │ │ │ │ ├── 
street_directions.csv │ │ │ │ └── street_suffixes.csv │ │ │ └── tests │ │ │ │ ├── __init__.py │ │ │ │ └── test_addresses.py │ │ ├── amounts.py │ │ ├── citations.py │ │ ├── conditions.py │ │ ├── constraints.py │ │ ├── contracts │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── contract_type_detector.py │ │ │ ├── predictors.py │ │ │ └── tests │ │ │ │ ├── __init__.py │ │ │ │ ├── test_contract_type.py │ │ │ │ └── test_contracts.py │ │ ├── copyright.py │ │ ├── courts.py │ │ ├── cusip.py │ │ ├── data │ │ │ ├── abbreviations.txt │ │ │ ├── en_company_banlist.csv │ │ │ └── pronouns.txt │ │ ├── date_model.pickle │ │ ├── date_model.py │ │ ├── dates.py │ │ ├── definition_parsing_methods.py │ │ ├── definitions.py │ │ ├── dict_entities.py │ │ ├── distances.py │ │ ├── durations.py │ │ ├── en_language_tokens.py │ │ ├── entities │ │ │ ├── __init__.py │ │ │ ├── company_detector.py │ │ │ ├── company_np_extractor.py │ │ │ ├── nltk_maxent.py │ │ │ ├── nltk_re.py │ │ │ ├── nltk_tokenizer.py │ │ │ ├── stanford_ner.py │ │ │ └── tests │ │ │ │ ├── __init__.py │ │ │ │ ├── test_get_companies.py │ │ │ │ ├── test_nltk_maxent.py │ │ │ │ └── test_stanford_ner.py │ │ ├── geoentities.py │ │ ├── introductory_words_detector.py │ │ ├── money.py │ │ ├── percents.py │ │ ├── pii.py │ │ ├── preprocessing │ │ │ ├── __init__.py │ │ │ └── span_tokenizer.py │ │ ├── ratios.py │ │ ├── regulations.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── test_acts.py │ │ │ ├── test_amounts.py │ │ │ ├── test_amounts_plain.py │ │ │ ├── test_citations.py │ │ │ ├── test_citations_plain.py │ │ │ ├── test_conditions.py │ │ │ ├── test_conditions_plain.py │ │ │ ├── test_constraints.py │ │ │ ├── test_constraints_plain.py │ │ │ ├── test_copyright.py │ │ │ ├── test_copyright_plain.py │ │ │ ├── test_courts.py │ │ │ ├── test_courts_plain.py │ │ │ ├── test_cusip.py │ │ │ ├── test_dates.py │ │ │ ├── test_dates_plain.py │ │ │ ├── test_definitions.py │ │ │ ├── test_definitions_template.py │ │ │ ├── test_dict_entities.py │ │ │ ├── 
test_distance.py │ │ │ ├── test_distances_plain.py │ │ │ ├── test_durations.py │ │ │ ├── test_durations_plain.py │ │ │ ├── test_geoentities.py │ │ │ ├── test_geoentities_plain.py │ │ │ ├── test_introductory_words_detector.py │ │ │ ├── test_money.py │ │ │ ├── test_money_plain.py │ │ │ ├── test_parsing_speed.py │ │ │ ├── test_percent_plain.py │ │ │ ├── test_percents.py │ │ │ ├── test_phone_plain.py │ │ │ ├── test_pii.py │ │ │ ├── test_ratios.py │ │ │ ├── test_ratios_plain.py │ │ │ ├── test_regulations.py │ │ │ ├── test_regulations_plain.py │ │ │ ├── test_span_tokenizer.py │ │ │ ├── test_ssn_plain.py │ │ │ ├── test_trademarks.py │ │ │ ├── test_trademarks_plain.py │ │ │ ├── test_urls.py │ │ │ └── test_urls_plain.py │ │ ├── trademarks.py │ │ ├── urls.py │ │ └── utils.py │ ├── es │ │ ├── __init__.py │ │ ├── copyrights.py │ │ ├── courts.py │ │ ├── dates.py │ │ ├── definitions.py │ │ ├── language_tokens.py │ │ ├── regulations.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ ├── test_copyrights.py │ │ │ ├── test_courts.py │ │ │ ├── test_dates.py │ │ │ ├── test_definitions.py │ │ │ └── test_regulations.py │ └── ml │ │ ├── __init__.py │ │ ├── classifier │ │ ├── __init__.py │ │ ├── base_token_sequence_classifier_model.py │ │ ├── data │ │ │ ├── unicode_character_categories.pickle │ │ │ ├── unicode_character_category_mapping.pickle │ │ │ └── unicode_character_top_category_mapping.pickle │ │ ├── spacy_token_sequence_model.py │ │ └── token_sequence_model.py │ │ ├── detector │ │ ├── __init__.py │ │ ├── artifact_detector.py │ │ ├── detecting_settings.py │ │ ├── phrase_constructor.py │ │ ├── sample_processor.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ └── test_phrase_constructor.py │ │ ├── en │ │ ├── __init__.py │ │ ├── data │ │ │ └── definition_model_layered.pickle.gzip │ │ └── definitions │ │ │ ├── __init__.py │ │ │ ├── definition_phrase_detector.py │ │ │ ├── definition_term_detector.py │ │ │ ├── layered_definition_detector.py │ │ │ └── tests │ │ │ ├── __init__.py │ │ │ └── 
test_layered_definition_detector.py │ │ └── environment.py ├── ml │ ├── README.md │ ├── __init__.py │ ├── catalog │ │ ├── __init__.py │ │ └── download.py │ ├── gensim_utils.py │ ├── normalizers.py │ ├── predictor.py │ ├── sklearn_transformers.py │ └── vectorizers.py ├── nlp │ ├── __init__.py │ ├── en │ │ ├── __init__.py │ │ ├── collocation_bigrams_100.pickle │ │ ├── collocation_bigrams_1000.pickle │ │ ├── collocation_bigrams_10000.pickle │ │ ├── collocation_bigrams_100000.pickle │ │ ├── collocation_bigrams_50000.pickle │ │ ├── collocation_trigrams_100.pickle │ │ ├── collocation_trigrams_1000.pickle │ │ ├── collocation_trigrams_10000.pickle │ │ ├── collocation_trigrams_100000.pickle │ │ ├── collocation_trigrams_50000.pickle │ │ ├── segments │ │ │ ├── __init__.py │ │ │ ├── heading_heuristics.py │ │ │ ├── page_segmenter.pickle │ │ │ ├── pages.py │ │ │ ├── paragraph_segmenter.pickle │ │ │ ├── paragraphs.py │ │ │ ├── section_segmenter.pickle │ │ │ ├── sections.py │ │ │ ├── sentence_segmenter.pickle │ │ │ ├── sentences.py │ │ │ ├── title_locator.pickle │ │ │ ├── titles.py │ │ │ └── utils.py │ │ ├── stanford.py │ │ ├── stopwords.pickle │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── test_heading_heuristics.py │ │ │ ├── test_pages.py │ │ │ ├── test_paragraphs.py │ │ │ ├── test_sections.py │ │ │ ├── test_sentences.py │ │ │ ├── test_stanford.py │ │ │ ├── test_stanford_missing.py │ │ │ ├── test_titles.py │ │ │ ├── test_tokens.py │ │ │ └── test_transforms.py │ │ ├── tokens.py │ │ └── transforms │ │ │ ├── __init__.py │ │ │ ├── characters.py │ │ │ └── tokens.py │ └── train │ │ ├── __init__.py │ │ ├── en │ │ ├── __init__.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── test_train_section_segmentizer.py │ │ └── train_section_segmanizer.py │ │ └── train_data_manager.py ├── tests │ ├── __init__.py │ ├── dictionary_comparer.py │ ├── lexnlp_tests.py │ ├── tests │ │ ├── test_lexnlp_tests.py │ │ └── test_upload_benchmarks.py │ ├── typed_annotations_tests.py │ ├── upload_benchmarks.py │ 
├── utility_for_testing.py │ └── values_comparer.py └── utils │ ├── __init__.py │ ├── amount_delimiting.py │ ├── decorators.py │ ├── iterating_helpers.py │ ├── lines_processing │ ├── __init__.py │ ├── line_processor.py │ ├── parsed_text_corrector.py │ ├── parsed_text_quality_estimator.py │ └── phrase_finder.py │ ├── map.py │ ├── parse_df.py │ ├── pos_adjustments.py │ ├── tests │ ├── __init__.py │ ├── test_line_processor.py │ ├── test_map.py │ ├── test_parse_df.py │ ├── test_parsed_text_corrector.py │ ├── test_parsed_text_quality_estimator.py │ └── test_phrase_finder.py │ ├── unicode │ ├── __init__.py │ ├── tests │ │ ├── __init__.py │ │ └── test_unicode_lookup.py │ ├── unicode_character_categories.pickle │ ├── unicode_character_category_mapping.pickle │ ├── unicode_character_top_category_mapping.pickle │ └── unicode_lookup.py │ └── unpickler.py ├── libs ├── download_stanford_nlp.sh └── download_wiki.sh ├── notebooks ├── classification │ ├── contract-type-classifier │ │ ├── 0_download_corpora.ipynb │ │ ├── 1_preprocess_training_data.ipynb │ │ ├── 2_train_gensim_doc2vec_model.ipynb │ │ └── 3_train_sklearn_is_contract_classifier.ipynb │ └── is-contract-classifier │ │ ├── 0_download_corpora.ipynb │ │ ├── 1_preprocess_training_data.ipynb │ │ ├── 2_train_gensim_doc2vec_model.ipynb │ │ └── 3_train_sklearn_is_contract_classifier.ipynb ├── embeddings │ ├── 10k │ │ ├── build_word2vec_model.ipynb │ │ ├── build_word2vec_model_spacy.ipynb │ │ ├── build_word2vec_model_spacy.py │ │ └── test_w2v.ipynb │ └── contracts │ │ ├── build_doc2vec_model_all.py │ │ ├── build_word2vec_model.ipynb │ │ ├── build_word2vec_model_all.py │ │ ├── build_word2vec_model_articles.py │ │ ├── build_word2vec_model_credit.py │ │ ├── build_word2vec_model_employment.py │ │ ├── build_word2vec_model_leases.py │ │ ├── build_word2vec_model_operating.py │ │ ├── build_word2vec_model_services.py │ │ ├── build_word2vec_model_underwriting.py │ │ └── contract_classifier │ │ ├── build_classifier_doc2vec.ipynb │ │ ├── 
build_classifier_doc2vec_v2.ipynb │ │ └── build_classifier_word2vec.ipynb ├── extraction │ ├── employment │ │ └── code_employment.ipynb │ └── en │ │ ├── build_date_locator.ipynb │ │ ├── build_duration_locator.ipynb │ │ ├── test_dates.ipynb │ │ └── test_durations.ipynb └── nlp │ └── en │ ├── build_collocation_pickle.py │ ├── build_stopword_pickle.py │ ├── page_segmentation.ipynb │ ├── paragraph_segmentation.ipynb │ ├── section_segmentation.ipynb │ ├── sentence_segmentation.ipynb │ ├── stopwords_collocations.ipynb │ ├── term_locator_example.ipynb │ ├── test_segmenter.ipynb │ └── test_tokens.ipynb ├── python-requirements-dev.txt ├── python-requirements-full.txt ├── python-requirements-notes.txt ├── python-requirements.txt ├── readthedocs.yml ├── scripts ├── create_release_branch.sh ├── download_contract_samples.sh ├── download_tika.sh ├── run_tika.sh └── unify_py_file_structure.py ├── setup.py └── test_data ├── 1007273_2014-03-11_2 ├── 1031296_2004-11-04 ├── 1100644_2016-11-21 ├── 1205332_2008-05-08_3 ├── 1582586_2015-08-31 ├── lexnlp ├── extract │ ├── common │ │ ├── entities │ │ │ ├── en_banlist_full.csv │ │ │ └── en_banlist_one_col.csv │ │ └── ocr_grade │ │ │ ├── lorem_ipsum.txt │ │ │ ├── pretty_en_file.txt │ │ │ └── totem_und_tabu.txt │ ├── de │ │ ├── laws │ │ │ ├── de_concept_sample.csv │ │ │ ├── gesetze_list.csv │ │ │ └── verordnungen_list.csv │ │ ├── sample_de_court_citations01.txt │ │ ├── sample_de_courts01.txt │ │ ├── sample_de_courts02.txt │ │ ├── sample_de_definitions01.txt │ │ ├── sample_de_definitions02.txt │ │ ├── sample_de_definitions03.txt │ │ └── sample_de_definitions04.txt │ ├── en │ │ ├── addresses │ │ │ └── tests │ │ │ │ └── test_addresses │ │ │ │ ├── test_bad_cases.csv │ │ │ │ └── test_get_address.csv │ │ ├── contracts │ │ │ └── tests │ │ │ │ └── test_contracts │ │ │ │ └── test_is_contract.csv │ │ ├── copyrights │ │ │ └── bigfile.txt │ │ ├── courts │ │ │ └── courts_sample_01.txt │ │ ├── definitions │ │ │ ├── definitions_fp_collections.txt │ │ │ ├── 
definitions_hit_or_miss.txt │ │ │ ├── en_definitions_sample_doc.txt │ │ │ └── pure_definitions.txt │ │ ├── entities │ │ │ └── tests │ │ │ │ ├── test_nltk_maxent │ │ │ │ ├── test_companies.csv │ │ │ │ ├── test_companies_count.csv │ │ │ │ ├── test_companies_rs.csv │ │ │ │ ├── test_gpe_in.csv │ │ │ │ ├── test_gpes.csv │ │ │ │ ├── test_gpes_rs.csv │ │ │ │ ├── test_noun_phrases.csv │ │ │ │ ├── test_person_in.csv │ │ │ │ ├── test_persons.csv │ │ │ │ └── test_persons_rs.csv │ │ │ │ ├── test_nltk_re │ │ │ │ ├── test_companies_in_article.csv │ │ │ │ ├── test_company_article_regex.csv │ │ │ │ ├── test_company_as.csv │ │ │ │ └── test_company_regex.csv │ │ │ │ └── test_stanford_ner │ │ │ │ ├── test_stanford_locations.csv │ │ │ │ ├── test_stanford_name_example_in.csv │ │ │ │ └── test_stanford_org_example_in.csv │ │ └── tests │ │ │ ├── test_amounts │ │ │ ├── test_get_amount.csv │ │ │ ├── test_get_amount_non_round_float.csv │ │ │ └── test_get_amount_source.csv │ │ │ ├── test_citations │ │ │ └── test_get_citations.csv │ │ │ ├── test_conditions │ │ │ └── test_condition_fixed_example.csv │ │ │ ├── test_constraints │ │ │ └── test_constraint_fixed_example.csv │ │ │ ├── test_copyright │ │ │ └── test_copyright.csv │ │ │ ├── test_courts │ │ │ ├── test_courts.csv │ │ │ ├── test_courts_longest_match.csv │ │ │ ├── test_courts_rs.csv │ │ │ └── us_courts.csv │ │ │ ├── test_dates │ │ │ ├── test_fixed_dates.csv │ │ │ ├── test_fixed_dates_nonstrict.csv │ │ │ ├── test_fixed_dates_source.csv │ │ │ └── test_fixed_raw_dates.csv │ │ │ ├── test_definitions │ │ │ ├── bad_def.txt │ │ │ ├── test_definition_fixed.csv │ │ │ └── test_definition_in_sentences.csv │ │ │ ├── test_dict_entities │ │ │ └── test_normalize_text.csv │ │ │ ├── test_distance │ │ │ ├── test_get_distance.csv │ │ │ └── test_get_distance_source.csv │ │ │ ├── test_durations │ │ │ ├── test_get_durations.csv │ │ │ └── test_get_durations_source.csv │ │ │ ├── test_geoentities │ │ │ ├── geoaliases.csv │ │ │ ├── geoentities.csv │ │ │ ├── 
test_geoentities.csv │ │ │ ├── test_geoentities_alias_filtering.csv │ │ │ ├── test_geoentities_en_equal_match_take_lowest_id.csv │ │ │ └── test_geoentities_en_equal_match_take_top_prio.csv │ │ │ ├── test_money │ │ │ ├── test_get_money.csv │ │ │ └── test_get_money_source.csv │ │ │ ├── test_percents │ │ │ ├── test_get_percents.csv │ │ │ └── test_get_percents_source.csv │ │ │ ├── test_pii │ │ │ ├── test_pii_list.csv │ │ │ ├── test_pii_list_source.csv │ │ │ ├── test_ssn_list.csv │ │ │ ├── test_ssn_list_source.csv │ │ │ ├── test_us_phone_list.csv │ │ │ └── test_us_phone_list_source.csv │ │ │ ├── test_ratios │ │ │ ├── test_get_ratios.csv │ │ │ └── test_get_ratios_source.csv │ │ │ ├── test_regulations │ │ │ └── test_get_regulations.csv │ │ │ ├── test_trademarks │ │ │ └── test_trademarks.csv │ │ │ └── test_urls │ │ │ └── test_urls.csv │ └── es │ │ ├── definitions │ │ └── eula.txt │ │ └── sample_es_regulations.txt ├── ml │ └── en │ │ └── layered_definitions_train_data.jsonl ├── nlp │ └── en │ │ ├── heading │ │ ├── heading_doc_paragraphs.csv │ │ ├── heading_doc_sections.txt │ │ ├── heading_doc_sentences.txt │ │ └── heading_document.txt │ │ └── tests │ │ ├── test_pages │ │ └── test_page_examples.csv │ │ ├── test_paragraphs │ │ └── test_paragraph_examples.csv │ │ ├── test_sections │ │ └── skewed_document.txt │ │ ├── test_sentences │ │ ├── test_pre_process_document.csv │ │ └── test_sentence_segmenter.csv │ │ ├── test_stanford │ │ ├── test_stanford_noun_lemmas.csv │ │ ├── test_stanford_nouns.csv │ │ ├── test_stanford_tokens.csv │ │ ├── test_stanford_tokens_lc.csv │ │ ├── test_stanford_tokens_lc_sw.csv │ │ ├── test_stanford_tokens_sw.csv │ │ ├── test_stanford_verb_lemmas.csv │ │ └── test_stanford_verbs.csv │ │ └── test_tokens │ │ ├── test_adjectives.csv │ │ ├── test_adjectives_lemma.csv │ │ ├── test_adverbs.csv │ │ ├── test_adverbs_lemma.csv │ │ ├── test_lemmas.csv │ │ ├── test_lemmas_lc.csv │ │ ├── test_lemmas_lc_sw.csv │ │ ├── test_lemmas_sw.csv │ │ ├── test_nouns.csv │ │ ├── 
test_nouns_lemma.csv │ │ ├── test_stems.csv │ │ ├── test_stems_lowercase.csv │ │ ├── test_stems_lowercase_no_stopwords.csv │ │ ├── test_verb_lemmas.csv │ │ └── test_verbs.csv ├── typed_annotations │ ├── de │ │ ├── amount │ │ │ └── amounts.txt │ │ ├── citation │ │ │ └── citations.txt │ │ ├── copyright │ │ │ └── copyrights.txt │ │ ├── court │ │ │ └── courts.txt │ │ ├── court_citation │ │ │ └── court_citations.txt │ │ ├── date │ │ │ └── dates.txt │ │ ├── definition │ │ │ └── definitions.txt │ │ ├── duration │ │ │ └── durations.txt │ │ ├── geoentity │ │ │ └── geoentities.txt │ │ ├── law │ │ │ └── laws.txt │ │ └── percent │ │ │ └── percents.txt │ ├── en │ │ ├── act │ │ │ └── acts.txt │ │ ├── amount │ │ │ └── amounts.txt │ │ ├── citation │ │ │ └── citations.txt │ │ ├── condition │ │ │ └── conditions.txt │ │ ├── constraint │ │ │ └── constraints.txt │ │ ├── copyright │ │ │ └── copyrights.txt │ │ ├── court │ │ │ └── courts.txt │ │ ├── cusip │ │ │ └── cusips.txt │ │ ├── date │ │ │ └── dates.txt │ │ ├── definition │ │ │ └── definitions.txt │ │ ├── distance │ │ │ └── distances.txt │ │ ├── duration │ │ │ └── durations.txt │ │ ├── geoentity │ │ │ └── geoentities.txt │ │ ├── money │ │ │ └── money.txt │ │ ├── percent │ │ │ └── percents.txt │ │ ├── phone │ │ │ └── phones.txt │ │ ├── ratio │ │ │ └── ratios.txt │ │ ├── regulation │ │ │ └── regulations.txt │ │ ├── ssn │ │ │ └── ssn.txt │ │ ├── trademark │ │ │ └── trademarks.txt │ │ └── url │ │ │ └── urls.txt │ └── es │ │ ├── copyright │ │ └── copyrights.txt │ │ ├── court │ │ └── courts.txt │ │ ├── date │ │ └── dates.txt │ │ ├── definition │ │ └── definitions.txt │ │ └── regulation │ │ └── regulations.txt └── utils │ ├── parsing │ ├── pdf_malformat_parsed_default.txt │ ├── pdf_malformat_parsed_stripper.txt │ └── text_abusing_headers.txt │ └── unicode_data.txt ├── long_parsed_text.txt ├── output ├── .gitkeep ├── de_definitions_01.html └── es_definitions_01.html ├── sample_es_regulations.html ├── table_sample.pdf ├── tabular02.pdf ├── 
test_get_section_spans_1.txt └── test_lexnlp_tests └── test_test_extraction_func_on_test_data.csv /.pylintrc: -------------------------------------------------------------------------------- 1 | [FORMAT] 2 | max-line-length=120 3 | ignore-long-lines=^\s*(# )??$ 4 | 5 | 6 | [MISCELLANEOUS] 7 | notes=FIXME,TODO 8 | 9 | [MESSAGES CONTROL] 10 | disable=r,c,w0511 11 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include README.rst 3 | include index.rst 4 | include Pipfile 5 | include Pipfile.lock 6 | recursive-include lexnlp *.pickle 7 | recursive-include lexnlp/extract/en/addresses *.json *.txt *.xml 8 | recursive-include lexnlp *.csv 9 | recursive-include libs * 10 | recursive-include scripts * 11 | recursive-include documentation * 12 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | beautifulsoup4 = "*" 8 | cloudpickle = "*" 9 | Cython = "*" 10 | dateparser = "*" 11 | elasticsearch = "*" 12 | gensim = "==4.1.2" 13 | importlib-metadata = "*" 14 | joblib = "*" 15 | lxml = "*" 16 | nltk = "*" 17 | num2words = "*" 18 | pandas = "*" 19 | psutil = "*" 20 | pycountry = "*" 21 | python-dateutil = "*" 22 | regex = "*" 23 | reporters-db = "*" 24 | requests = "*" 25 | scikit-learn = "==0.24" 26 | scipy = "*" 27 | tqdm = "*" 28 | unidecode = "*" 29 | us = "*" 30 | zahlwort2num = "*" 31 | numpy = "*" 32 | 33 | [dev-packages] 34 | coverage = "*" 35 | memory-profiler = "*" 36 | nose = "*" 37 | pylint = "*" 38 | pytest = "*" 39 | sphinx = "*" 40 | 41 | [requires] 42 | python_version = "3.8" 43 | -------------------------------------------------------------------------------- 
/documentation/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = LexNLP 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.config.en.rst: -------------------------------------------------------------------------------- 1 | lexnlp.config.en package 2 | ======================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.config.en.company\_types module 8 | -------------------------------------- 9 | 10 | .. automodule:: lexnlp.config.en.company_types 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | lexnlp.config.en.geoentities\_config module 16 | ------------------------------------------- 17 | 18 | .. automodule:: lexnlp.config.en.geoentities_config 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: lexnlp.config.en 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.config.rst: -------------------------------------------------------------------------------- 1 | lexnlp.config package 2 | ===================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. 
toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.config.en 11 | 12 | Submodules 13 | ---------- 14 | 15 | lexnlp.config.stanford module 16 | ----------------------------- 17 | 18 | .. automodule:: lexnlp.config.stanford 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: lexnlp.config 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.all_locales.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.all\_locales.tests package 2 | ========================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.all\_locales.tests.test\_locales module 8 | ------------------------------------------------------ 9 | 10 | .. automodule:: lexnlp.extract.all_locales.tests.test_locales 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: lexnlp.extract.all_locales.tests 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.common.date_parsing.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.common.date\_parsing package 2 | =========================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.common.date\_parsing.datefinder module 8 | ----------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.common.date_parsing.datefinder 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. 
automodule:: lexnlp.extract.common.date_parsing 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.common.definitions.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.common.definitions package 2 | ========================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.common.definitions.common\_definition\_patterns module 8 | --------------------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.common.definitions.common_definition_patterns 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | lexnlp.extract.common.definitions.definition\_match module 16 | ---------------------------------------------------------- 17 | 18 | .. automodule:: lexnlp.extract.common.definitions.definition_match 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | lexnlp.extract.common.definitions.universal\_definition\_parser module 24 | ---------------------------------------------------------------------- 25 | 26 | .. automodule:: lexnlp.extract.common.definitions.universal_definition_parser 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: lexnlp.extract.common.definitions 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.common.durations.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.common.durations package 2 | ======================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.common.durations.durations\_parser module 8 | -------------------------------------------------------- 9 | 10 | .. 
automodule:: lexnlp.extract.common.durations.durations_parser 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: lexnlp.extract.common.durations 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.common.entities.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.common.entities package 2 | ====================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.common.entities.entity\_banlist module 8 | ----------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.common.entities.entity_banlist 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: lexnlp.extract.common.entities 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.common.ocr_rating.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.common.ocr\_rating package 2 | ========================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.common.ocr\_rating.lang\_vector\_distribution\_builder module 8 | ---------------------------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.common.ocr_rating.lang_vector_distribution_builder 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | lexnlp.extract.common.ocr\_rating.ocr\_rating\_calculator module 16 | ---------------------------------------------------------------- 17 | 18 | .. 
automodule:: lexnlp.extract.common.ocr_rating.ocr_rating_calculator 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: lexnlp.extract.common.ocr_rating 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.en.addresses.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.addresses package 2 | =================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.extract.en.addresses.tests 11 | 12 | Submodules 13 | ---------- 14 | 15 | lexnlp.extract.en.addresses.address\_features module 16 | ---------------------------------------------------- 17 | 18 | .. automodule:: lexnlp.extract.en.addresses.address_features 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | lexnlp.extract.en.addresses.addresses module 24 | -------------------------------------------- 25 | 26 | .. automodule:: lexnlp.extract.en.addresses.addresses 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: lexnlp.extract.en.addresses 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.en.addresses.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.addresses.tests package 2 | ========================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.en.addresses.tests.test\_addresses module 8 | -------------------------------------------------------- 9 | 10 | .. 
automodule:: lexnlp.extract.en.addresses.tests.test_addresses 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: lexnlp.extract.en.addresses.tests 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.en.contracts.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.contracts package 2 | =================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.extract.en.contracts.tests 11 | 12 | Submodules 13 | ---------- 14 | 15 | lexnlp.extract.en.contracts.contract\_type\_detector module 16 | ----------------------------------------------------------- 17 | 18 | .. automodule:: lexnlp.extract.en.contracts.contract_type_detector 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | lexnlp.extract.en.contracts.predictors module 24 | --------------------------------------------- 25 | 26 | .. automodule:: lexnlp.extract.en.contracts.predictors 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: lexnlp.extract.en.contracts 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.en.contracts.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.contracts.tests package 2 | ========================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.en.contracts.tests.test\_contract\_type module 8 | ------------------------------------------------------------- 9 | 10 | .. 
automodule:: lexnlp.extract.en.contracts.tests.test_contract_type 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | lexnlp.extract.en.contracts.tests.test\_contracts module 16 | -------------------------------------------------------- 17 | 18 | .. automodule:: lexnlp.extract.en.contracts.tests.test_contracts 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: lexnlp.extract.en.contracts.tests 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.en.entities.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.entities.tests package 2 | ======================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.en.entities.tests.test\_get\_companies module 8 | ------------------------------------------------------------ 9 | 10 | .. automodule:: lexnlp.extract.en.entities.tests.test_get_companies 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | lexnlp.extract.en.entities.tests.test\_nltk\_maxent module 16 | ---------------------------------------------------------- 17 | 18 | .. automodule:: lexnlp.extract.en.entities.tests.test_nltk_maxent 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | lexnlp.extract.en.entities.tests.test\_stanford\_ner module 24 | ----------------------------------------------------------- 25 | 26 | .. automodule:: lexnlp.extract.en.entities.tests.test_stanford_ner 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. 
automodule:: lexnlp.extract.en.entities.tests 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.en.preprocessing.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.preprocessing package 2 | ======================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.en.preprocessing.span\_tokenizer module 8 | ------------------------------------------------------ 9 | 10 | .. automodule:: lexnlp.extract.en.preprocessing.span_tokenizer 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: lexnlp.extract.en.preprocessing 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.ml.classifier.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.ml.classifier package 2 | ==================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.ml.classifier.base\_token\_sequence\_classifier\_model module 8 | ---------------------------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.ml.classifier.base_token_sequence_classifier_model 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | lexnlp.extract.ml.classifier.spacy\_token\_sequence\_model module 16 | ----------------------------------------------------------------- 17 | 18 | .. automodule:: lexnlp.extract.ml.classifier.spacy_token_sequence_model 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | lexnlp.extract.ml.classifier.token\_sequence\_model module 24 | ---------------------------------------------------------- 25 | 26 | .. 
automodule:: lexnlp.extract.ml.classifier.token_sequence_model 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: lexnlp.extract.ml.classifier 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.ml.detector.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.ml.detector.tests package 2 | ======================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.ml.detector.tests.test\_phrase\_constructor module 8 | ----------------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.ml.detector.tests.test_phrase_constructor 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: lexnlp.extract.ml.detector.tests 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.ml.en.definitions.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.ml.en.definitions.tests package 2 | ============================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.ml.en.definitions.tests.test\_layered\_definition\_detector module 8 | --------------------------------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.ml.en.definitions.tests.test_layered_definition_detector 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. 
automodule:: lexnlp.extract.ml.en.definitions.tests 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.ml.en.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.ml.en package 2 | ============================ 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.extract.ml.en.definitions 11 | 12 | Module contents 13 | --------------- 14 | 15 | .. automodule:: lexnlp.extract.ml.en 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.ml.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.ml package 2 | ========================= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.extract.ml.classifier 11 | lexnlp.extract.ml.detector 12 | lexnlp.extract.ml.en 13 | 14 | Submodules 15 | ---------- 16 | 17 | lexnlp.extract.ml.environment module 18 | ------------------------------------ 19 | 20 | .. automodule:: lexnlp.extract.ml.environment 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | Module contents 26 | --------------- 27 | 28 | .. automodule:: lexnlp.extract.ml 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.extract.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract package 2 | ====================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. 
toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.extract.all_locales 11 | lexnlp.extract.common 12 | lexnlp.extract.de 13 | lexnlp.extract.en 14 | lexnlp.extract.es 15 | lexnlp.extract.ml 16 | 17 | Module contents 18 | --------------- 19 | 20 | .. automodule:: lexnlp.extract 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.ml.catalog.rst: -------------------------------------------------------------------------------- 1 | lexnlp.ml.catalog package 2 | ========================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.ml.catalog.download module 8 | --------------------------------- 9 | 10 | .. automodule:: lexnlp.ml.catalog.download 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: lexnlp.ml.catalog 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.nlp.en.rst: -------------------------------------------------------------------------------- 1 | lexnlp.nlp.en package 2 | ===================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.nlp.en.segments 11 | lexnlp.nlp.en.tests 12 | lexnlp.nlp.en.transforms 13 | 14 | Submodules 15 | ---------- 16 | 17 | lexnlp.nlp.en.stanford module 18 | ----------------------------- 19 | 20 | .. automodule:: lexnlp.nlp.en.stanford 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | lexnlp.nlp.en.tokens module 26 | --------------------------- 27 | 28 | .. automodule:: lexnlp.nlp.en.tokens 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | Module contents 34 | --------------- 35 | 36 | .. 
automodule:: lexnlp.nlp.en 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.nlp.en.transforms.rst: -------------------------------------------------------------------------------- 1 | lexnlp.nlp.en.transforms package 2 | ================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.nlp.en.transforms.characters module 8 | ------------------------------------------ 9 | 10 | .. automodule:: lexnlp.nlp.en.transforms.characters 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | lexnlp.nlp.en.transforms.tokens module 16 | -------------------------------------- 17 | 18 | .. automodule:: lexnlp.nlp.en.transforms.tokens 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: lexnlp.nlp.en.transforms 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.nlp.rst: -------------------------------------------------------------------------------- 1 | lexnlp.nlp package 2 | ================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.nlp.en 11 | lexnlp.nlp.train 12 | 13 | Module contents 14 | --------------- 15 | 16 | .. automodule:: lexnlp.nlp 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.nlp.train.en.rst: -------------------------------------------------------------------------------- 1 | lexnlp.nlp.train.en package 2 | =========================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. 
toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.nlp.train.en.tests 11 | 12 | Submodules 13 | ---------- 14 | 15 | lexnlp.nlp.train.en.train\_section\_segmanizer module 16 | ----------------------------------------------------- 17 | 18 | .. automodule:: lexnlp.nlp.train.en.train_section_segmanizer 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: lexnlp.nlp.train.en 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.nlp.train.en.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.nlp.train.en.tests package 2 | ================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.nlp.train.en.tests.test\_train\_section\_segmentizer module 8 | ------------------------------------------------------------------ 9 | 10 | .. automodule:: lexnlp.nlp.train.en.tests.test_train_section_segmentizer 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: lexnlp.nlp.train.en.tests 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.nlp.train.rst: -------------------------------------------------------------------------------- 1 | lexnlp.nlp.train package 2 | ======================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.nlp.train.en 11 | 12 | Submodules 13 | ---------- 14 | 15 | lexnlp.nlp.train.train\_data\_manager module 16 | -------------------------------------------- 17 | 18 | .. automodule:: lexnlp.nlp.train.train_data_manager 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. 
automodule:: lexnlp.nlp.train 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.rst: -------------------------------------------------------------------------------- 1 | lexnlp package 2 | ============== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.config 11 | lexnlp.extract 12 | lexnlp.ml 13 | lexnlp.nlp 14 | lexnlp.tests 15 | lexnlp.utils 16 | 17 | Module contents 18 | --------------- 19 | 20 | .. automodule:: lexnlp 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.utils.unicode.rst: -------------------------------------------------------------------------------- 1 | lexnlp.utils.unicode package 2 | ============================ 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | lexnlp.utils.unicode.tests 11 | 12 | Submodules 13 | ---------- 14 | 15 | lexnlp.utils.unicode.unicode\_lookup module 16 | ------------------------------------------- 17 | 18 | .. automodule:: lexnlp.utils.unicode.unicode_lookup 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: lexnlp.utils.unicode 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/api/lexnlp.utils.unicode.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.utils.unicode.tests package 2 | ================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.utils.unicode.tests.test\_unicode\_lookup module 8 | ------------------------------------------------------- 9 | 10 | .. 
automodule:: lexnlp.utils.unicode.tests.test_unicode_lookup 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: lexnlp.utils.unicode.tests 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/api/modules.rst: -------------------------------------------------------------------------------- 1 | lexnlp 2 | ====== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | lexnlp 8 | -------------------------------------------------------------------------------- /documentation/docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | set SPHINXPROJ=LexNLP 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 
24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /documentation/docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx_rtd_theme 3 | sphinx-markdown-tables 4 | recommonmark 5 | pyyaml -------------------------------------------------------------------------------- /documentation/docs/source/_static/img/lexnlp_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/documentation/docs/source/_static/img/lexnlp_logo.png -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.config.en.rst: -------------------------------------------------------------------------------- 1 | lexnlp.config.en package 2 | ======================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.config.en.company\_types module 8 | -------------------------------------- 9 | 10 | .. automodule:: lexnlp.config.en.company_types 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | lexnlp.config.en.geoentities\_config module 16 | ------------------------------------------- 17 | 18 | .. automodule:: lexnlp.config.en.geoentities_config 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. 
automodule:: lexnlp.config.en 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.config.rst: -------------------------------------------------------------------------------- 1 | lexnlp.config package 2 | ===================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.config.en 10 | 11 | Submodules 12 | ---------- 13 | 14 | lexnlp.config.stanford module 15 | ----------------------------- 16 | 17 | .. automodule:: lexnlp.config.stanford 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: lexnlp.config 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.common.date_parsing.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.common.date\_parsing package 2 | =========================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.common.date\_parsing.datefinder module 8 | ----------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.common.date_parsing.datefinder 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. 
automodule:: lexnlp.extract.common.date_parsing 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.common.durations.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.common.durations package 2 | ======================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.common.durations.durations\_parser module 8 | -------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.common.durations.durations_parser 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: lexnlp.extract.common.durations 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.addresses.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.addresses package 2 | =================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.extract.en.addresses.tests 10 | 11 | Submodules 12 | ---------- 13 | 14 | lexnlp.extract.en.addresses.address\_features module 15 | ---------------------------------------------------- 16 | 17 | .. automodule:: lexnlp.extract.en.addresses.address_features 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | lexnlp.extract.en.addresses.addresses module 23 | -------------------------------------------- 24 | 25 | .. automodule:: lexnlp.extract.en.addresses.addresses 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. 
automodule:: lexnlp.extract.en.addresses 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.addresses.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.addresses.tests package 2 | ========================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.en.addresses.tests.test\_addresses module 8 | -------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.en.addresses.tests.test_addresses 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: lexnlp.extract.en.addresses.tests 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.amounts.get_amounts.rst: -------------------------------------------------------------------------------- 1 | get_amounts 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.extract.en.amounts 5 | 6 | .. autofunction:: get_amounts 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.amounts.get_np.rst: -------------------------------------------------------------------------------- 1 | get_np 2 | ====== 3 | 4 | .. currentmodule:: lexnlp.extract.en.amounts 5 | 6 | .. autofunction:: get_np 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.amounts.text2num.rst: -------------------------------------------------------------------------------- 1 | text2num 2 | ======== 3 | 4 | .. currentmodule:: lexnlp.extract.en.amounts 5 | 6 | .. 
autofunction:: text2num 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.citations.get_citations.rst: -------------------------------------------------------------------------------- 1 | get_citations 2 | ============= 3 | 4 | .. currentmodule:: lexnlp.extract.en.citations 5 | 6 | .. autofunction:: get_citations 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.conditions.create_condition_pattern.rst: -------------------------------------------------------------------------------- 1 | create_condition_pattern 2 | ======================== 3 | 4 | .. currentmodule:: lexnlp.extract.en.conditions 5 | 6 | .. autofunction:: create_condition_pattern 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.conditions.get_conditions.rst: -------------------------------------------------------------------------------- 1 | get_conditions 2 | ============== 3 | 4 | .. currentmodule:: lexnlp.extract.en.conditions 5 | 6 | .. autofunction:: get_conditions 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.constraints.create_constraint_pattern.rst: -------------------------------------------------------------------------------- 1 | create_constraint_pattern 2 | ========================= 3 | 4 | .. currentmodule:: lexnlp.extract.en.constraints 5 | 6 | .. autofunction:: create_constraint_pattern 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.constraints.get_constraints.rst: -------------------------------------------------------------------------------- 1 | get_constraints 2 | =============== 3 | 4 | .. currentmodule:: lexnlp.extract.en.constraints 5 | 6 | .. 
autofunction:: get_constraints 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.contracts.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.contracts package 2 | =================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.extract.en.contracts.tests 10 | 11 | Submodules 12 | ---------- 13 | 14 | lexnlp.extract.en.contracts.detector module 15 | ------------------------------------------- 16 | 17 | .. automodule:: lexnlp.extract.en.contracts.detector 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: lexnlp.extract.en.contracts 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.contracts.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.contracts.tests package 2 | ========================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.en.contracts.tests.test\_contracts module 8 | -------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.en.contracts.tests.test_contracts 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: lexnlp.extract.en.contracts.tests 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.copyright.CopyrightNPExtractor.rst: -------------------------------------------------------------------------------- 1 | CopyrightNPExtractor 2 | ==================== 3 | 4 | .. 
currentmodule:: lexnlp.extract.en.copyright 5 | 6 | .. autoclass:: CopyrightNPExtractor 7 | :show-inheritance: 8 | 9 | .. rubric:: Attributes Summary 10 | 11 | .. autosummary:: 12 | 13 | ~CopyrightNPExtractor.allowed_pos 14 | ~CopyrightNPExtractor.allowed_sym 15 | 16 | .. rubric:: Methods Summary 17 | 18 | .. autosummary:: 19 | 20 | ~CopyrightNPExtractor.strip_np 21 | 22 | .. rubric:: Attributes Documentation 23 | 24 | .. autoattribute:: allowed_pos 25 | .. autoattribute:: allowed_sym 26 | 27 | .. rubric:: Methods Documentation 28 | 29 | .. automethod:: strip_np 30 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.copyright.get_copyright.rst: -------------------------------------------------------------------------------- 1 | get_copyright 2 | ============= 3 | 4 | .. currentmodule:: lexnlp.extract.en.copyright 5 | 6 | .. autofunction:: get_copyright 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dates.build_date_model.rst: -------------------------------------------------------------------------------- 1 | build_date_model 2 | ================ 3 | 4 | .. currentmodule:: lexnlp.extract.en.dates 5 | 6 | .. autofunction:: build_date_model 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dates.get_date_features.rst: -------------------------------------------------------------------------------- 1 | get_date_features 2 | ================= 3 | 4 | .. currentmodule:: lexnlp.extract.en.dates 5 | 6 | .. autofunction:: get_date_features 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dates.get_dates.rst: -------------------------------------------------------------------------------- 1 | get_dates 2 | ========= 3 | 4 | .. 
currentmodule:: lexnlp.extract.en.dates 5 | 6 | .. autofunction:: get_dates 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dates.get_dates_list.rst: -------------------------------------------------------------------------------- 1 | get_dates_list 2 | ============== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dates 5 | 6 | .. autofunction:: get_dates_list 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dates.get_raw_date_list.rst: -------------------------------------------------------------------------------- 1 | get_raw_date_list 2 | ================= 3 | 4 | .. currentmodule:: lexnlp.extract.en.dates 5 | 6 | .. autofunction:: get_raw_date_list 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dates.get_raw_dates.rst: -------------------------------------------------------------------------------- 1 | get_raw_dates 2 | ============= 3 | 4 | .. currentmodule:: lexnlp.extract.en.dates 5 | 6 | .. autofunction:: get_raw_dates 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dates.train_default_model.rst: -------------------------------------------------------------------------------- 1 | train_default_model 2 | =================== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dates 5 | 6 | .. autofunction:: train_default_model 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.definitions.get_definitions.rst: -------------------------------------------------------------------------------- 1 | get_definitions 2 | =============== 3 | 4 | .. currentmodule:: lexnlp.extract.en.definitions 5 | 6 | .. 
autofunction:: get_definitions 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.SearchResultPosition.rst: -------------------------------------------------------------------------------- 1 | SearchResultPosition 2 | ==================== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autoclass:: SearchResultPosition 7 | :show-inheritance: 8 | 9 | .. rubric:: Attributes Summary 10 | 11 | .. autosummary:: 12 | 13 | ~SearchResultPosition.alias_text 14 | ~SearchResultPosition.entities_dict 15 | ~SearchResultPosition.start 16 | 17 | .. rubric:: Attributes Documentation 18 | 19 | .. autoattribute:: alias_text 20 | .. autoattribute:: entities_dict 21 | .. autoattribute:: start 22 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.add_alias_to_entity.rst: -------------------------------------------------------------------------------- 1 | add_alias_to_entity 2 | =================== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: add_alias_to_entity 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.add_aliases_to_entity.rst: -------------------------------------------------------------------------------- 1 | add_aliases_to_entity 2 | ===================== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: add_aliases_to_entity 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.alias_is_blacklisted.rst: -------------------------------------------------------------------------------- 1 | alias_is_blacklisted 2 | ==================== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. 
autofunction:: alias_is_blacklisted 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.conflicts_take_first_by_id.rst: -------------------------------------------------------------------------------- 1 | conflicts_take_first_by_id 2 | ========================== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: conflicts_take_first_by_id 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.conflicts_top_by_priority.rst: -------------------------------------------------------------------------------- 1 | conflicts_top_by_priority 2 | ========================= 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: conflicts_top_by_priority 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.entity_alias.rst: -------------------------------------------------------------------------------- 1 | entity_alias 2 | ============ 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: entity_alias 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.entity_config.rst: -------------------------------------------------------------------------------- 1 | entity_config 2 | ============= 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: entity_config 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.find_dict_entities.rst: -------------------------------------------------------------------------------- 1 | find_dict_entities 2 | ================== 3 | 4 | .. 
currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: find_dict_entities 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.get_alias_id.rst: -------------------------------------------------------------------------------- 1 | get_alias_id 2 | ============ 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: get_alias_id 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.get_alias_text.rst: -------------------------------------------------------------------------------- 1 | get_alias_text 2 | ============== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: get_alias_text 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.get_entity_aliases.rst: -------------------------------------------------------------------------------- 1 | get_entity_aliases 2 | ================== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: get_entity_aliases 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.get_entity_id.rst: -------------------------------------------------------------------------------- 1 | get_entity_id 2 | ============= 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: get_entity_id 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.get_entity_name.rst: -------------------------------------------------------------------------------- 1 | get_entity_name 2 | =============== 3 | 4 | .. 
currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: get_entity_name 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.get_entity_priority.rst: -------------------------------------------------------------------------------- 1 | get_entity_priority 2 | =================== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: get_entity_priority 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.normalize_text.rst: -------------------------------------------------------------------------------- 1 | normalize_text 2 | ============== 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: normalize_text 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.dict_entities.prepare_alias_blacklist_dict.rst: -------------------------------------------------------------------------------- 1 | prepare_alias_blacklist_dict 2 | ============================ 3 | 4 | .. currentmodule:: lexnlp.extract.en.dict_entities 5 | 6 | .. autofunction:: prepare_alias_blacklist_dict 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.distances.get_distances.rst: -------------------------------------------------------------------------------- 1 | get_distances 2 | ============= 3 | 4 | .. currentmodule:: lexnlp.extract.en.distances 5 | 6 | .. autofunction:: get_distances 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.durations.get_durations.rst: -------------------------------------------------------------------------------- 1 | get_durations 2 | ============= 3 | 4 | .. 
currentmodule:: lexnlp.extract.en.durations 5 | 6 | .. autofunction:: get_durations 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.geoentities.get_geoentities.rst: -------------------------------------------------------------------------------- 1 | get_geoentities 2 | =============== 3 | 4 | .. currentmodule:: lexnlp.extract.en.geoentities 5 | 6 | .. autofunction:: get_geoentities 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.money.get_money.rst: -------------------------------------------------------------------------------- 1 | get_money 2 | ========= 3 | 4 | .. currentmodule:: lexnlp.extract.en.money 5 | 6 | .. autofunction:: get_money 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.percents.get_percents.rst: -------------------------------------------------------------------------------- 1 | get_percents 2 | ============ 3 | 4 | .. currentmodule:: lexnlp.extract.en.percents 5 | 6 | .. autofunction:: get_percents 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.pii.get_pii.rst: -------------------------------------------------------------------------------- 1 | get_pii 2 | ======= 3 | 4 | .. currentmodule:: lexnlp.extract.en.pii 5 | 6 | .. autofunction:: get_pii 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.pii.get_ssns.rst: -------------------------------------------------------------------------------- 1 | get_ssns 2 | ======== 3 | 4 | .. currentmodule:: lexnlp.extract.en.pii 5 | 6 | .. 
autofunction:: get_ssns 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.pii.get_us_phones.rst: -------------------------------------------------------------------------------- 1 | get_us_phones 2 | ============= 3 | 4 | .. currentmodule:: lexnlp.extract.en.pii 5 | 6 | .. autofunction:: get_us_phones 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.preprocessing.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.en.preprocessing package 2 | ======================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.en.preprocessing.span\_tokenizer module 8 | ------------------------------------------------------ 9 | 10 | .. automodule:: lexnlp.extract.en.preprocessing.span_tokenizer 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: lexnlp.extract.en.preprocessing 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.ratios.get_ratios.rst: -------------------------------------------------------------------------------- 1 | get_ratios 2 | ========== 3 | 4 | .. currentmodule:: lexnlp.extract.en.ratios 5 | 6 | .. autofunction:: get_ratios 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.regulations.get_regulations.rst: -------------------------------------------------------------------------------- 1 | get_regulations 2 | =============== 3 | 4 | .. currentmodule:: lexnlp.extract.en.regulations 5 | 6 | .. 
autofunction:: get_regulations 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.trademarks.get_trademarks.rst: -------------------------------------------------------------------------------- 1 | get_trademarks 2 | ============== 3 | 4 | .. currentmodule:: lexnlp.extract.en.trademarks 5 | 6 | .. autofunction:: get_trademarks 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.urls.get_urls.rst: -------------------------------------------------------------------------------- 1 | get_urls 2 | ======== 3 | 4 | .. currentmodule:: lexnlp.extract.en.urls 5 | 6 | .. autofunction:: get_urls 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.utils.NPExtractor.rst: -------------------------------------------------------------------------------- 1 | NPExtractor 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.extract.en.utils 5 | 6 | .. autoclass:: NPExtractor 7 | :show-inheritance: 8 | 9 | .. rubric:: Attributes Summary 10 | 11 | .. autosummary:: 12 | 13 | ~NPExtractor.exception_pos 14 | ~NPExtractor.exception_sym 15 | ~NPExtractor.sym_with_space 16 | ~NPExtractor.sym_without_space 17 | 18 | .. rubric:: Methods Summary 19 | 20 | .. autosummary:: 21 | 22 | ~NPExtractor.cleanup_leaves 23 | ~NPExtractor.get_np 24 | ~NPExtractor.get_tokenizer 25 | ~NPExtractor.join 26 | ~NPExtractor.sep 27 | ~NPExtractor.strip_np 28 | 29 | .. rubric:: Attributes Documentation 30 | 31 | .. autoattribute:: exception_pos 32 | .. autoattribute:: exception_sym 33 | .. autoattribute:: sym_with_space 34 | .. autoattribute:: sym_without_space 35 | 36 | .. rubric:: Methods Documentation 37 | 38 | .. automethod:: cleanup_leaves 39 | .. automethod:: get_np 40 | .. automethod:: get_tokenizer 41 | .. automethod:: join 42 | .. automethod:: sep 43 | .. 
automethod:: strip_np 44 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.en.utils.strip_unicode_punctuation.rst: -------------------------------------------------------------------------------- 1 | strip_unicode_punctuation 2 | ========================= 3 | 4 | .. currentmodule:: lexnlp.extract.en.utils 5 | 6 | .. autofunction:: strip_unicode_punctuation 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.ml.classifier.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.ml.classifier package 2 | ==================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.ml.classifier.base\_token\_sequence\_classifier\_model module 8 | ---------------------------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.ml.classifier.base_token_sequence_classifier_model 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | lexnlp.extract.ml.classifier.spacy\_token\_sequence\_model module 16 | ----------------------------------------------------------------- 17 | 18 | .. automodule:: lexnlp.extract.ml.classifier.spacy_token_sequence_model 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | lexnlp.extract.ml.classifier.token\_sequence\_model module 24 | ---------------------------------------------------------- 25 | 26 | .. automodule:: lexnlp.extract.ml.classifier.token_sequence_model 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. 
automodule:: lexnlp.extract.ml.classifier 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.ml.detector.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.ml.detector.tests package 2 | ======================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.ml.detector.tests.test\_phrase\_constructor module 8 | ----------------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.ml.detector.tests.test_phrase_constructor 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: lexnlp.extract.ml.detector.tests 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.ml.en.definitions.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.ml.en.definitions.tests package 2 | ============================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.extract.ml.en.definitions.tests.test\_layered\_definition\_detector module 8 | --------------------------------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.extract.ml.en.definitions.tests.test_layered_definition_detector 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. 
automodule:: lexnlp.extract.ml.en.definitions.tests 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.ml.en.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.ml.en package 2 | ============================ 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.extract.ml.en.definitions 10 | 11 | Module contents 12 | --------------- 13 | 14 | .. automodule:: lexnlp.extract.ml.en 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.ml.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract.ml package 2 | ========================= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.extract.ml.classifier 10 | lexnlp.extract.ml.detector 11 | lexnlp.extract.ml.en 12 | 13 | Submodules 14 | ---------- 15 | 16 | lexnlp.extract.ml.environment module 17 | ------------------------------------ 18 | 19 | .. automodule:: lexnlp.extract.ml.environment 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | 25 | Module contents 26 | --------------- 27 | 28 | .. automodule:: lexnlp.extract.ml 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.extract.rst: -------------------------------------------------------------------------------- 1 | lexnlp.extract package 2 | ====================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.extract.common 10 | lexnlp.extract.de 11 | lexnlp.extract.en 12 | lexnlp.extract.es 13 | lexnlp.extract.ml 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. 
automodule:: lexnlp.extract 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.rst: -------------------------------------------------------------------------------- 1 | lexnlp.nlp.en package 2 | ===================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.nlp.en.segments 10 | lexnlp.nlp.en.tests 11 | lexnlp.nlp.en.transforms 12 | 13 | Submodules 14 | ---------- 15 | 16 | lexnlp.nlp.en.stanford module 17 | ----------------------------- 18 | 19 | .. automodule:: lexnlp.nlp.en.stanford 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | lexnlp.nlp.en.tokens module 25 | --------------------------- 26 | 27 | .. automodule:: lexnlp.nlp.en.tokens 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | 32 | 33 | Module contents 34 | --------------- 35 | 36 | .. automodule:: lexnlp.nlp.en 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.pages.MODULE_PATH.rst: -------------------------------------------------------------------------------- 1 | MODULE_PATH 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.pages 5 | 6 | .. autodata:: MODULE_PATH 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.pages.PAGE_SEGMENTER_MODEL.rst: -------------------------------------------------------------------------------- 1 | PAGE_SEGMENTER_MODEL 2 | ==================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.pages 5 | 6 | .. 
autodata:: PAGE_SEGMENTER_MODEL 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.pages.build_page_break_features.rst: -------------------------------------------------------------------------------- 1 | build_page_break_features 2 | ========================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.pages 5 | 6 | .. autofunction:: build_page_break_features 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.pages.get_pages.rst: -------------------------------------------------------------------------------- 1 | get_pages 2 | ========= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.pages 5 | 6 | .. autofunction:: get_pages 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.paragraphs.MODULE_PATH.rst: -------------------------------------------------------------------------------- 1 | MODULE_PATH 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.paragraphs 5 | 6 | .. autodata:: MODULE_PATH 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.paragraphs.Optional.rst: -------------------------------------------------------------------------------- 1 | Optional 2 | ======== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.paragraphs 5 | 6 | .. autodata:: Optional 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.paragraphs.PARAGRAPH_SEGMENTER_MODEL.rst: -------------------------------------------------------------------------------- 1 | PARAGRAPH_SEGMENTER_MODEL 2 | ========================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.paragraphs 5 | 6 | .. 
autodata:: PARAGRAPH_SEGMENTER_MODEL 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.paragraphs.RE_NEW_LINE.rst: -------------------------------------------------------------------------------- 1 | RE_NEW_LINE 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.paragraphs 5 | 6 | .. autodata:: RE_NEW_LINE 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.paragraphs.Union.rst: -------------------------------------------------------------------------------- 1 | Union 2 | ===== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.paragraphs 5 | 6 | .. autodata:: Union 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.paragraphs.build_paragraph_break_features.rst: -------------------------------------------------------------------------------- 1 | build_paragraph_break_features 2 | ============================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.paragraphs 5 | 6 | .. autofunction:: build_paragraph_break_features 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.paragraphs.get_paragraphs.rst: -------------------------------------------------------------------------------- 1 | get_paragraphs 2 | ============== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.paragraphs 5 | 6 | .. autofunction:: get_paragraphs 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.paragraphs.splitlines_with_spans.rst: -------------------------------------------------------------------------------- 1 | splitlines_with_spans 2 | ===================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.paragraphs 5 | 6 | .. 
autofunction:: splitlines_with_spans 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sections.MODULE_PATH.rst: -------------------------------------------------------------------------------- 1 | MODULE_PATH 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sections 5 | 6 | .. autodata:: MODULE_PATH 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sections.SECTION_SEGMENTER_MODEL.rst: -------------------------------------------------------------------------------- 1 | SECTION_SEGMENTER_MODEL 2 | ======================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sections 5 | 6 | .. autodata:: SECTION_SEGMENTER_MODEL 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sections.build_section_break_features.rst: -------------------------------------------------------------------------------- 1 | build_section_break_features 2 | ============================ 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sections 5 | 6 | .. autofunction:: build_section_break_features 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sections.get_sections.rst: -------------------------------------------------------------------------------- 1 | get_sections 2 | ============ 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sections 5 | 6 | .. autofunction:: get_sections 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.Any.rst: -------------------------------------------------------------------------------- 1 | Any 2 | === 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. 
autodata:: Any 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.MODULE_PATH.rst: -------------------------------------------------------------------------------- 1 | MODULE_PATH 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autodata:: MODULE_PATH 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.PRE_PROCESS_TEXT_REMOVE.rst: -------------------------------------------------------------------------------- 1 | PRE_PROCESS_TEXT_REMOVE 2 | ======================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autodata:: PRE_PROCESS_TEXT_REMOVE 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.SENTENCE_SEGMENTER_MODEL.rst: -------------------------------------------------------------------------------- 1 | SENTENCE_SEGMENTER_MODEL 2 | ======================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autodata:: SENTENCE_SEGMENTER_MODEL 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.SENTENCE_SPLITTERS.rst: -------------------------------------------------------------------------------- 1 | SENTENCE_SPLITTERS 2 | ================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autodata:: SENTENCE_SPLITTERS 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.SENTENCE_SPLITTERS_LOWER_EXCLUDE.rst: -------------------------------------------------------------------------------- 1 | SENTENCE_SPLITTERS_LOWER_EXCLUDE 2 | ================================ 3 | 4 | .. 
currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autodata:: SENTENCE_SPLITTERS_LOWER_EXCLUDE 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.STRIP_GROUP.rst: -------------------------------------------------------------------------------- 1 | STRIP_GROUP 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autodata:: STRIP_GROUP 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.Union.rst: -------------------------------------------------------------------------------- 1 | Union 2 | ===== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autodata:: Union 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.build_sentence_model.rst: -------------------------------------------------------------------------------- 1 | build_sentence_model 2 | ==================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autofunction:: build_sentence_model 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.extra_abbreviations.rst: -------------------------------------------------------------------------------- 1 | extra_abbreviations 2 | =================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autodata:: extra_abbreviations 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.get_sentence__with_coords_list.rst: -------------------------------------------------------------------------------- 1 | get_sentence__with_coords_list 2 | ============================== 3 | 4 | .. 
currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autofunction:: get_sentence__with_coords_list 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.get_sentence_list.rst: -------------------------------------------------------------------------------- 1 | get_sentence_list 2 | ================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autofunction:: get_sentence_list 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.get_sentence_span.rst: -------------------------------------------------------------------------------- 1 | get_sentence_span 2 | ================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autofunction:: get_sentence_span 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.get_sentence_span_list.rst: -------------------------------------------------------------------------------- 1 | get_sentence_span_list 2 | ====================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autofunction:: get_sentence_span_list 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.post_process_sentence.rst: -------------------------------------------------------------------------------- 1 | post_process_sentence 2 | ===================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. 
autofunction:: post_process_sentence 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.sentences.pre_process_document.rst: -------------------------------------------------------------------------------- 1 | pre_process_document 2 | ==================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.sentences 5 | 6 | .. autofunction:: pre_process_document 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.titles.MODULE_PATH.rst: -------------------------------------------------------------------------------- 1 | MODULE_PATH 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.titles 5 | 6 | .. autodata:: MODULE_PATH 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.titles.SECTION_SEGMENTER_MODEL.rst: -------------------------------------------------------------------------------- 1 | SECTION_SEGMENTER_MODEL 2 | ======================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.titles 5 | 6 | .. autodata:: SECTION_SEGMENTER_MODEL 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.titles.UNICODE_CHAR_TOP_CATEGORY_MAPPING.rst: -------------------------------------------------------------------------------- 1 | UNICODE_CHAR_TOP_CATEGORY_MAPPING 2 | ================================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.titles 5 | 6 | .. 
autodata:: UNICODE_CHAR_TOP_CATEGORY_MAPPING 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.titles.build_document_title_features.rst: -------------------------------------------------------------------------------- 1 | build_document_title_features 2 | ============================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.titles 5 | 6 | .. autofunction:: build_document_title_features 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.titles.build_model.rst: -------------------------------------------------------------------------------- 1 | build_model 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.titles 5 | 6 | .. autofunction:: build_model 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.titles.build_title_features.rst: -------------------------------------------------------------------------------- 1 | build_title_features 2 | ==================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.titles 5 | 6 | .. autofunction:: build_title_features 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.titles.get_titles.rst: -------------------------------------------------------------------------------- 1 | get_titles 2 | ========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.titles 5 | 6 | .. autofunction:: get_titles 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.utils.build_document_distribution.rst: -------------------------------------------------------------------------------- 1 | build_document_distribution 2 | =========================== 3 | 4 | .. 
currentmodule:: lexnlp.nlp.en.segments.utils 5 | 6 | .. autofunction:: build_document_distribution 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.segments.utils.build_document_line_distribution.rst: -------------------------------------------------------------------------------- 1 | build_document_line_distribution 2 | ================================ 3 | 4 | .. currentmodule:: lexnlp.nlp.en.segments.utils 5 | 6 | .. autofunction:: build_document_line_distribution 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.BIGRAM_COLLOCATIONS.rst: -------------------------------------------------------------------------------- 1 | BIGRAM_COLLOCATIONS 2 | =================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autodata:: BIGRAM_COLLOCATIONS 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.COLLOCATION_SIZE.rst: -------------------------------------------------------------------------------- 1 | COLLOCATION_SIZE 2 | ================ 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autodata:: COLLOCATION_SIZE 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.DEFAULT_LEMMATIZER.rst: -------------------------------------------------------------------------------- 1 | DEFAULT_LEMMATIZER 2 | ================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autodata:: DEFAULT_LEMMATIZER 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.DEFAULT_STEMMER.rst: -------------------------------------------------------------------------------- 1 | DEFAULT_STEMMER 2 | =============== 3 | 4 | .. 
currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autodata:: DEFAULT_STEMMER 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.MODULE_PATH.rst: -------------------------------------------------------------------------------- 1 | MODULE_PATH 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autodata:: MODULE_PATH 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.STOPWORDS.rst: -------------------------------------------------------------------------------- 1 | STOPWORDS 2 | ========= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autodata:: STOPWORDS 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.TRIGRAM_COLLOCATIONS.rst: -------------------------------------------------------------------------------- 1 | TRIGRAM_COLLOCATIONS 2 | ==================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autodata:: TRIGRAM_COLLOCATIONS 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_adjectives.rst: -------------------------------------------------------------------------------- 1 | get_adjectives 2 | ============== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autofunction:: get_adjectives 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_adverbs.rst: -------------------------------------------------------------------------------- 1 | get_adverbs 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. 
autofunction:: get_adverbs 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_lemma_list.rst: -------------------------------------------------------------------------------- 1 | get_lemma_list 2 | ============== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autofunction:: get_lemma_list 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_lemmas.rst: -------------------------------------------------------------------------------- 1 | get_lemmas 2 | ========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autofunction:: get_lemmas 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_nouns.rst: -------------------------------------------------------------------------------- 1 | get_nouns 2 | ========= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autofunction:: get_nouns 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_stem_list.rst: -------------------------------------------------------------------------------- 1 | get_stem_list 2 | ============= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autofunction:: get_stem_list 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_stems.rst: -------------------------------------------------------------------------------- 1 | get_stems 2 | ========= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. 
autofunction:: get_stems 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_token_list.rst: -------------------------------------------------------------------------------- 1 | get_token_list 2 | ============== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autofunction:: get_token_list 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_tokens.rst: -------------------------------------------------------------------------------- 1 | get_tokens 2 | ========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autofunction:: get_tokens 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_verbs.rst: -------------------------------------------------------------------------------- 1 | get_verbs 2 | ========= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autofunction:: get_verbs 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.tokens.get_wordnet_pos.rst: -------------------------------------------------------------------------------- 1 | get_wordnet_pos 2 | =============== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.tokens 5 | 6 | .. autofunction:: get_wordnet_pos 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.transforms.characters.MODULE_PATH.rst: -------------------------------------------------------------------------------- 1 | MODULE_PATH 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.transforms.characters 5 | 6 | .. 
autodata:: MODULE_PATH 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.transforms.characters.get_character_distribution.rst: -------------------------------------------------------------------------------- 1 | get_character_distribution 2 | ========================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.transforms.characters 5 | 6 | .. autofunction:: get_character_distribution 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.transforms.characters.get_character_ngram_distribution.rst: -------------------------------------------------------------------------------- 1 | get_character_ngram_distribution 2 | ================================ 3 | 4 | .. currentmodule:: lexnlp.nlp.en.transforms.characters 5 | 6 | .. autofunction:: get_character_ngram_distribution 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.transforms.rst: -------------------------------------------------------------------------------- 1 | lexnlp.nlp.en.transforms package 2 | ================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.nlp.en.transforms.characters module 8 | ------------------------------------------ 9 | 10 | .. automodule:: lexnlp.nlp.en.transforms.characters 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | lexnlp.nlp.en.transforms.tokens module 16 | -------------------------------------- 17 | 18 | .. automodule:: lexnlp.nlp.en.transforms.tokens 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. 
automodule:: lexnlp.nlp.en.transforms 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.transforms.tokens.MODULE_PATH.rst: -------------------------------------------------------------------------------- 1 | MODULE_PATH 2 | =========== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.transforms.tokens 5 | 6 | .. autodata:: MODULE_PATH 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.transforms.tokens.get_bigram_distribution.rst: -------------------------------------------------------------------------------- 1 | get_bigram_distribution 2 | ======================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.transforms.tokens 5 | 6 | .. autofunction:: get_bigram_distribution 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.transforms.tokens.get_ngram_distribution.rst: -------------------------------------------------------------------------------- 1 | get_ngram_distribution 2 | ====================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.transforms.tokens 5 | 6 | .. autofunction:: get_ngram_distribution 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.transforms.tokens.get_skipgram_distribution.rst: -------------------------------------------------------------------------------- 1 | get_skipgram_distribution 2 | ========================= 3 | 4 | .. currentmodule:: lexnlp.nlp.en.transforms.tokens 5 | 6 | .. 
autofunction:: get_skipgram_distribution 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.transforms.tokens.get_token_distribution.rst: -------------------------------------------------------------------------------- 1 | get_token_distribution 2 | ====================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.transforms.tokens 5 | 6 | .. autofunction:: get_token_distribution 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.en.transforms.tokens.get_trigram_distribution.rst: -------------------------------------------------------------------------------- 1 | get_trigram_distribution 2 | ======================== 3 | 4 | .. currentmodule:: lexnlp.nlp.en.transforms.tokens 5 | 6 | .. autofunction:: get_trigram_distribution 7 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.nlp.rst: -------------------------------------------------------------------------------- 1 | lexnlp.nlp package 2 | ================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.nlp.en 10 | 11 | Module contents 12 | --------------- 13 | 14 | .. automodule:: lexnlp.nlp 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.rst: -------------------------------------------------------------------------------- 1 | lexnlp package 2 | ============== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.config 10 | lexnlp.extract 11 | lexnlp.nlp 12 | lexnlp.tests 13 | lexnlp.utils 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. 
automodule:: lexnlp 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.utils.rst: -------------------------------------------------------------------------------- 1 | lexnlp.utils package 2 | ==================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.utils.lines_processing 10 | lexnlp.utils.tests 11 | lexnlp.utils.unicode 12 | 13 | Submodules 14 | ---------- 15 | 16 | lexnlp.utils.decorators module 17 | ------------------------------ 18 | 19 | .. automodule:: lexnlp.utils.decorators 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | lexnlp.utils.iterating\_helpers module 25 | -------------------------------------- 26 | 27 | .. automodule:: lexnlp.utils.iterating_helpers 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | 32 | lexnlp.utils.map module 33 | ----------------------- 34 | 35 | .. automodule:: lexnlp.utils.map 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | 40 | lexnlp.utils.parse\_df module 41 | ----------------------------- 42 | 43 | .. automodule:: lexnlp.utils.parse_df 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | 48 | 49 | Module contents 50 | --------------- 51 | 52 | .. automodule:: lexnlp.utils 53 | :members: 54 | :undoc-members: 55 | :show-inheritance: 56 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.utils.unicode.rst: -------------------------------------------------------------------------------- 1 | lexnlp.utils.unicode package 2 | ============================ 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlp.utils.unicode.tests 10 | 11 | Submodules 12 | ---------- 13 | 14 | lexnlp.utils.unicode.unicode\_lookup module 15 | ------------------------------------------- 16 | 17 | .. 
automodule:: lexnlp.utils.unicode.unicode_lookup 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: lexnlp.utils.unicode 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlp.utils.unicode.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlp.utils.unicode.tests package 2 | ================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlp.utils.unicode.tests.test\_unicode\_lookup module 8 | ------------------------------------------------------- 9 | 10 | .. automodule:: lexnlp.utils.unicode.tests.test_unicode_lookup 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: lexnlp.utils.unicode.tests 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlpprivate.extract.en.addresses.rst: -------------------------------------------------------------------------------- 1 | lexnlpprivate.extract.en.addresses package 2 | ========================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlpprivate.extract.en.addresses.tests 10 | 11 | Submodules 12 | ---------- 13 | 14 | lexnlpprivate.extract.en.addresses.addresses\_train module 15 | ---------------------------------------------------------- 16 | 17 | .. automodule:: lexnlpprivate.extract.en.addresses.addresses_train 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | lexnlpprivate.extract.en.addresses.convert\_geonames\_cities\_to\_word\_set module 23 | ---------------------------------------------------------------------------------- 24 | 25 | .. 
automodule:: lexnlpprivate.extract.en.addresses.convert_geonames_cities_to_word_set 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: lexnlpprivate.extract.en.addresses 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlpprivate.extract.en.addresses.tests.rst: -------------------------------------------------------------------------------- 1 | lexnlpprivate.extract.en.addresses.tests package 2 | ================================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | lexnlpprivate.extract.en.addresses.tests.test\_addresses\_train module 8 | ---------------------------------------------------------------------- 9 | 10 | .. automodule:: lexnlpprivate.extract.en.addresses.tests.test_addresses_train 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: lexnlpprivate.extract.en.addresses.tests 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlpprivate.extract.en.rst: -------------------------------------------------------------------------------- 1 | lexnlpprivate.extract.en package 2 | ================================ 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlpprivate.extract.en.addresses 10 | 11 | Module contents 12 | --------------- 13 | 14 | .. 
automodule:: lexnlpprivate.extract.en 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlpprivate.extract.rst: -------------------------------------------------------------------------------- 1 | lexnlpprivate.extract package 2 | ============================= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlpprivate.extract.en 10 | 11 | Module contents 12 | --------------- 13 | 14 | .. automodule:: lexnlpprivate.extract 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | -------------------------------------------------------------------------------- /documentation/docs/source/api/lexnlpprivate.rst: -------------------------------------------------------------------------------- 1 | lexnlpprivate package 2 | ===================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | lexnlpprivate.extract 10 | 11 | Module contents 12 | --------------- 13 | 14 | .. automodule:: lexnlpprivate 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | -------------------------------------------------------------------------------- /documentation/docs/source/api/modules.rst: -------------------------------------------------------------------------------- 1 | lexpredict-contraxsuite-core 2 | ============================ 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | lexnlp 8 | lexnlpprivate 9 | setup 10 | -------------------------------------------------------------------------------- /documentation/docs/source/api/setup.rst: -------------------------------------------------------------------------------- 1 | setup module 2 | ============ 3 | 4 | .. 
automodule:: setup 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /documentation/docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to the LexNLP documentation! 2 | ================================== 3 | 4 | .. image:: https://s3.amazonaws.com/lexpredict.com-marketing/graphics/lexpredict_lexnlp_logo_horizontal_1.png 5 | :width: 200px 6 | :alt: LexNLP 7 | :align: center 8 | 9 | 10 | | 11 | 12 | 13 | Table of Contents 14 | ------------ 15 | .. toctree:: 16 | :maxdepth: 4 17 | 18 | about 19 | lexnlp 20 | changes 21 | license 22 | 23 | 24 | Indices and tables 25 | ================== 26 | * :ref:`genindex` 27 | * :ref:`modindex` 28 | * :ref:`search` 29 | -------------------------------------------------------------------------------- /documentation/docs/source/lexnlp.rst: -------------------------------------------------------------------------------- 1 | LexNLP package 2 | ============== 3 | 4 | 5 | .. image:: https://s3.amazonaws.com/lexpredict.com-marketing/graphics/lexpredict_lexnlp_logo_horizontal_1.png 6 | :width: 200px 7 | :alt: LexNLP 8 | :align: left 9 | 10 | 11 | .. toctree:: 12 | :maxdepth: 4 13 | :caption: Contents: 14 | 15 | modules/extract/extract 16 | modules/nlp/nlp 17 | 18 | -------------------------------------------------------------------------------- /documentation/docs/source/license.rst: -------------------------------------------------------------------------------- 1 | .. _license: 2 | 3 | ============ 4 | License 5 | ============ 6 | 7 | AGPL License 8 | ---------------- 9 | LexNLP is available by default under the terms of the GNU Affero General Public License v3.0. 
10 | https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE 11 | 12 | 13 | License Release 14 | ---------------- 15 | If you would like to request a release from the terms of the default AGPLv3 license, please contact us at: 16 | ContraxSuite Licensing . 17 | 18 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/extract/de/dates.rst: -------------------------------------------------------------------------------- 1 | .. _extract_de_dates: 2 | 3 | ============ 4 | :mod:`lexnlp.extract.de.dates`: Extracting date references 5 | ============ 6 | 7 | The :mod:`lexnlp.extract.de.dates` module contains methods that allow for the extraction 8 | of dates from text. Sample formats that are handled by this module include: 9 | 10 | * vom 29. März 2017 11 | * 16.5.2002 12 | 13 | The full list of current unit test cases can be found here: 14 | https://github.com/LexPredict/lexpredict-lexnlp/tree/master/lexnlp/extract/common/tests/test_dates 15 | 16 | 17 | .. currentmodule:: lexnlp.extract.de.dates 18 | 19 | 20 | Extracting dates 21 | ---------------- 22 | .. autofunction:: get_date_list 23 | 24 | Example :: 25 | 26 | >>> import lexnlp.extract.de.dates 27 | >>> text = " Artikel 39 des Gesetzes vom 29. März 2017 (BGBl. I S. 626) geändert worden ist" 28 | >>> print((lexnlp.extract.de.dates.get_date_list(text)) 29 | [{'location_start': 29, 30 | 'location_end': 42, 31 | 'value': datetime.datetime(2017, 3, 29, 0, 0), 32 | 'source': '29. März 2017'}] 33 | 34 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/extract/en/copyright.rst: -------------------------------------------------------------------------------- 1 | .. 
_extract_en_copyright: 2 | 3 | ============ 4 | :mod:`lexnlp.extract.en.copyright`: Extracting copyright references 5 | ============ 6 | 7 | The :mod:`lexnlp.extract.en.copyright` module contains methods that allow for the extraction 8 | of copyright references from text. 9 | 10 | 11 | The full list of current unit test cases can be found here: 12 | https://github.com/LexPredict/lexpredict-lexnlp/tree/master/test_data/lexnlp/extract/en/tests/test_copyright 13 | 14 | 15 | .. currentmodule:: lexnlp.extract.en.copyright 16 | 17 | 18 | Extracting copyrights 19 | ---------------- 20 | .. autofunction:: get_copyright 21 | 22 | Example :: 23 | 24 | >>> import lexnlp.extract.en.copyright 25 | >>> text = "(C) Copyright 1993-1996 Hughes Information Systems Company" 26 | >>> print(list(lexnlp.extract.en.copyright.get_copyright(text))) 27 | [('Copyright', '1993-1996', 'Hughes Information Systems Company')] 28 | 29 | >>> text = "Test copyrigh symbol © 2017, SIGN LLC" 30 | >>> print(list(lexnlp.extract.en.conditions.get_conditions(text))) 31 | print(list(lexnlp.extract.en.copyright.get_copyright(text))) 32 | [('©', '2017', 'SIGN LLC')] 33 | 34 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/extract/en/percents.rst: -------------------------------------------------------------------------------- 1 | .. _extract_en_percents: 2 | 3 | ============ 4 | :mod:`lexnlp.extract.en.percents`: Extracting percents and rates 5 | ============ 6 | 7 | The :mod:`lexnlp.extract.en.percents` module contains methods that allow for the extraction 8 | of percent and rate statements from text. Example statements that are covered by default in this module include: 9 | 10 | * one percent 11 | * 1% 12 | * 50 bps 13 | * fifty basis points 14 | 15 | The full list of current unit test cases can be found here: 16 | https://github.com/LexPredict/lexpredict-lexnlp/tree/master/test_data/lexnlp/extract/en/tests/test_percents 17 | 18 | 19 | .. 
currentmodule:: lexnlp.extract.en.percents 20 | 21 | 22 | Extracting conditions 23 | ---------------- 24 | .. autofunction:: get_percents 25 | 26 | Example :: 27 | 28 | >>> import lexnlp.extract.en.percents 29 | >>> text = "At a discount of 1%" 30 | >>> print(list(lexnlp.extract.en.percents.get_percents(text))) 31 | [('%', 1.0, 0.01)] 32 | >>> text = "At a discount of 10 basis points" 33 | >>> print(list(lexnlp.extract.en.percents.get_percents(text))) 34 | [('basis points', 10.0, 0.001)] 35 | 36 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/extract/en/pii.rst: -------------------------------------------------------------------------------- 1 | .. _extract_en_pii: 2 | 3 | ============ 4 | :mod:`lexnlp.extract.en.pii`: Extracting personally-identifiable information (PII) 5 | ============ 6 | 7 | The :mod:`lexnlp.extract.en.pii` module contains methods that allow for the extraction 8 | of personally identifying information from text. Examples include: 9 | 10 | * phone numbers 11 | * US social security numbers 12 | * names 13 | 14 | The full list of current unit test cases can be found here: 15 | https://github.com/LexPredict/lexpredict-lexnlp/tree/master/test_data/lexnlp/extract/en/tests/test_pii 16 | 17 | 18 | .. currentmodule:: lexnlp.extract.en.pii 19 | 20 | 21 | Extracting PII 22 | ---------------- 23 | .. autofunction:: get_pii 24 | 25 | Example :: 26 | 27 | >>> import lexnlp.extract.en.pii 28 | >>> text = "John Doe (999-12-3456)" 29 | >>> print(list(lexnlp.extract.en.pii.get_pii(text))) 30 | [('ssn', '999-12-3456')] 31 | >>> text = "Mary Doe (212-123-4567)" 32 | >>> print(list(lexnlp.extract.en.pii.get_pii(text))) 33 | [('us_phone', '(212) 123-4567')] 34 | 35 | 36 | .. autofunction:: get_ssns 37 | 38 | .. 
autofunction:: get_us_phones 39 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/extract/en/ratios.rst: -------------------------------------------------------------------------------- 1 | .. _extract_en_ratios: 2 | 3 | ============ 4 | :mod:`lexnlp.extract.en.ratios`: Extracting ratios 5 | ============ 6 | 7 | The :mod:`lexnlp.extract.en.ratios` module contains methods that allow for the extraction 8 | of ratio statements from text. Example statements include: 9 | 10 | * 3:1 11 | * 3.0:1.0 12 | * three to one 13 | 14 | 15 | The full list of current unit test cases can be found here: 16 | https://github.com/LexPredict/lexpredict-lexnlp/tree/master/test_data/lexnlp/extract/en/tests/test_ratios 17 | 18 | 19 | .. currentmodule:: lexnlp.extract.en.ratios 20 | 21 | 22 | Extracting conditions 23 | ---------------- 24 | .. autofunction:: get_ratios 25 | 26 | Example :: 27 | 28 | >>> import lexnlp.extract.en.ratios 29 | >>> text = "At a leverage ratio of no more than ten to one." 30 | >>> print(list(lexnlp.extract.en.ratios.get_ratios(text))) 31 | [(10, 1, 10.0)] 32 | >>> text = "At a leverage ratio of no more than 2.5:1." 33 | >>> print(list(lexnlp.extract.en.ratios.get_ratios(text))) 34 | [(2.5, 1.0, 2.5)] 35 | 36 | 37 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/extract/en/trademarks.rst: -------------------------------------------------------------------------------- 1 | .. _extract_en_trademarks: 2 | 3 | ============ 4 | :mod:`lexnlp.extract.en.trademarks`: Extracting trademark references 5 | ============ 6 | 7 | The :mod:`lexnlp.extract.en.trademarks` module contains methods that allow for the extraction 8 | of trademarks references from text. 
Examples include: 9 | 10 | * Widget™ 11 | * Widget(TM) 12 | * Widget® 13 | * Widget(R) 14 | 15 | The full list of current unit test cases can be found here: 16 | https://github.com/LexPredict/lexpredict-lexnlp/tree/master/test_data/lexnlp/extract/en/tests/test_trademarks 17 | 18 | 19 | .. currentmodule:: lexnlp.extract.en.trademarks 20 | 21 | 22 | Extracting conditions 23 | ---------------- 24 | .. autofunction:: get_trademarks 25 | 26 | Example :: 27 | 28 | >>> import lexnlp.extract.en.trademarks 29 | >>> text = "Customer agrees to license HAL(TM)" 30 | >>> print(list(lexnlp.extract.en.trademarks.get_trademarks(text))) 31 | ['HAL (TM)'] 32 | >>> text = "Customer agrees to purchase a minimum quantity of 1000 Widget® units" 33 | >>> print(list(lexnlp.extract.en.trademarks.get_trademarks(text))) 34 | ['Widget®'] 35 | 36 | 37 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/extract/en/urls.rst: -------------------------------------------------------------------------------- 1 | .. _extract_en_urls: 2 | 3 | ============ 4 | :mod:`lexnlp.extract.en.url`: Extracting URLs 5 | ============ 6 | 7 | The :mod:`lexnlp.extract.en.urls` module contains methods that allow for the extraction 8 | of URLs from text. 9 | 10 | The full list of current unit test cases can be found here: 11 | https://github.com/LexPredict/lexpredict-lexnlp/tree/master/test_data/lexnlp/extract/en/tests/test_urls 12 | 13 | .. currentmodule:: lexnlp.extract.en.urls 14 | 15 | Extracting constraints 16 | ---------------- 17 | .. 
autofunction:: get_urls 18 | 19 | Example :: 20 | 21 | >>> import lexnlp.extract.en.urls 22 | >>> text = "A copy of the terms can be found at www.acme.com/terms" 23 | >>> print(list(lexnlp.extract.en.urls.get_urls(text))) 24 | ['www.acme.com/terms'] 25 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/nlp/en/segments_pages.rst: -------------------------------------------------------------------------------- 1 | .. _nlp_en_segments_pages: 2 | 3 | ============ 4 | :mod:`lexnlp.nlp.en.segments.pages`: Segmenting pages in text 5 | ============ 6 | 7 | The :mod:`lexnlp.nlp.en.segments.pages` module contains methods for segmenting text 8 | into zero or more pages. 9 | 10 | 11 | .. attention:: 12 | The sections below are a work in progress. Thank you for your patience 13 | while we continue to expand and improve our documentation coverage. 14 | 15 | If you have any questions in the meantime, please feel free to log issues on 16 | GitHub at the URL below or contact us at the email below: 17 | 18 | - GitHub issues: https://github.com/LexPredict/lexpredict-lexnlp 19 | - Email: support@contraxsuite.com 20 | 21 | 22 | .. automodapi:: lexnlp.nlp.en.segments.pages 23 | :include-all-objects: 24 | :members: 25 | 26 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/nlp/en/segments_paragraphs.rst: -------------------------------------------------------------------------------- 1 | .. _nlp_en_segments_paragraphs: 2 | 3 | ============ 4 | :mod:`lexnlp.nlp.en.segments.pages`: Segmenting paragraphs in text 5 | ============ 6 | 7 | The :mod:`lexnlp.nlp.en.segments.paragraphs` module contains methods for segmenting text 8 | into zero or more paragraphs. 9 | 10 | .. attention:: 11 | The sections below are a work in progress. Thank you for your patience 12 | while we continue to expand and improve our documentation coverage. 
13 | 14 | If you have any questions in the meantime, please feel free to log issues on 15 | GitHub at the URL below or contact us at the email below: 16 | 17 | - GitHub issues: https://github.com/LexPredict/lexpredict-lexnlp 18 | - Email: support@contraxsuite.com 19 | 20 | .. automodapi:: lexnlp.nlp.en.segments.paragraphs 21 | :include-all-objects: 22 | :members: 23 | 24 | 25 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/nlp/en/segments_sections.rst: -------------------------------------------------------------------------------- 1 | .. _nlp_en_segments_sections: 2 | 3 | ============ 4 | :mod:`lexnlp.nlp.en.segments.sections`: Segmenting sections in text 5 | ============ 6 | 7 | The :mod:`lexnlp.nlp.en.segments.sections` module contains methods for segmenting text 8 | into zero or more sections. 9 | 10 | .. attention:: 11 | The sections below are a work in progress. Thank you for your patience 12 | while we continue to expand and improve our documentation coverage. 13 | 14 | If you have any questions in the meantime, please feel free to log issues on 15 | GitHub at the URL below or contact us at the email below: 16 | 17 | - GitHub issues: https://github.com/LexPredict/lexpredict-lexnlp 18 | - Email: support@contraxsuite.com 19 | 20 | .. automodapi:: lexnlp.nlp.en.segments.sections 21 | :include-all-objects: 22 | :members: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/nlp/en/segments_sentences.rst: -------------------------------------------------------------------------------- 1 | .. _nlp_en_segments_sentences: 2 | 3 | ============ 4 | :mod:`lexnlp.nlp.en.segments.sections`: Segmenting sentences in text 5 | ============ 6 | 7 | The :mod:`lexnlp.nlp.en.segments.sentences` module contains methods for segmenting text 8 | into zero or more sentences. 9 | 10 | .. attention:: 11 | The sections below are a work in progress. 
Thank you for your patience 12 | while we continue to expand and improve our documentation coverage. 13 | 14 | If you have any questions in the meantime, please feel free to log issues on 15 | GitHub at the URL below or contact us at the email below: 16 | 17 | - GitHub issues: https://github.com/LexPredict/lexpredict-lexnlp 18 | - Email: support@contraxsuite.com 19 | 20 | .. automodapi:: lexnlp.nlp.en.segments.sentences 21 | :include-all-objects: 22 | :members: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/nlp/en/segments_titles.rst: -------------------------------------------------------------------------------- 1 | .. _nlp_en_segments_titles: 2 | 3 | ============ 4 | :mod:`lexnlp.nlp.en.segments.titles`: Segmenting and identifying titles in text 5 | ============ 6 | 7 | The :mod:`lexnlp.nlp.en.segments.titles` module contains methods for identifying titles and 8 | segmenting text between zero or more titles. 9 | 10 | .. attention:: 11 | The sections below are a work in progress. Thank you for your patience 12 | while we continue to expand and improve our documentation coverage. 13 | 14 | If you have any questions in the meantime, please feel free to log issues on 15 | GitHub at the URL below or contact us at the email below: 16 | 17 | - GitHub issues: https://github.com/LexPredict/lexpredict-lexnlp 18 | - Email: support@contraxsuite.com 19 | 20 | .. automodapi:: lexnlp.nlp.en.segments.titles 21 | :include-all-objects: 22 | :members: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/nlp/en/segments_utils.rst: -------------------------------------------------------------------------------- 1 | .. 
_nlp_en_segments_utils: 2 | 3 | ============ 4 | :mod:`lexnlp.nlp.en.segments.utils`: Utilities for segmenting 5 | ============ 6 | 7 | The :mod:`lexnlp.nlp.en.segments.utils` module contains utility methods for 8 | segmenting text. 9 | 10 | .. attention:: 11 | The sections below are a work in progress. Thank you for your patience 12 | while we continue to expand and improve our documentation coverage. 13 | 14 | If you have any questions in the meantime, please feel free to log issues on 15 | GitHub at the URL below or contact us at the email below: 16 | 17 | - GitHub issues: https://github.com/LexPredict/lexpredict-lexnlp 18 | - Email: support@contraxsuite.com 19 | 20 | .. automodapi:: lexnlp.nlp.en.segments.utils 21 | :include-all-objects: 22 | :members: 23 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/nlp/en/transforms_character.rst: -------------------------------------------------------------------------------- 1 | .. _nlp_en_transforms_characters: 2 | 3 | ============ 4 | :mod:`lexnlp.nlp.en.transforms.characters`: Transforming text into character-oriented features 5 | ============ 6 | 7 | The :mod:`lexnlp.nlp.en.transforms.characters` module contains methods 8 | that transform text into character distributions or related feature vectors. 9 | 10 | .. attention:: 11 | The sections below are a work in progress. Thank you for your patience 12 | while we continue to expand and improve our documentation coverage. 13 | 14 | If you have any questions in the meantime, please feel free to log issues on 15 | GitHub at the URL below or contact us at the email below: 16 | 17 | - GitHub issues: https://github.com/LexPredict/lexpredict-lexnlp 18 | - Email: support@contraxsuite.com 19 | 20 | .. 
automodapi:: lexnlp.nlp.en.transforms.characters 21 | :include-all-objects: 22 | :members: 23 | 24 | -------------------------------------------------------------------------------- /documentation/docs/source/modules/nlp/en/transforms_tokens.rst: -------------------------------------------------------------------------------- 1 | .. _nlp_en_transforms_tokens: 2 | 3 | ============ 4 | :mod:`lexnlp.nlp.en.transforms.tokens`: Transforming text into token-oriented features 5 | ============ 6 | 7 | The :mod:`lexnlp.nlp.en.transforms.tokens` module contains methods 8 | that transform text into token distributions or related feature vectors. 9 | 10 | .. attention:: 11 | The sections below are a work in progress. Thank you for your patience 12 | while we continue to expand and improve our documentation coverage. 13 | 14 | If you have any questions in the meantime, please feel free to log issues on 15 | GitHub at the URL below or contact us at the email below: 16 | 17 | - GitHub issues: https://github.com/LexPredict/lexpredict-lexnlp 18 | - Email: support@contraxsuite.com 19 | 20 | .. 
automodapi:: lexnlp.nlp.en.transforms.tokens 21 | :include-all-objects: 22 | :members: 23 | 24 | -------------------------------------------------------------------------------- /lexnlp/config/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/config/en/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/config/en/geoentities_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Geo Entities extraction configuration. 3 | """ 4 | 5 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 6 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 7 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 8 | __version__ = "2.3.0" 9 | __maintainer__ = "LexPredict, LLC" 10 | __email__ = "support@contraxsuite.com" 11 | 12 | 13 | # Minimal length of geo entity aliases to search for. 14 | # Allows avoiding false-positives on first and last names abbreviations (A.M. Best) e.t.c. 15 | 16 | 17 | MIN_ALIAS_LEN = 2 18 | 19 | # List of aliases to exclude from search: [(alias:str, language:str, is_abbrev:bool), ...] 
20 | ALIAS_BLACK_LIST = [] 21 | -------------------------------------------------------------------------------- /lexnlp/config/es/es_regulations.csv: -------------------------------------------------------------------------------- 1 | trigger,position 2 | junta de,start 3 | Administración,start 4 | Apartado \p{Lu} de,start 5 | Auditoría Superior de,start 6 | Comisión,start 7 | Comisiones,start 8 | Comité de,start 9 | Congreso de,start 10 | Cuenta de,start 11 | Ejecutivo,start 12 | Código Fiscal,start 13 | Gobierno,start 14 | Hacienda Pública,start 15 | INSTITUCIONES DE,start 16 | Instituto para,start 17 | ley de,start 18 | Nacional Financiera,start 19 | Plan Nacional de,start 20 | Programa Nacional de,start 21 | Registro Nacional,start 22 | Reglamento,start 23 | Secretaría de,start 24 | Secretarío,start 25 | Finanzas Públicas,start 26 | Subsecretario de,start 27 | Tesorería de,start -------------------------------------------------------------------------------- /lexnlp/extract/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/all_locales/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | # -*- coding: utf-8 -*- 10 | 
-------------------------------------------------------------------------------- /lexnlp/extract/all_locales/citations.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 4 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 5 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 6 | __version__ = "2.3.0" 7 | __maintainer__ = "LexPredict, LLC" 8 | __email__ = "support@contraxsuite.com" 9 | 10 | 11 | from typing import Generator 12 | 13 | from lexnlp.extract.all_locales.languages import LANG_EN, LANG_DE, DEFAULT_LANGUAGE, Locale 14 | from lexnlp.extract.common.annotations.citation_annotation import CitationAnnotation 15 | from lexnlp.extract.en.citations import get_citation_annotations as get_citation_annotations_en 16 | from lexnlp.extract.de.citations import get_citation_annotations as get_citation_annotations_de 17 | 18 | 19 | ROUTINE_BY_LOCALE = { 20 | LANG_EN.code: get_citation_annotations_en, 21 | LANG_DE.code: get_citation_annotations_de 22 | } 23 | 24 | 25 | def get_citation_annotations( 26 | locale: str, 27 | text: str) -> Generator[CitationAnnotation, None, None]: 28 | routine = ROUTINE_BY_LOCALE.get(Locale(locale).language, ROUTINE_BY_LOCALE[DEFAULT_LANGUAGE.code]) 29 | yield from routine(text) 30 | -------------------------------------------------------------------------------- /lexnlp/extract/all_locales/court_citations.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 4 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 5 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 6 | __version__ = "2.3.0" 7 | __maintainer__ = "LexPredict, LLC" 8 | __email__ = "support@contraxsuite.com" 9 | 10 | 11 | from typing import Generator 12 | 13 | from 
lexnlp.extract.all_locales.languages import LANG_DE, Locale 14 | from lexnlp.extract.common.annotations.court_citation_annotation import CourtCitationAnnotation 15 | from lexnlp.extract.de.court_citations import get_court_citation_annotations as get_court_citation_annotations_de 16 | 17 | 18 | ROUTINE_BY_LOCALE = { 19 | LANG_DE.code: get_court_citation_annotations_de 20 | } 21 | 22 | 23 | def get_court_citation_annotations(locale: str, text: str, language: str = None) -> \ 24 | Generator[CourtCitationAnnotation, None, None]: 25 | routine = ROUTINE_BY_LOCALE.get(Locale(locale).language, ROUTINE_BY_LOCALE[LANG_DE.code]) 26 | yield from routine(text, language) 27 | -------------------------------------------------------------------------------- /lexnlp/extract/all_locales/money.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 4 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 5 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 6 | __version__ = "2.3.0" 7 | __maintainer__ = "LexPredict, LLC" 8 | __email__ = "support@contraxsuite.com" 9 | 10 | 11 | from typing import Generator 12 | 13 | from lexnlp.extract.all_locales.languages import LANG_EN, LANG_DE, DEFAULT_LANGUAGE, Locale 14 | from lexnlp.extract.common.annotations.money_annotation import MoneyAnnotation 15 | from lexnlp.extract.en.money import get_money_annotations as get_money_annotations_en 16 | from lexnlp.extract.de.money import get_money_annotations as get_money_annotations_de 17 | 18 | 19 | ROUTINE_BY_LOCALE = { 20 | LANG_EN.code: get_money_annotations_en, 21 | LANG_DE.code: get_money_annotations_de 22 | } 23 | 24 | 25 | def get_money_annotations( 26 | locale: str, 27 | text: str, 28 | float_digits: int = 4, 29 | ) -> Generator[MoneyAnnotation, None, None]: 30 | routine = ROUTINE_BY_LOCALE.get(Locale(locale).language, 
ROUTINE_BY_LOCALE[DEFAULT_LANGUAGE.code]) 31 | yield from routine(text, float_digits) 32 | -------------------------------------------------------------------------------- /lexnlp/extract/all_locales/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/all_locales/tests/test_locales.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | 4 | """ 5 | Languages unit tests. 6 | """ 7 | 8 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 9 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 10 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 11 | __version__ = "2.3.0" 12 | __maintainer__ = "LexPredict, LLC" 13 | __email__ = "support@contraxsuite.com" 14 | 15 | 16 | from unittest import TestCase 17 | 18 | from lexnlp.extract.all_locales.languages import Locale 19 | 20 | 21 | class TestLocales(TestCase): 22 | 23 | def test_locales_convert(self): 24 | data = [ 25 | {'input': 'en', 'output_locale_code': 'EN'}, 26 | {'input': 'en-US', 'output_locale_code': 'US'}, 27 | {'input': 'en/Gb', 'output_locale_code': 'GB'}, 28 | {'input': 'En_us', 'output_locale_code': 'US'}, 29 | ] 30 | output_language_code = 'en' 31 | for item in data: 32 | locale_obj = Locale(item['input']) 33 | self.assertEqual(locale_obj.language, output_language_code) 34 | self.assertEqual(locale_obj.locale_code, item['output_locale_code']) 35 | -------------------------------------------------------------------------------- 
/lexnlp/extract/common/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/common/annotation_locator_type.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | from enum import Enum 10 | 11 | 12 | class AnnotationLocatorType(Enum): 13 | RegexpBased = 1 14 | MlWordVectorBased = 2 15 | -------------------------------------------------------------------------------- /lexnlp/extract/common/annotation_type.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | from enum import Enum 10 | 11 | 12 | class AnnotationType(Enum): 13 | act = 1 14 | amount = 2 15 | citation = 3 16 | condition = 4 17 | constraint = 5 18 | copyright = 6 19 | court = 7 20 | court_citation = 8 21 | cusip = 9 22 | date = 10 23 | definition = 11 24 | distance = 12 25 | duration = 13 26 | geoentity = 14 27 | money = 15 28 | percent = 16 29 | pii = 17 30 | phone = 18 
31 | ssn = 19 32 | ratio = 20 33 | regulation = 21 34 | trademark = 22 35 | url = 23 36 | laws = 24 37 | -------------------------------------------------------------------------------- /lexnlp/extract/common/annotations/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/common/base_path.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | import os 10 | 11 | 12 | lexnlp_base_path = os.path.abspath(os.path.dirname(__file__) + '/../../../') 13 | 14 | lexnlp_test_path = os.path.join(lexnlp_base_path, 'test_data/') 15 | -------------------------------------------------------------------------------- /lexnlp/extract/common/copyrights/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/common/date_parsing/__init__.py: 
-------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/common/definitions/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/common/definitions/definition_match.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | class DefinitionMatch: 10 | """ 11 | used inside EsDefinitionsParser and SpanishParsingMethods 12 | to store intermediate parsing results 13 | """ 14 | def __init__(self): 15 | self.name = None # type: str 16 | self.start = 0 17 | self.end = 0 18 | self.probability = 0 19 | -------------------------------------------------------------------------------- /lexnlp/extract/common/durations/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | 
__copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/common/entities/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | # -*- coding: utf-8 -*- 10 | -------------------------------------------------------------------------------- /lexnlp/extract/common/ocr_rating/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | # -*- coding: utf-8 -*- 10 | -------------------------------------------------------------------------------- /lexnlp/extract/common/ocr_rating/reference_vectors/de.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/common/ocr_rating/reference_vectors/de.pickle -------------------------------------------------------------------------------- /lexnlp/extract/common/ocr_rating/reference_vectors/en.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/common/ocr_rating/reference_vectors/en.pickle -------------------------------------------------------------------------------- /lexnlp/extract/common/pattern_found.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | class PatternFound: 10 | """ 11 | used inside EsDefinitionsParser and SpanishParsingMethods 12 | to store intermediate parsing results 13 | """ 14 | def __init__(self): 15 | self.name = None # type: str 16 | self.start = 0 17 | self.end = 0 18 | self.probability = 0 19 | 20 | # pylint: disable=unused-argument 21 | def pattern_worse_than_target(self, p, text: str) -> bool: # p: PatternFound 22 | """ 23 | check what pattern is better then 2 patterns are considered duplicated 24 | "text" may be used in derived classes 25 | """ 26 | spans = self.start <= p.start <= self.end and \ 27 | self.start <= p.end <= self.end 28 | if not spans: 29 | return False 30 | return self.name.find(p.name) >= 0 31 | -------------------------------------------------------------------------------- /lexnlp/extract/common/special_characters.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | class SpecialCharacters: 10 | punctuation = {'.', ',', ':', '-', ';', ')', 
'(', ']', '{', '}' 11 | '[', '*', '/', '\\', '"', '\'', '!', '?', '%', 12 | '$', '^', '&', '@'} 13 | -------------------------------------------------------------------------------- /lexnlp/extract/common/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/common/tests/definitions_text_annotator.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | from typing import List 10 | from lexnlp.extract.common.annotations.definition_annotation import DefinitionAnnotation 11 | from lexnlp.tests.utility_for_testing import save_test_document, annotate_text 12 | 13 | 14 | def annotate_definitions_text(text: str, 15 | definitions: List[DefinitionAnnotation], 16 | save_path: str) -> None: 17 | markup = annotate_text(text, definitions) 18 | save_test_document(save_path, markup) 19 | -------------------------------------------------------------------------------- /lexnlp/extract/de/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | 
__version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/de/date_model.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/de/date_model.pickle -------------------------------------------------------------------------------- /lexnlp/extract/de/date_model.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | import string 10 | 11 | 12 | DE_UNICODE_ALPHAS = 'äöüẞ' 13 | DE_ALPHA_CHAR_SET = set(string.ascii_letters + DE_UNICODE_ALPHAS + DE_UNICODE_ALPHAS.upper()) 14 | 15 | DE_ALPHABET = DE_UNICODE_ALPHAS + DE_UNICODE_ALPHAS.upper() 16 | DATE_MODEL_CHARS = [] 17 | DATE_MODEL_CHARS.extend(DE_ALPHABET + string.ascii_letters) 18 | DATE_MODEL_CHARS.extend(string.digits) 19 | DATE_MODEL_CHARS.extend(['-', '/', ' ', '%', '#', '$', '.', ',']) 20 | MONTH_NAMES = ['Januar', 'Februar', 'März', 'April', 'Mai', 'Juni', 21 | 'Juli', 'August', 'September', 'Oktober', 'November', 'Dezember'] 22 | -------------------------------------------------------------------------------- /lexnlp/extract/de/language_tokens.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = 
"LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | import os 10 | 11 | from lexnlp.extract.common.language_dictionary_reader import LanguageDictionaryReader 12 | 13 | 14 | class DeLanguageTokens: 15 | abbreviations = {'nr.', 'abs.', 'no.', 'act.', 'inc.', 'p.', 'Inc.'} 16 | articles = ['der', 'die', 'das', 'des', 'dem', 'den', 17 | 'ein', 'eine', 'eines', 'einer', 'einem', 'einen'] 18 | conjunctions = ['und', 'oder'] 19 | 20 | @staticmethod 21 | def init(): 22 | abr_file_path = os.path.join(os.path.dirname(__file__), 23 | 'data/abbreviations.txt') 24 | if os.path.isfile(abr_file_path): 25 | file_set = LanguageDictionaryReader.read_str_set(abr_file_path) 26 | DeLanguageTokens.abbreviations = \ 27 | DeLanguageTokens.abbreviations.union(file_set) 28 | 29 | 30 | DeLanguageTokens.init() 31 | -------------------------------------------------------------------------------- /lexnlp/extract/de/model.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/de/model.pickle -------------------------------------------------------------------------------- /lexnlp/extract/de/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/en/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = 
"https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/en/addresses/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/en/addresses/addresses_clf.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/en/addresses/addresses_clf.pickle -------------------------------------------------------------------------------- /lexnlp/extract/en/addresses/data/city_name_words.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/en/addresses/data/city_name_words.pickle -------------------------------------------------------------------------------- /lexnlp/extract/en/addresses/data/nltk_pos_tag_indexes.json: -------------------------------------------------------------------------------- 1 | {"VBD": 36, "PDT": 24, "TO": 33, "WP": 42, "SYM": 32, "NNS": 23, "EX": 12, "(": 3, "VBP": 39, ",": 5, "VBZ": 40, "MD": 19, "JJ": 15, "NNP": 21, "WRB": 44, "DT": 11, "--": 6, "RB": 28, "FW": 13, "PRP": 26, "RBR": 29, "LS": 18, "JJS": 17, "CD": 10, "JJR": 16, "IN": 14, "WP$": 43, "''": 2, "RBS": 30, "UH": 34, "``": 45, "VBG": 
37, "RP": 31, "PRP$": 27, "VB": 35, "$": 1, ".": 7, ")": 4, "WDT": 41, "NNPS": 22, "NN": 20, "CC": 9, "POS": 25, ":": 8, "VBN": 38} -------------------------------------------------------------------------------- /lexnlp/extract/en/addresses/data/street_directions.csv: -------------------------------------------------------------------------------- 1 | "CENTRAL" 2 | "NORTH" 3 | "SOUTH" 4 | "EAST" 5 | "WEST" 6 | "NORTH-EAST" 7 | "NORTH-WEST" 8 | "SOUTH-EAST" 9 | "SOUTH-WEST" 10 | "NE" 11 | "NW" 12 | "SE" 13 | "SW" 14 | "N" 15 | "S" 16 | "E" 17 | "W" 18 | -------------------------------------------------------------------------------- /lexnlp/extract/en/addresses/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/en/contracts/README.md: -------------------------------------------------------------------------------- 1 | # Contract Classification 2 | 3 | *Date (ISO 8601): 2022-04-19* 4 | 5 | --- 6 | 7 | ## `Is-Contract?` Classifier 8 | 9 | ### Usage 10 | 11 | Download the default Scikit-Learn pipeline: 12 | 13 | ```python 14 | from lexnlp.ml.catalog.download import download_github_release 15 | download_github_release('pipeline/is-contract/') 16 | ``` 17 | 18 | Instantiate the classifier: 19 | 20 | ```python 21 | 22 | from lexnlp.extract.en.contracts.predictors import ProbabilityPredictorIsContract 23 | probability_predictor_is_contract: ProbabilityPredictorIsContract = ProbabilityPredictorIsContract() 24 | ``` 25 | 26 | Use the `ProbabilityPredictorIsContract` 27 | 28 | ```python 29 | 
probability_predictor_is_contract.is_contract( 30 | text='...', 31 | min_probability=0.5, 32 | return_probability=True, 33 | ) 34 | ``` 35 | 36 | ### Training 37 | 38 | Training processes can be found under `notebooks/classification/contracts/` 39 | 40 | --- 41 | 42 | ## Contract Type Classifier 43 | 44 | *Not yet implemented* 45 | 46 | -------------------------------------------------------------------------------- /lexnlp/extract/en/contracts/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/en/contracts/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/en/contracts/tests/test_contract_type.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | import codecs 10 | from lexnlp.extract.en.contracts.contract_type_detector 
import ContractTypeDetector 11 | 12 | 13 | def non_test_contract_type(): 14 | model_folder = '' 15 | d2v_path = f'{model_folder}/d2v_size100_window10.json' 16 | rf_path = f'{model_folder}/rf_size100_window10_depth64' 17 | d = ContractTypeDetector(rf_path, d2v_path) 18 | 19 | with codecs.open( 20 | '/home/andrey/Downloads/src_files/text/src_txt_files/1274055_2010-03-23_4.txt', 21 | 'r', encoding='utf-8') as fr: 22 | doc_text = fr.read() 23 | v = d.detect_contract_type_vector(doc_text) 24 | print(d.detect_contract_type(v, 0.15, 99, '?')) 25 | print(d.detect_contract_type(v, 0.15, 75, '?')) 26 | print(d.detect_contract_type(v, 0.19, 99, '?')) 27 | -------------------------------------------------------------------------------- /lexnlp/extract/en/contracts/tests/test_contracts.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | from lexnlp.extract.en.contracts.predictors import ProbabilityPredictorIsContract 10 | from lexnlp.tests import lexnlp_tests 11 | 12 | 13 | def actual_data_converter(val): 14 | return [str(val)] 15 | 16 | 17 | def test_is_contract(): 18 | 19 | probability_predictor_is_contract: ProbabilityPredictorIsContract = \ 20 | ProbabilityPredictorIsContract(pipeline=ProbabilityPredictorIsContract.get_default_pipeline()) 21 | 22 | lexnlp_tests.test_extraction_func_on_test_data( 23 | probability_predictor_is_contract.is_contract, 24 | actual_data_converter=actual_data_converter, 25 | min_probability=0.3) 26 | 27 | # def test_bad_cases(): 28 | # lexnlp_tests.test_extraction_func_on_test_data(get_addresses) 29 | -------------------------------------------------------------------------------- 
/lexnlp/extract/en/date_model.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/en/date_model.pickle -------------------------------------------------------------------------------- /lexnlp/extract/en/date_model.py: -------------------------------------------------------------------------------- 1 | """Date extraction for English. 2 | 3 | This module implements date extraction functionality in English. 4 | """ 5 | 6 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 7 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 8 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 9 | __version__ = "2.3.0" 10 | __maintainer__ = "LexPredict, LLC" 11 | __email__ = "support@contraxsuite.com" 12 | 13 | 14 | # pylint: disable=bare-except 15 | 16 | # Standard imports 17 | import os 18 | import string 19 | import joblib 20 | 21 | 22 | # Setup path 23 | 24 | 25 | MODULE_PATH = os.path.dirname(os.path.abspath(__file__)) 26 | 27 | # Load model 28 | MODEL_DATE = joblib.load(os.path.join(MODULE_PATH, "./date_model.pickle")) 29 | 30 | ALPHA_CHAR_SET = set(string.ascii_letters) 31 | DATE_MODEL_CHARS = [] 32 | DATE_MODEL_CHARS.extend(string.ascii_letters) 33 | DATE_MODEL_CHARS.extend(string.digits) 34 | DATE_MODEL_CHARS.extend(["-", "/", " ", "%", "#", "$"]) 35 | -------------------------------------------------------------------------------- /lexnlp/extract/en/entities/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 
-------------------------------------------------------------------------------- /lexnlp/extract/en/entities/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/en/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/en/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/en/tests/test_conditions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | 4 | """Condition unit tests for English. 5 | 6 | This module implements unit tests for the condition extraction functionality in English. 
7 | 8 | Todo: 9 | * Better testing for exact test in return sources 10 | * More pathological and difficult cases 11 | """ 12 | 13 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 14 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 15 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 16 | __version__ = "2.3.0" 17 | __maintainer__ = "LexPredict, LLC" 18 | __email__ = "support@contraxsuite.com" 19 | 20 | 21 | from lexnlp.extract.en.conditions import get_conditions 22 | from lexnlp.tests import lexnlp_tests 23 | 24 | 25 | def test_condition_fixed_example(): 26 | lexnlp_tests.test_extraction_func_on_test_data(get_conditions, 27 | actual_data_converter=lambda t: [elem[0] for elem in t], 28 | test_only_expected_in=True) 29 | -------------------------------------------------------------------------------- /lexnlp/extract/en/tests/test_conditions_plain.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | from unittest import TestCase 10 | 11 | from lexnlp.extract.common.annotations.condition_annotation import ConditionAnnotation 12 | from lexnlp.extract.en.conditions import get_condition_annotations 13 | from lexnlp.tests.typed_annotations_tests import TypedAnnotationsTester 14 | 15 | 16 | class TestConditionsPlain(TestCase): 17 | 18 | def test_file_samples(self): 19 | tester = TypedAnnotationsTester() 20 | tester.test_and_raise_errors( 21 | get_condition_annotations, 22 | 'lexnlp/typed_annotations/en/condition/conditions.txt', 23 | ConditionAnnotation) 24 | -------------------------------------------------------------------------------- 
/lexnlp/extract/en/tests/test_constraints.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | 4 | """Constraints unit tests for English. 5 | 6 | This module implements unit tests for the constraint extraction functionality in English. 7 | 8 | Todo: 9 | * Better testing for exact test in return sources 10 | * More pathological and difficult cases 11 | """ 12 | 13 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 14 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 15 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 16 | __version__ = "2.3.0" 17 | __maintainer__ = "LexPredict, LLC" 18 | __email__ = "support@contraxsuite.com" 19 | 20 | 21 | from lexnlp.extract.en.constraints import get_constraints 22 | from lexnlp.tests import lexnlp_tests 23 | 24 | 25 | def test_constraint_fixed_example(): 26 | lexnlp_tests.test_extraction_func_on_test_data(get_constraints, 27 | actual_data_converter=lambda t: [elem[0] for elem in t], 28 | test_only_expected_in=True) 29 | -------------------------------------------------------------------------------- /lexnlp/extract/en/tests/test_copyright.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | 4 | """Copyright unit tests for English. 5 | 6 | This module implements unit tests for the copyright extraction functionality in English. 
7 | 8 | Todo: 9 | * Better testing for exact test in return sources 10 | * More pathological and difficult cases 11 | """ 12 | 13 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 14 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 15 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 16 | __version__ = "2.3.0" 17 | __maintainer__ = "LexPredict, LLC" 18 | __email__ = "support@contraxsuite.com" 19 | 20 | 21 | from lexnlp.extract.en.copyright import get_copyrights 22 | from lexnlp.tests import lexnlp_tests 23 | 24 | 25 | def test_copyright(): 26 | lexnlp_tests.test_extraction_func_on_test_data(get_copyrights, return_sources=True) 27 | -------------------------------------------------------------------------------- /lexnlp/extract/en/tests/test_definitions_template.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | from unittest import TestCase 10 | 11 | from lexnlp.extract.common.annotations.definition_annotation import DefinitionAnnotation 12 | from lexnlp.extract.en.definitions import get_definition_annotations 13 | from lexnlp.tests.typed_annotations_tests import TypedAnnotationsTester 14 | 15 | 16 | class TestDefinitionsTemplate(TestCase): 17 | 18 | def test_file_samples(self): 19 | tester = TypedAnnotationsTester() 20 | tester.test_and_raise_errors( 21 | get_definitions_sorted, 22 | 'lexnlp/typed_annotations/en/definition/definitions.txt', 23 | DefinitionAnnotation) 24 | 25 | 26 | def get_definitions_sorted(text: str): 27 | annotations = list(get_definition_annotations(text)) 28 | annotations.sort(key=lambda a: a.coords[0]) 29 | return annotations 30 | 
-------------------------------------------------------------------------------- /lexnlp/extract/en/tests/test_trademarks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | 4 | """Trademark unit tests for English. 5 | 6 | This module implements unit tests for the Trademark extraction functionality in English. 7 | 8 | Todo: 9 | * Better testing for exact test in return sources 10 | * More pathological and difficult cases 11 | """ 12 | 13 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 14 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 15 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 16 | __version__ = "2.3.0" 17 | __maintainer__ = "LexPredict, LLC" 18 | __email__ = "support@contraxsuite.com" 19 | 20 | 21 | # Project imports 22 | from lexnlp.extract.en.trademarks import get_trademarks 23 | from lexnlp.tests import lexnlp_tests 24 | 25 | 26 | def test_trademarks(): 27 | lexnlp_tests.test_extraction_func_on_test_data(get_trademarks) 28 | -------------------------------------------------------------------------------- /lexnlp/extract/en/tests/test_urls.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | 4 | """Urls unit tests for English. 5 | 6 | This module implements unit tests for the urls extraction functionality in English. 
7 | 8 | Todo: 9 | * Better testing for exact test in return sources 10 | * More pathological and difficult cases 11 | """ 12 | 13 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 14 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 15 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 16 | __version__ = "2.3.0" 17 | __maintainer__ = "LexPredict, LLC" 18 | __email__ = "support@contraxsuite.com" 19 | 20 | 21 | # Project imports 22 | from lexnlp.extract.en.urls import get_urls 23 | from lexnlp.tests import lexnlp_tests 24 | 25 | 26 | def test_urls(): 27 | lexnlp_tests.test_extraction_func_on_test_data(get_urls) 28 | -------------------------------------------------------------------------------- /lexnlp/extract/es/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/es/language_tokens.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | 8 | 9 | class EsLanguageTokens: 10 | """ 11 | Spanish parts of speech, used in a number of parsing methods 12 | """ 13 | abbreviations = {'nr.', 'abs.', 'no.', 'act.', 'inc.', 'p.'} 14 | articles = ['el', 'la', 'los', 'las'] 15 | conjunctions = ['und', 'oder'] 16 | 
-------------------------------------------------------------------------------- /lexnlp/extract/es/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/ml/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/ml/classifier/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/ml/classifier/data/unicode_character_categories.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/ml/classifier/data/unicode_character_categories.pickle 
-------------------------------------------------------------------------------- /lexnlp/extract/ml/classifier/data/unicode_character_category_mapping.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/ml/classifier/data/unicode_character_category_mapping.pickle -------------------------------------------------------------------------------- /lexnlp/extract/ml/classifier/data/unicode_character_top_category_mapping.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/ml/classifier/data/unicode_character_top_category_mapping.pickle -------------------------------------------------------------------------------- /lexnlp/extract/ml/detector/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/ml/detector/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- 
/lexnlp/extract/ml/en/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/ml/en/data/definition_model_layered.pickle.gzip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/extract/ml/en/data/definition_model_layered.pickle.gzip -------------------------------------------------------------------------------- /lexnlp/extract/ml/en/definitions/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/ml/en/definitions/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/extract/ml/environment.py: 
import os


# Absolute directory containing this module (symlinks resolved); used as
# the root for data files bundled with lexnlp.extract.ml.
ENV_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
# English-language data directory (the pickled definition models are
# stored under en/data).
ENV_EN_DATA_DIRECTORY = os.path.join(ENV_DIRECTORY, 'en/data')
= "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/nlp/en/collocation_bigrams_100.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/collocation_bigrams_100.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/collocation_bigrams_1000.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/collocation_bigrams_1000.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/collocation_bigrams_10000.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/collocation_bigrams_10000.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/collocation_bigrams_100000.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/collocation_bigrams_100000.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/collocation_bigrams_50000.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/collocation_bigrams_50000.pickle -------------------------------------------------------------------------------- 
/lexnlp/nlp/en/collocation_trigrams_100.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/collocation_trigrams_100.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/collocation_trigrams_1000.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/collocation_trigrams_1000.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/collocation_trigrams_10000.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/collocation_trigrams_10000.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/collocation_trigrams_100000.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/collocation_trigrams_100000.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/collocation_trigrams_50000.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/collocation_trigrams_50000.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/segments/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | 
__copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/nlp/en/segments/page_segmenter.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/segments/page_segmenter.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/segments/paragraph_segmenter.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/segments/paragraph_segmenter.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/segments/section_segmenter.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/segments/section_segmenter.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/segments/sentence_segmenter.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/segments/sentence_segmenter.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/segments/title_locator.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/segments/title_locator.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/stopwords.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/nlp/en/stopwords.pickle -------------------------------------------------------------------------------- /lexnlp/nlp/en/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/nlp/en/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/nlp/train/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = 
"support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/nlp/train/en/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/nlp/train/en/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/tests/values_comparer.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | 
def values_look_equal(a, b) -> bool:
    """
    Loosely compare two values for test purposes.

    Values are considered equal when they compare equal directly, when one
    is an empty string and the other is falsy, when both are numbers within
    a 0.001% relative tolerance, or when their string representations match.

    :param a: first value
    :param b: second value
    :return: True when the values "look" equal as described above
    """
    if a == b:
        return True

    # An empty string is treated as equal to any falsy value (None, 0, ...).
    a_is_blank_str = isinstance(a, str) and not a
    b_is_blank_str = isinstance(b, str) and not b
    if (a_is_blank_str and not b) or (b_is_blank_str and not a):
        return True

    if isinstance(a, numbers.Number) and isinstance(b, numbers.Number):
        fa = float(a)
        fb = float(b)
        diff = abs(fa - fb)
        # Percentage difference relative to each operand; zero operands
        # contribute 0 so we never divide by zero.
        rel_a = 0 if fa == 0 else 100 * diff / abs(fa)
        rel_b = 0 if fb == 0 else 100 * diff / abs(fb)
        # Equal when the larger relative difference is below 0.001%.
        return max(rel_a, rel_b) < 0.001

    # Fall back to comparing string representations; any failure while
    # stringifying means "not equal" rather than an error.
    try:
        same_repr = str(a) == str(b)
    except:  # pylint:disable=bare-except
        same_repr = False
    return same_repr
def collapse_sequence(sequence: Iterable,
                      predicate: Callable[[Any, Any], Any],
                      accumulator: Any = 0.0) -> Any:
    """
    Left-fold ``sequence`` into a single value.

    :param sequence: iterable of items to fold
    :param predicate: callable (item, accumulator) -> new accumulator
    :param accumulator: initial accumulator value (default 0.0)
    :return: the final accumulator after visiting every item
    """
    result = accumulator
    for element in sequence:
        result = predicate(element, result)
    return result


def count_sequence_matches(sequence: Iterable,
                           predicate: Callable[[Any], bool]) -> int:
    """
    Count the items of ``sequence`` for which ``predicate`` is truthy.

    :param sequence: iterable of items to inspect
    :param predicate: callable item -> bool
    :return: number of matching items
    """
    return collapse_sequence(
        sequence,
        lambda element, total: total + 1 if predicate(element) else total,
        0)
class TestMap(TestCase):
    """Unit tests for the attribute-access dict wrapper ``Map``."""

    def test_map(self):
        # Flat mapping: subscript and attribute access return the same value.
        simple = Map({'name': 'Siemens', 'age': 108})
        self.assertEqual('Siemens', simple['name'])
        self.assertEqual('Siemens', simple.name)

        # Nested dict values also support both access styles.
        nested = Map({'name': {'company': 'Siemens', 'trademark': '(c)Siemens'}})
        self.assertEqual('Siemens', nested.name['company'])
        self.assertEqual('Siemens', nested.name.company)

        # New attributes can be assigned on the fly, arbitrarily deep.
        nested.name.specie = Map()
        nested.name.specie.legal = 'xXx'
        self.assertEqual('xXx', nested.name.specie.legal)
Ill.', 'sh', 'should', 'find']) 22 | rst = finder.find_word(text, True) 23 | self.assertEqual(3, len(rst)) 24 | -------------------------------------------------------------------------------- /lexnlp/utils/unicode/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/utils/unicode/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "ContraxSuite, LLC; LexPredict, LLC" 2 | __copyright__ = "Copyright 2015-2021, ContraxSuite, LLC" 3 | __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/2.3.0/LICENSE" 4 | __version__ = "2.3.0" 5 | __maintainer__ = "LexPredict, LLC" 6 | __email__ = "support@contraxsuite.com" 7 | -------------------------------------------------------------------------------- /lexnlp/utils/unicode/unicode_character_categories.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/utils/unicode/unicode_character_categories.pickle -------------------------------------------------------------------------------- /lexnlp/utils/unicode/unicode_character_category_mapping.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/lexnlp/utils/unicode/unicode_character_category_mapping.pickle -------------------------------------------------------------------------------- 
import pickle


class RenameUnpickler(pickle.Unpickler):
    """Unpickler that transparently maps renamed sklearn module paths.

    Old pickles reference private sklearn modules that moved in newer
    sklearn releases; this remaps them so such pickles still load.
    """

    # legacy module path -> current module path
    _MODULE_RENAMES = {
        "sklearn.tree.tree": "sklearn.tree",
        "sklearn.ensemble.forest": "sklearn.ensemble._forest",
    }

    def find_class(self, module, name):
        # Substitute the renamed module (if any) before the normal lookup.
        return super().find_class(self._MODULE_RENAMES.get(module, module), name)


def renamed_load(file_obj):
    """Load a pickle from ``file_obj``, applying the module renames above."""
    return RenameUnpickler(file_obj).load()
cloudpickle==2.1.0 3 | dateparser==1.1.1 4 | docutils==0.17.1 5 | gensim==4.1.2 6 | joblib==1.1.0 7 | elasticsearch==7.8.0 8 | ipdb==0.13.9 9 | lxml==4.9.0 10 | memory-profiler==0.60.0 11 | nltk==3.7 12 | nose==1.3.7 13 | num2words==0.5.10 14 | numpy==1.22.3 15 | pandas==1.4.2 16 | psutil==5.9.1 17 | pycountry==22.3.5 18 | pytest-cov==3.0.0 19 | pytest-pep8==1.0.6 20 | pytest-pylint==0.18.0 21 | pytest-xdist==1.33.1 22 | python-dateutil==2.8.2 23 | regex==2022.3.2 24 | reporters-db==3.2.18 25 | requests==2.27.1 26 | scikit-learn==0.23.1 27 | scipy==1.8.1 28 | sphinx==5.0.1 29 | sphinx-rtd-theme==1.0.0 30 | tika==1.24 31 | twine==4.0.1 32 | Unidecode==1.3.4 33 | us==2.0.2 34 | zahlwort2num==0.3.0 -------------------------------------------------------------------------------- /python-requirements-full.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.11.1 2 | cloudpickle==2.1.0 3 | coverage==6.4.1 4 | https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz 5 | dateparser==1.1.1 6 | docutils==0.17.1 7 | elasticsearch==7.8.0 8 | gensim==4.1.2 9 | joblib==1.1.0 10 | ipdb==0.13.9 11 | lxml==4.9.0 12 | memory-profiler==0.60.0 13 | nltk==3.7 14 | nose==1.3.7 15 | num2words==0.5.10 16 | numpy==1.22.3 17 | pandas==1.4.2 18 | psutil==5.9.1 19 | pycountry==22.3.5 20 | pylint==2.14.1 21 | pytest==7.1.2 22 | pytest-cache==1.0 23 | pytest-cov==3.0.0 24 | pytest-pep8==1.0.6 25 | pytest-pylint==0.18.0 26 | pytest-xdist==1.33.0 27 | python-coveralls==2.9.3 28 | python-dateutil==2.8.1 29 | regex==2022.3.2 30 | reporters-db==3.2.18 31 | requests==2.27.1 32 | scikit-learn==0.23.1 33 | scipy==1.8.1 34 | sphinx==5.0.1 35 | sphinx-rtd-theme==1.0.0 36 | tika==1.24 37 | twine==4.0.1 38 | Unidecode==1.3.4 39 | us==2.0.2 40 | zahlwort2num==0.3.0 41 | -------------------------------------------------------------------------------- /python-requirements-notes.txt: 
1. Used dateparser==0.7.2 instead of 0.7.6 because 0.7.6 incorrectly parses "one 10-11-2017" as "01-10-2017".
2. Used pandas 0.24.2 instead of 1.0.5 because 0.25.0 and later versions break the feature DataFrames for the
   page/paragraph/sentence/section pickled models, causing them to predict wrong results.
3. Sphinx and twine install the newest docutils==0.16, but it is incompatible with gensim==3.8.3 (via botocore). Installed docutils==0.15.2 instead.
#!/usr/bin/env bash
# Based on https://github.com/vaites/php-apache-tika
#
# Starts a local Apache Tika server (used for document text extraction)
# unless one for the same version is already running. The whole script is
# a no-op unless the LEXNLP_USE_TIKA environment variable equals "true".

if [ "$LEXNLP_USE_TIKA" = true ]; then

    # Port the Tika server listens on.
    PORT=9998
    # Directory holding the downloaded Tika jars (see scripts/download_tika.sh);
    # overridable via APACHE_TIKA_BINARIES.
    BINARIES=${APACHE_TIKA_BINARIES:-bin}
    # Tika version to run; overridable via APACHE_TIKA_VERSION.
    VERSION=${APACHE_TIKA_VERSION:-"1.16"}

    # The grep process itself usually appears in the ps output, hence the
    # threshold of 2 below to detect a real server process.
    RUNNING=`ps aux | grep -c tika-server-$VERSION`

    if [ $RUNNING -lt 2 ]; then
        java -version
        echo "Starting Tika Server $VERSION"
        java -jar "$BINARIES/tika-server-$VERSION.jar" -p $PORT 2> /tmp/tika-server-$VERSION.log &
        # NOTE(review): PORT is incremented but never read afterwards —
        # confirm whether launching servers on successive ports was intended.
        ((PORT++))
        # Give the server a moment to come up before callers use it.
        sleep 5
    else
        echo "Tika Server $VERSION already running"
    fi

fi
Abkürzung,Kurztitel,Titel 2 | AABG,,Gesetz zur Begrenzung der Arzneimittelausgaben der gesetzlichen Krankenversicherung 3 | AAG,Aufwendungsausgleichsgesetz,Gesetz über den Ausgleich der Arbeitgeberaufwendungen für Entgeltfortzahlung 4 | ÄArbVtrG,,Gesetz über befristete Arbeitsverträge mit Ärzten in der Weiterbildung 5 | AAÜG,Anspruchs- und Anwartschaftsüberführungsgesetz,Gesetz zur Überführung der Ansprüche und Anwartschaften aus Zusatz- und Sonderversorgungssystemen des Beitrittsgebiets 6 | AAÜG-ÄndG,,Gesetz zur Änderung und Ergänzung des Anspruchs- und Anwartschaftsüberführungsgesetzes -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/entities/tests/test_nltk_maxent/test_companies_count.csv: -------------------------------------------------------------------------------- 1 | "Text","Company Name","Company Type","Count" 2 | "This Amendment to Employment Agreement (“Amendment”) is made and entered into this 18th day of July, 3 | 2005, by and between OSI SYSTEMS, INC. (“Company”), a California corporation, and Anuj Wadhawan (“Employee”).","OSI SYSTEMS","CORP",1 4 | "Fox Factory Holding Corp and Fox Factory Holding Corporation are the same company.","Fox Factory Holding","CORP",2 5 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/entities/tests/test_nltk_maxent/test_companies_rs.csv: -------------------------------------------------------------------------------- 1 | Text,Company Name,Company Type 2 | "This Amendment to Employment Agreement (“Amendment”) is made and entered into this 18th day of July, 3 | 2005, by and between OSI SYSTEMS, INC. (“Company”), a California corporation, and Anuj Wadhawan (“Employee”).",OSI SYSTEMS,INC 4 | "AMERICAN RESIDENTIAL GAP LLC (ARG), a Michigan Limited Liability Company 5 | with address at 380, N. Old Woodward Avenue, Ste. 300, Birmingham, MI 48009. 
6 | And 7 | PROGREEN CONSTRUCTION LLC (PGC), a Michigan Limited Liability Company 8 | with address at 380 N. Old Woodward Avenue, Ste. 226, Birmingham, MI 48009. 9 | ",AMERICAN RESIDENTIAL GAP,LLC 10 | ,PROGREEN CONSTRUCTION,LLC 11 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/entities/tests/test_nltk_re/test_companies_in_article.csv: -------------------------------------------------------------------------------- 1 | Text,Company Name,Company Type,Company Description 2 | "The Amendment, dated as of May 31, 3 | 1999, between California Federal Bank, A Federal Savings Bank, (the ""Company"") 4 | successor by merger to First Nationwide Bank, A Federal Savings Bank, (""FNB"") 5 | and Christie S. Flanagan (the ""Executive"").",California Federal,,Bank 6 | ,Federal Savings,,Bank 7 | "This Amendment to Employment Agreement (“Amendment”) is made and entered into this 18th day of July, 8 | 2005, by and between OSI SYSTEMS, INC. (“Company”), a California corporation, and Anuj Wadhawan (“Employee”).",OSI SYSTEMS,INC, 9 | " By and between American Residential Gap LLC (ARG), a Michigan Limited Liability Company with 10 | address at 380, N. Old Woodward Avenue, Ste. 300, Birmingham, MI 48009, and Progreen Construction LLC 11 | (PGC), a Michigan Limited Liability Company with address at 380 N. Old Woodward Avenue, Ste. 
226, 12 | Birmingham, MI 48009.",American Residential Gap,LLC, 13 | ,Progreen Construction,LLC, 14 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/entities/tests/test_nltk_re/test_company_article_regex.csv: -------------------------------------------------------------------------------- 1 | "Text","Company Name","Company Type","Company Type Abbr","Company Type Label","Company Description" 2 | "MK GOLD EXPLORATION B.V., a Dutch private company with limited liability (“Borrower”), and LEUCADIA NATIONAL CORPORATION, a New York corporation (“Lender”)","MK GOLD EXPLORATION","B.V.","BV","Besloten vennootschap", 3 | ,"LEUCADIA NATIONAL","CORPORATION","CORP","Corporation", 4 | "CREDIT AGREEMENT 5 | 6 | Dated as of April 20, 2011 7 | Among 8 | THE HANOVER INSURANCE GROUP, INC. 9 | 10 | as Borrower 11 | THE 12 | LENDERS NAMED HEREIN 13 | as Lenders 14 | GOLDMAN SACHS BANK USA 15 | as Sole Arranger and Bookrunner 16 | 17 | MORGAN STANLEY SENIOR FUNDING, INC 18 | as Syndication Agent 19 | WELLS FARGO BANK, NATIONAL ASSOCIATION 20 | 21 | as Documentation Agent 22 | and 23 | GOLDMAN SACHS BANK USA 24 | 25 | as Administrative Agent","THE HANOVER INSURANCE GROUP","INC","CORP","Corporation", 26 | ,"GOLDMAN SACHS",,,,"BANK" 27 | ,"MORGAN STANLEY SENIOR FUNDING","INC","CORP","Corporation", 28 | ,"WELLS FARGO","NATIONAL ASSOCIATION","NA","National Association","BANK" 29 | ,"GOLDMAN SACHS",,,,"BANK" 30 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/entities/tests/test_nltk_re/test_company_as.csv: -------------------------------------------------------------------------------- 1 | Text,Company Name,Company Type,Company Description,Party Type 2 | "Acme, Inc. 
as Lead Borrower","Acme",Inc,,Lead Borrower 3 | "HF Logistics-SKX T1, LLC, as Borrower","HF Logistics-SKX T1",LLC,,Borrower 4 | "dated as of 5 May, 2017" 5 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/entities/tests/test_nltk_re/test_company_regex.csv: -------------------------------------------------------------------------------- 1 | Text,Company Name,Company Type,Company Description 2 | "ACME, INC.",ACME,INC, 3 | "MK GOLD EXPLORATION B.V., a Dutch private company with limited liability (“Borrower”), and LEUCADIA NATIONAL CORPORATION, a New York corporation (“Lender”)",MK GOLD EXPLORATION,B.V., 4 | ,LEUCADIA NATIONAL,CORPORATION, 5 | "Wells Fargo Bank Minnesota, National Association",Wells Fargo Bank Minnesota,National Association,Bank 6 | "Deutsche Bank Securities Inc.",Deutsche Bank Securities,Inc,Bank 7 | "This is The Depository Trust & Clearing Corporation (“DTCC“)","The Depository Trust & Clearing","Corporation","Trust" 8 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/entities/tests/test_stanford_ner/test_stanford_org_example_in.csv: -------------------------------------------------------------------------------- 1 | Text,Organization 2 | "This Amendment to Executive Employment Agreement, dated effective as of February 22, 3 | 2011, is between Allis-Chalmers Energy Inc. (the “Company”) and Theodore F. Pound III 4 | (“Executive”).",Allis-Chalmers Energy Inc 5 | "The following agreement effective 1 January 2006 is hereby entered into between Art 6 | Hicks (hereinafter known as Executive) and Cybex International (together with its 7 | affiliated corporations hereinafter known as the “Company”) and having its principal 8 | offices at 10 Trotter Drive, Medway, MA. 
02053.",Cybex International 9 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_citations/test_get_citations.csv: -------------------------------------------------------------------------------- 1 | Text,Volume,Reporter,Reporter Full Name,Page,Page2,Court,Year,Source Text 2 | "bob lissner v. test 1 F.2d 1, 2-5 (2d Cir., 1982)",1,F.2d,Federal Reporter,1,2-5,2d Cir.,1982,"1 F.2d 1, 2-5 (2d Cir., 1982)" 3 | "bob lissner v. test 1 F.2d 1, 2-5 (1982)",1,F.2d,Federal Reporter,1,2-5,,1982,"1 F.2d 1, 2-5 (1982)" 4 | "bob lissner v. test 1 F.2d 1, 25 (1982)",1,F.2d,Federal Reporter,1,25,,1982,"1 F.2d 1, 25 (1982)" 5 | bob lissner v. test 1 F.2d 1 (1982),1,F.2d,Federal Reporter,1,,,1982,1 F.2d 1 (1982) 6 | bob lissner v. test 1 F.2d 1,1,F.2d,Federal Reporter,1,,,,1 F.2d 1 7 | "bob lissner v. test 1 F.2d 1, 2-5 (25 Fed. Cl. 20)",1,F.2d,Federal Reporter,1,2-5,,,"1 F.2d 1, 2-5" 8 | ,25,Fed. Cl.,United States Claims Court Reporter,20,,,,25 Fed. Cl. 20 9 | "green cow v. boy 1 Wash. 1, 25 (1795)",1,Wash.,"Virginia Reports, Washington",1,25,,1795,"1 Wash. 1, 25 (1795)" 10 | "green cow v. boy 1 Wash. 1, 25 (1900)",1,Wash.,Washington Reports,1,25,,1900,"1 Wash. 1, 25 (1900)" 11 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_copyright/test_copyright.csv: -------------------------------------------------------------------------------- 1 | Text,Symbol,Year,Name,Text 2 | (C)Copyright 1993-1996 Hughes Information Systems Company,Copyright,1993-1996,Hughes Information Systems Company,(C) Copyright 1993-1996 Hughes Information Systems Company 3 | "(C)Maverick(R) International Processing Services, Inc. 1999",(C),1999,"Maverick (R) International Processing Services, Inc","(C) Maverick (R) International Processing Services, Inc. 1999" 4 | "Copyright (C) 1998, Avid Technology, Inc. and its licensors. 
All rights 5 | reserved.",(C),1998,"Avid Technology, Inc","Copyright (C) 1998, Avid Technology, Inc" 6 | "Test copyrigh symbol © 2017, SIGN LLC",©,2017,SIGN LLC,"© 2017, SIGN LLC" -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_courts/test_courts_longest_match.csv: -------------------------------------------------------------------------------- 1 | "Text","Court Type","Court Name" 2 | "One one Bankr. E.D.N.C. two two two.","Bankruptcy Court","Eastern District of North Carolina" 3 | "One Bankr. E.D.N.C. E.D.N.C. two E.D.N.C. three","Bankruptcy Court","Eastern District of North Carolina" 4 | ,"Federal District Court","Eastern District of North Carolina" 5 | "E.D.N.C. ","Federal District Court","Eastern District of North Carolina" 6 | "One Bankr. E.D.N.C. Northern District of Mississippi two three 7 | Northern District of New York 8 | ","Bankruptcy Court","Eastern District of North Carolina" 9 | ,"Bankruptcy Court","Northern District of Mississippi" 10 | ,"Federal District Court","Northern District of Mississippi" 11 | ,"Federal District Court","Northern District of New York" 12 | ,"Bankruptcy Court","Northern District of New York" 13 | "One Bankr.E.D.N.C. E.D. N.C. two E.D.N.C. 
three","Bankruptcy Court","Eastern District of North Carolina" 14 | ,"Federal District Court","Eastern District of North Carolina" 15 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_dates/test_fixed_dates_nonstrict.csv: -------------------------------------------------------------------------------- 1 | Text,Date 2 | "The term of this lease shall 3 | be for a period of five years, commencing 4 | on the 1st day of April, 1995, and terminating on the 31st day of 5 | March, 6 | 2000 with an option for an additional five years at the same terms and 7 | conditions in this lease, provided that TENANT shall have given the 8 | LANDLORD written notice of TENANT’s intention to do so six (6) months prior 9 | to the expiration of this lease and that the Tenant is not in default 10 | of the Lease.",1995-04-01 11 | ,2000-03-31 12 | t may 13 | Lockheed Martin Corporation 14 | he Decided to make a break 15 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_dates/test_fixed_raw_dates.csv: -------------------------------------------------------------------------------- 1 | Text,Date 2 | No later than 2017-06-01.,2017-06-01 3 | "Dated as of June 1, 2017",2017-06-01 4 | Will be completed by June 2017,2017-06-01 5 | Will be completed by June,06-01 6 | "Will be completed by the 1st day of June, 2017",2017-06-01 7 | Commencement Date: 07/01/2004.,2004-07-01 8 | "From 12:01 a.m. on March 1, 1999 (the 'Commencement Date') 9 | through 1l:59 p.m. 
on November 30, 2002 10 | (the 'Expiration Date')",2002-11-30 11 | ,1999-03-01 00:01:00 12 | "Commencement Date: August 1, 2013.",2013-08-01 13 | "No date here", -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_dict_entities/test_normalize_text.csv: -------------------------------------------------------------------------------- 1 | "Text","Normalized Text" 2 | "Bankr. E.D.N.C."," bankr . e . d . n . c . " 3 | "Bankr.E.D.N.C."," bankr . e . d . n . c . " 4 | "Something Bankr. E.D.N.C. else."," something bankr . e . d . n . c . else . " 5 | "SomethingBankr.E.D.N.C. else."," somethingbankr . e . d . n . c . else . " 6 | "Something/Bankr. E.D.N.C. else."," something/bankr . e . d . n . c . else . " 7 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_distance/test_get_distance.csv: -------------------------------------------------------------------------------- 1 | Text,Distance,Units 2 | That is at least 10 miles away.,10,mile 3 | That is at least 10mi away.,10,mile 4 | That is at least 10 kilometers away.,10,kilometer 5 | That is at least 10km away.,10,kilometer 6 | That is somewhere between 5 miles and 10km from here.,5,mile 7 | ,10,kilometer 8 | There are 10 dogs. 9 | That is a 20Hz oscillation. 10 | This is a 5khz test. 
11 | ", 500 miles to go",500,mile 12 | ",500.5 miles to go",500.5,mile 13 | ", fifty miles to the 5khz test.",50,mile 14 | .5 miles to go,0.5,mile 15 | "There is no , distance here" 16 | There are many miles to go 17 | There are ten miles to go,10,mile 18 | There are 50 thousand miles to go,50000,mile 19 | There are fifty thousand miles to go,50000,mile 20 | This is not a 5.4.3.2.1 mi distance 21 | There are 5.4.3 thousand mi reasons 22 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_distance/test_get_distance_source.csv: -------------------------------------------------------------------------------- 1 | "Text","Distance","Unit","Source" 2 | "That is at least 10 miles away.",10,"mile","10 miles" 3 | "That is at least 10mi away.",10,"mile","10mi" 4 | "That is at least 10 kilometers away.",10,"kilometer","10 kilometers" 5 | "That is at least 10km away.",10,"kilometer","10km" 6 | "That is somewhere between 5 miles and 10km from here.",5,"mile","5 miles" 7 | ,10,"kilometer","10km" 8 | "There are 10 dogs.",,, 9 | "That is a 20Hz oscillation.",,, 10 | "This is a 5khz test.",,, 11 | ", 500 miles to go",500,"mile","500 miles" 12 | ",500.5 miles to go",500.5,"mile","500.5 miles" 13 | ", fifty miles to the 5khz test.",50,"mile","fifty miles" 14 | " .5 miles to go",0.5,"mile",".5 miles" 15 | "There is no , distance here",,, 16 | "There are many miles to go",,, 17 | "There are ten miles to go",10,"mile","ten miles" 18 | "There are 50 thousand miles to go",50000,"mile","50 thousand miles" 19 | "There are fifty thousand miles to go",50000,"mile","fifty thousand miles" 20 | "This is not a 5.4.3.2.1 mi distance",,, 21 | "There are 5.4.3 thousand mi reasons",,, 22 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_geoentities/test_geoentities_alias_filtering.csv: 
-------------------------------------------------------------------------------- 1 | "Text","input_text_languages_str","input_min_alias_len_int","Geo Entity" 2 | "Community of Mmaaddrid should not be detected for single letter Mm especially in name A.M. Best but New York should be too.",,2,"New York" 3 | "Community of Mmaaddrid should be detected for single letter Mm especially in name A.M. Best and New York should be too.",,1,"New York" 4 | ,,,"Community of Madrid" 5 | "There should be no Afghanistan in this sentence because “Afghanistan” and . Only USA.",,,"United States" 6 | "There should not be Aallbbania here – AL is blacklisted for English.","en",, 7 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_geoentities/test_geoentities_en_equal_match_take_lowest_id.csv: -------------------------------------------------------------------------------- 1 | "Text","Geo Entity","Source Text" 2 | "MS abbrev can be either MMMississippi or MMMonserrat. But for this test conflict resolving is enabled and it should detect MS as the entity having the first id in config (geoaliases.csv, entity_id). 
And this is Monserrat.","Montserrat","MS" 3 | "Here we expect CcaaNNaaddaa to be returned as having lower id than CcaaLLifornia for alias CA.","Canada","CA" 4 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_geoentities/test_geoentities_en_equal_match_take_top_prio.csv: -------------------------------------------------------------------------------- 1 | "Text","Geo Entity","Source Text" 2 | "Here we expect CcaaLLifornia to be returned as having greater priority than CcaaNNaaddaa for alias CA.","California","CA" 3 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_pii/test_pii_list.csv: -------------------------------------------------------------------------------- 1 | "Text","Type","Value" 2 | "Employee ID: 078-05-1120","ssn","078-05-1120" 3 | "My ID is 078-05-1120 and my phone number is 212-212-2121","ssn","078-05-1120" 4 | ,"us_phone","(212) 212-2121" 5 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_pii/test_pii_list_source.csv: -------------------------------------------------------------------------------- 1 | "Text","Type","Value","Source" 2 | "Employee ID: 078-05-1120","ssn","078-05-1120","078-05-1120" 3 | "My ID is 078-05-1120 and my phone number is 212-212-2121","ssn","078-05-1120","078-05-1120" 4 | ,"us_phone","(212) 212-2121","212-212-2121" 5 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_pii/test_ssn_list.csv: -------------------------------------------------------------------------------- 1 | Text,SSN 2 | Employee ID: 123-45-6789,123-45-6789 3 | There is no 12-34-45 SSN here. 
4 | Some poor soul had 078-05-1120 once upon a time.,078-05-1120 5 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_pii/test_ssn_list_source.csv: -------------------------------------------------------------------------------- 1 | "Text","SSN","Source" 2 | "Employee ID: 123-45-6789","123-45-6789","123-45-6789" 3 | "There is no 12-34-45 SSN here.",, 4 | "Some poor soul had 078-05-1120 once upon a time.","078-05-1120","078-05-1120" 5 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_pii/test_us_phone_list.csv: -------------------------------------------------------------------------------- 1 | Text,Phone 2 | Home Phone: (212) 212-2121,(212) 212-2121 3 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_pii/test_us_phone_list_source.csv: -------------------------------------------------------------------------------- 1 | "Text","Phone","Source" 2 | "Home Phone: (212) 212-2121","(212) 212-2121","(212) 212-2121" 3 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_ratios/test_get_ratios.csv: -------------------------------------------------------------------------------- 1 | Text,Numerator,Consequent,Decimal 2 | Ratio of not greater than 3.0:1.0.,3.0,1.0,3.0 3 | Ratio of no more than four to one,4.0,1.0,4.0 4 | Ratio of no more than four t one 5 | Ratio of no more than four ot one 6 | Ratio of no more than 4..0:1.0 7 | Ratio of no more than 4.0:1..0 8 | "Level I ---- 1.0:1.0 .18% 9 | Level II 1.0:1.0 2.0:1.0 .21% 10 | Level III 2.0:1.0 ------- .24%",1.0,1.0,1.0 11 | ,1.0,1.0,1.0 12 | ,2.0,1.0,2.0 13 | ,2.0,1.0,2.0 14 | Ratio of 2.0::1.0 15 | Don't catch time 8:30 a.m. 16 | Don't catch time 8:30 am 17 | Don't catch time 8:30 AM 18 | Don't catch time 8:30 p.m. 
19 | Don't catch 0:30 pseudo ratio 20 | Don't catch 30:0 pseudo ratio 21 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_ratios/test_get_ratios_source.csv: -------------------------------------------------------------------------------- 1 | Text,Numerator,Consequent,Decimal,Source 2 | Ratio of not greater than 3.0:1.0.,3.0,1.0,3.0,3.0:1.0. 3 | Ratio of no more than four to one,4,1,4.0,four to one 4 | Ratio of no more than four t one 5 | Ratio of no more than four ot one 6 | Ratio of no more than 4..0:1.0 7 | Ratio of no more than 4.0:1..0 8 | "Level I ---- 1.0:1.0 .18% 9 | Level II 1.0:1.0 2.0:1.0 .21% 10 | Level III 2.0:1.0 ------- .24%",1.0,1.0,1.0,1.0:1.0 11 | ,1.0,1.0,1.0,1.0:1.0 12 | ,2.0,1.0,2.0,2.0:1.0 13 | ,2.0,1.0,2.0,2.0:1.0 14 | Ratio of 2.0::1.0 15 | Don't catch time 8:30 a.m. 16 | Don't catch time 8:30 am 17 | Don't catch time 8:30 AM 18 | Don't catch time 8:30 p.m. 19 | Don't catch 0:30 pseudo ratio 20 | Don't catch 30:0 pseudo ratio 21 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_regulations/test_get_regulations.csv: -------------------------------------------------------------------------------- 1 | Text,Regulation Type,Regulation Code,Regulation Str 2 | test 55 C.F.R. 77 code,Code of Federal Regulations,55 CFR 77,55 C.F.R. 77 3 | test 55 CFR 77a-22B code,Code of Federal Regulations,55 CFR 77a-22B,55 CFR 77a-22B 4 | "test 123 U.S.C 5 | 456, code",United States Code,123 USC 456,"123 U.S.C 6 | 456" 7 | "test 123 U.S.C § 456, code",United States Code,123 USC § 456,123 U.S.C § 456 8 | "test 123 U.S.C Section 456, code",United States Code,123 USC Section 456,123 U.S.C Section 456 9 | "test 123 U.S.C Sec. 456, code",United States Code,123 USC Section 456,123 U.S.C Sec. 456 10 | test Public Law No. 123-456 code,Public Law,Public Law No. 123-456,Public Law No. 
123-456 11 | test Public Law 123-456 code,Public Law,Public Law No. 123-456,Public Law 123-456 12 | test Pub. Law 123-456 code,Public Law,Public Law No. 123-456,Pub. Law 123-456 13 | test Pub. L. 123-456 code,Public Law,Public Law No. 123-456,Pub. L. 123-456 14 | test 123 Stat. 456 code,Public Law,123 Stat. 456,123 Stat. 456 15 | test Stat. 456 code 16 | test AB USC 456 code 17 | test 678 USC UPD code 18 | test 678 USC UPD code 19 | test 10 Public Law codes 20 | -------------------------------------------------------------------------------- /test_data/lexnlp/extract/en/tests/test_urls/test_urls.csv: -------------------------------------------------------------------------------- 1 | "test http://www.demo.com/2/some-url/document.txt in the text","http://www.demo.com/2/some-url/document.txt" 2 | "test https - https://alpha.demo.com/document.txt in the text","https://alpha.demo.com/document.txt" 3 | "test get arguments - https://alpha.demo.com/document.txt?arg1=1&arg2=2 in the text","https://alpha.demo.com/document.txt?arg1=1&arg2=2" 4 | "test hash - https://alpha.demo.com#hash in the text","https://alpha.demo.com#hash" 5 | "test 2 urls http://alpha.demo.com/ and http://beta.demo.com/","http://alpha.demo.com/" 6 | ,"http://beta.demo.com/" 7 | "test UPPERCASE HTTP://WWW.DEMO.COM in the text","HTTP://WWW.DEMO.COM" -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_stanford/test_stanford_noun_lemmas.csv: -------------------------------------------------------------------------------- 1 | Text,Noun Lemma 2 | It has been approved and endorsed by The Associated General Contractors of America.,Associated 3 | ,General 4 | ,Contractors 5 | ,America 6 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_stanford/test_stanford_nouns.csv: -------------------------------------------------------------------------------- 1 | Text,Noun 2 | It has been 
approved and endorsed by The Associated General Contractors of America.,Associated 3 | ,General 4 | ,Contractors 5 | ,America 6 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_stanford/test_stanford_tokens.csv: -------------------------------------------------------------------------------- 1 | Text,Token 2 | It has been approved and endorsed by The Associated General Contractors of America.,It 3 | ,has 4 | ,been 5 | ,approved 6 | ,and 7 | ,endorsed 8 | ,by 9 | ,The 10 | ,Associated 11 | ,General 12 | ,Contractors 13 | ,of 14 | ,America 15 | ,. 16 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_stanford/test_stanford_tokens_lc.csv: -------------------------------------------------------------------------------- 1 | Text,Token Lowercase 2 | It has been approved and endorsed by The Associated General Contractors of America.,it 3 | ,has 4 | ,been 5 | ,approved 6 | ,and 7 | ,endorsed 8 | ,by 9 | ,the 10 | ,associated 11 | ,general 12 | ,contractors 13 | ,of 14 | ,america 15 | ,. 16 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_stanford/test_stanford_tokens_lc_sw.csv: -------------------------------------------------------------------------------- 1 | Text,"Token (Lowercase, No Stop Word)" 2 | It has been approved and endorsed by The Associated General Contractors of America.,approved 3 | ,endorsed 4 | ,associated 5 | ,general 6 | ,contractors 7 | ,america 8 | ,. 
9 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_stanford/test_stanford_tokens_sw.csv: -------------------------------------------------------------------------------- 1 | Text,Token (No Stop Words) 2 | It has been approved and endorsed by The Associated General Contractors of America.,approved 3 | ,endorsed 4 | ,Associated 5 | ,General 6 | ,Contractors 7 | ,America 8 | ,. 9 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_stanford/test_stanford_verb_lemmas.csv: -------------------------------------------------------------------------------- 1 | Text,Verb Lemma 2 | It has been approved and endorsed by The Associated General Contractors of America.,have 3 | ,be 4 | ,approve 5 | ,endorse 6 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_stanford/test_stanford_verbs.csv: -------------------------------------------------------------------------------- 1 | Text,Verb 2 | It has been approved and endorsed by The Associated General Contractors of America.,has 3 | ,been 4 | ,approved 5 | ,endorsed 6 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_adjectives.csv: -------------------------------------------------------------------------------- 1 | Text,Adjective 2 | "Builder shall comply with laws, rules, regulations and requirements of any Regulatory Authorities 3 | that are applicable and existing at the time of the execution of this Agreement that are in effect or which shall 4 | become effective as to any vessels built during the Project Schedule and which affect the construction of works, plants 5 | and vessels, in or on navigable waters and the shores thereof, and all other waters subject to the control of the United 6 | States as set forth in the Contract Documents and shall 
procure at its own expense such permits from the United States 7 | and from state and local authorities in the jurisdiction in which Builder is constructing the Vessels as may be 8 | necessary in connection with beginning or carrying on the completion of the Work, and shall at times comply with all 9 | United States, state and local laws in the jurisdiction in which Builder is constructing the Vessels in any way 10 | affecting the Work and affecting any documentation of such work with the U.S. Coast Guard.",applicable 11 | ,effective 12 | ,navigable 13 | ,other 14 | ,own 15 | ,such 16 | ,local 17 | ,necessary 18 | ,local 19 | ,such 20 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_adverbs.csv: -------------------------------------------------------------------------------- 1 | Text,Adverb 2 | shall promptly provide notice,promptly 3 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_adverbs_lemma.csv: -------------------------------------------------------------------------------- 1 | Text,Adverb Lemma 2 | shall promptly provide notice,promptly 3 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_lemmas.csv: -------------------------------------------------------------------------------- 1 | Text,Lemma 2 | It has been approved and endorsed by The Associated General Contractors of America.,It 3 | ,have 4 | ,be 5 | ,approve 6 | ,and 7 | ,endorse 8 | ,by 9 | ,The 10 | ,Associated 11 | ,General 12 | ,Contractors 13 | ,of 14 | ,America 15 | ,. 
16 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_lemmas_lc.csv: -------------------------------------------------------------------------------- 1 | Text,Lemma (Lowercase) 2 | It has been approved and endorsed by The Associated General Contractors of America.,it 3 | ,have 4 | ,be 5 | ,approve 6 | ,and 7 | ,endorse 8 | ,by 9 | ,the 10 | ,associated 11 | ,general 12 | ,contractors 13 | ,of 14 | ,america 15 | ,. 16 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_lemmas_lc_sw.csv: -------------------------------------------------------------------------------- 1 | Text,Lemma (Lowercase and No Stop Words) 2 | It has been approved and endorsed by The Associated General Contractors of America.,approve 3 | ,endorse 4 | ,associated 5 | ,general 6 | ,contractors 7 | ,america 8 | ,. 9 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_lemmas_sw.csv: -------------------------------------------------------------------------------- 1 | Text,Lemma (No Stop Words) 2 | It has been approved and endorsed by The Associated General Contractors of America.,approve 3 | ,endorse 4 | ,Associated 5 | ,General 6 | ,Contractors 7 | ,America 8 | ,. 
9 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_nouns.csv: -------------------------------------------------------------------------------- 1 | Text,Noun 2 | It has been approved and endorsed by The Associated General Contractors of America.,Associated 3 | ,General 4 | ,Contractors 5 | ,America 6 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_nouns_lemma.csv: -------------------------------------------------------------------------------- 1 | Text,Noun Lemma 2 | It has been approved and endorsed by The Associated General Contractors of America.,Associated 3 | ,General 4 | ,Contractors 5 | ,America 6 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_verb_lemmas.csv: -------------------------------------------------------------------------------- 1 | Text,Verb Lemma 2 | It has been approved and endorsed by The Associated General Contractors of America.,have 3 | ,be 4 | ,approve 5 | ,endorse 6 | -------------------------------------------------------------------------------- /test_data/lexnlp/nlp/en/tests/test_tokens/test_verbs.csv: -------------------------------------------------------------------------------- 1 | Text,Verb 2 | It has been approved and endorsed by The Associated General Contractors of America.,has 3 | ,been 4 | ,approved 5 | ,endorsed 6 | -------------------------------------------------------------------------------- /test_data/lexnlp/typed_annotations/de/copyright/copyrights.txt: -------------------------------------------------------------------------------- 1 | siemens.com globale Website Siemens © 1996 – 2019 2 | ------------------------------------------------------------------------------- 3 | total=1 4 | 0)locale=de 5 | 0)company=Website Siemens 6 | 0)year_start=1996 7 | 0)year_end= 8 | 
0)coords=(36, 49) 9 | 0)get_cite()=/de/copyright/Website Siemens/1996 10 | 11 | 12 | ------------------------------------------------------------------------------- 13 | Copyright 2019, Siemens 14 | ------------------------------------------------------------------------------- 15 | total=1 16 | 0)company=Siemens 17 | 0)year_start=2019 18 | 0)year_end= 19 | 0)coords=(0, 23) 20 | 0)get_cite()=/de/copyright/Siemens/2019 21 | 22 | 23 | ------------------------------------------------------------------------------- 24 | Eigentumsrecht 2019, Siemens 25 | ------------------------------------------------------------------------------- 26 | total=1 27 | 0)company=Siemens 28 | 0)year_start=2019 29 | 0)year_end= 30 | 0)coords=(0, 28) 31 | 0)get_cite()=/de/copyright/Siemens/2019 32 | 33 | -------------------------------------------------------------------------------- /test_data/lexnlp/typed_annotations/de/geoentity/geoentities.txt: -------------------------------------------------------------------------------- 1 | ein seltsamer Text und Georgien darin erwähnt 2 | ------------------------------------------------------------------------------- 3 | total=1 4 | 0)locale=de 5 | 0)coords=(23, 31) 6 | 0)entity_id=83 7 | 0)name=Georgien 8 | 0)name_en=Georgia 9 | 0)alias=Georgien 10 | 0)iso_3166_2=GE 11 | 0)iso_3166_3=GEO 12 | 0)get_cite()=/de/geoentity/Georgien 13 | 14 | 15 | ------------------------------------------------------------------------------- 16 | ein seltsamer Text und Geeorgia darin erwähnt 17 | ------------------------------------------------------------------------------- 18 | total=0 -------------------------------------------------------------------------------- /test_data/lexnlp/typed_annotations/en/act/acts.txt: -------------------------------------------------------------------------------- 1 | accordance with sections 751(a)(1) and 777(i)(1) of the Act, and 19 CFR 351 2 | ------------------------------------------------------------------------------- 3 | 
total=1 4 | 0)act_name=Act 5 | 0)coords=(16, 61) 6 | 0)section=751(a)(1) and 777(i)(1) 7 | 0)year= 8 | 0)ambiguous=True 9 | ### '#s#' replaces ' ' 10 | 0)text=sections 751(a)(1) and 777(i)(1) of the Act,#s# 11 | -------------------------------------------------------------------------------- /test_data/lexnlp/typed_annotations/en/phone/phones.txt: -------------------------------------------------------------------------------- 1 | Dial +1-541-754-3010 in case of murder 2 | ------------------------------------------------------------------------------- 3 | total=1 4 | 0)locale=en 5 | 0)coords=(8, 20) 6 | 0)phone=(541) 754-3010 7 | 0)get_cite()=/en/phone/(541) 754-3010 8 | 9 | 10 | ------------------------------------------------------------------------------- 11 | Dial 3.141592564, +1-541 754 3010 in case of murder 12 | ------------------------------------------------------------------------------- 13 | total=1 14 | 0)locale=en 15 | 0)coords=(21, 33) 16 | 0)phone=(541) 754-3010 17 | 0)get_cite()=/en/phone/(541) 754-3010 18 | 19 | 20 | ------------------------------------------------------------------------------- 21 | Dial +1-5417543010 in case of murder 22 | ------------------------------------------------------------------------------- 23 | total=0 24 | -------------------------------------------------------------------------------- /test_data/lexnlp/typed_annotations/en/ssn/ssn.txt: -------------------------------------------------------------------------------- 1 | Somewhere in the form I filled out my SSN (123-45-6789) number 2 | ------------------------------------------------------------------------------- 3 | total=1 4 | 0)locale=en 5 | 0)coords=(43, 54) 6 | 0)get_cite()=/en/ssn/123-45-6789 7 | 0)number=123-45-6789 8 | 9 | 10 | ------------------------------------------------------------------------------- 11 | Here's an invalid SSN number: 123-00-6789 12 | ------------------------------------------------------------------------------- 13 | total=0 
-------------------------------------------------------------------------------- /test_data/lexnlp/typed_annotations/es/court/courts.txt: -------------------------------------------------------------------------------- 1 | Sembré una flor sin interés. Yo la sembré para ver el Tribunal Superior, 2 | al volver ya estaba seca y ya no quizo retoñar. 3 | ------------------------------------------------------------------------------- 4 | total=1 5 | 0)locale=es 6 | 0)coords=(28, 71) 7 | 0)jurisdiction=Andalucía 8 | 0)court_type=Tribunal Superior 9 | 0)get_cite()=/es/court/Tribunal Superior/Andalucía/Tribunal Superior 10 | 11 | 12 | 13 | ------------------------------------------------------------------------------- 14 | El actual Tribunal Superior de Justicia de Madrid fue creado en 1985 a partir 15 | del artículo 26 de la Ley Orgánica del Poder Judicial, constituyéndose el 23 16 | de mayo de 1989. 17 | ------------------------------------------------------------------------------- 18 | total=1 19 | 0)locale=es 20 | 0)coords=(9, 49) 21 | 0)jurisdiction=Comunidad de Madrid 22 | 0)court_type=Tribunal Superior 23 | 0)get_cite()=/es/court/Tribunal Superior de Justicia de Madrid/Comunidad de Madrid/Tribunal Superior -------------------------------------------------------------------------------- /test_data/lexnlp/typed_annotations/es/date/dates.txt: -------------------------------------------------------------------------------- 1 | Some dummy sample with Spanish date like 15 de febrero, 28 de abril y 17 de 2 | noviembre de 1995, 1ºde enero de 1999 3 | ------------------------------------------------------------------------------- 4 | total=4 5 | ..)locale=es 6 | 0)date=1995-11-17 00:00:00 7 | 0)coords=(70, 93) 8 | 0)get_cite()=/es/date/1995-11-17 00:00:00 9 | 10 | 1)date=1999-01-01 00:00:00 11 | 1)coords=(95, 113) 12 | 1)get_cite()=/es/date/1999-01-01 00:00:00 13 | 14 | 2)date=1995-02-15 00:00:00 15 | 2)coords=(41, 54) 16 | 2)get_cite()=/es/date/1995-02-15 00:00:00 17 | 18 | 
3)date=1995-04-28 00:00:00 19 | 3)coords=(56, 67) 20 | 3)get_cite()=/es/date/1995-04-28 00:00:00 21 | 22 | 23 | 24 | ------------------------------------------------------------------------------- 25 | Esto sucedió el 4 de julio. 26 | ------------------------------------------------------------------------------- 27 | total=1 28 | 0)date.month=7 29 | 0)date.day=4 30 | 0)coords=(16, 26) 31 | 0)get_cite()=/es/date/$YEAR$-07-04 00:00:00 32 | -------------------------------------------------------------------------------- /test_data/output/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/test_data/output/.gitkeep -------------------------------------------------------------------------------- /test_data/table_sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/test_data/table_sample.pdf -------------------------------------------------------------------------------- /test_data/tabular02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LexPredict/lexpredict-lexnlp/330b4e113c9bced0cc06f2c864c5015bb5ed2199/test_data/tabular02.pdf -------------------------------------------------------------------------------- /test_data/test_lexnlp_tests/test_test_extraction_func_on_test_data.csv: -------------------------------------------------------------------------------- 1 | text,expected 2 | qqq,qqq! 3 | www,www! 4 | eee,eee? 5 | rrr,rrr! 6 | --------------------------------------------------------------------------------