├── .flake8
├── .github
├── ISSUE_TEMPLATE
│ ├── bug.md
│ └── feature.md
├── copilot-instructions.md
├── pull_request_template.md
└── workflows
│ ├── delete-preview-docs.yml
│ ├── documentation.yml
│ ├── release.yml
│ ├── test-build.yml
│ └── tests.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CITATION.cff
├── LICENSE
├── Makefile
├── README.md
├── changelog.md
├── contributing.md
├── demo
├── app.py
└── requirements.txt
├── docs
├── advanced-tutorials
│ ├── fastapi.md
│ ├── index.md
│ └── word-vectors.md
├── assets
│ ├── fragments
│ │ ├── aids-examples.md
│ │ ├── alcohol-examples.md
│ │ ├── cerebrovascular-accident-examples.md
│ │ ├── ckd-examples.md
│ │ ├── congestive-heart-failure-examples.md
│ │ ├── connective-tissue-disease-examples.md
│ │ ├── copd-examples.md
│ │ ├── dementia-examples.md
│ │ ├── diabetes-examples.md
│ │ ├── hemiplegia-examples.md
│ │ ├── leukemia-examples.md
│ │ ├── liver-disease-examples.md
│ │ ├── lymphoma-examples.md
│ │ ├── myocardial-infarction-examples.md
│ │ ├── peptic-ulcer-disease-examples.md
│ │ ├── peripheral-vascular-disease-examples.md
│ │ ├── solid-tumor-examples.md
│ │ └── tobacco-examples.md
│ ├── images
│ │ ├── class_span_linker.png
│ │ ├── edsnlp-ner.svg
│ │ ├── hybrid-pipeline-example.png
│ │ ├── model-parallelism.png
│ │ ├── multiprocessing.png
│ │ ├── sharing-components.png
│ │ ├── synonym_span_linker.png
│ │ └── transformer-windowing.svg
│ ├── logo
│ │ ├── aphp-blue.svg
│ │ ├── aphp-white.svg
│ │ └── edsnlp.svg
│ ├── overrides
│ │ ├── main.html
│ │ └── partials
│ │ │ └── comments.html
│ ├── stylesheets
│ │ ├── cards.css
│ │ ├── extra.css
│ │ ├── giscus_dark.css
│ │ └── giscus_light.css
│ ├── templates
│ │ └── python
│ │ │ └── material
│ │ │ ├── class.html
│ │ │ ├── docstring.html
│ │ │ ├── docstring
│ │ │ ├── examples.html
│ │ │ └── parameters.html
│ │ │ └── function.html
│ └── termynal
│ │ ├── termynal.css
│ │ └── termynal.js
├── concepts
│ ├── inference.md
│ ├── pipeline.md
│ └── torch-component.md
├── data
│ ├── conll.md
│ ├── converters.md
│ ├── index.md
│ ├── json.md
│ ├── overview.png
│ ├── pandas.md
│ ├── parquet.md
│ ├── polars.md
│ ├── spark.md
│ └── standoff.md
├── index.md
├── pipes
│ ├── architecture.md
│ ├── core
│ │ ├── contextual-matcher.md
│ │ ├── endlines.md
│ │ ├── index.md
│ │ ├── matcher.md
│ │ ├── normalizer.md
│ │ ├── resources
│ │ │ ├── alignment.svg
│ │ │ └── span-alignment.svg
│ │ ├── sentences.md
│ │ └── terminology.md
│ ├── index.md
│ ├── misc
│ │ ├── consultation-dates.md
│ │ ├── dates.md
│ │ ├── index.md
│ │ ├── quantities.md
│ │ ├── reason.md
│ │ ├── sections.md
│ │ ├── split.md
│ │ └── tables.md
│ ├── ner
│ │ ├── adicap.md
│ │ ├── behaviors
│ │ │ ├── alcohol.md
│ │ │ ├── index.md
│ │ │ └── tobacco.md
│ │ ├── cim10.md
│ │ ├── covid.md
│ │ ├── disorders
│ │ │ ├── aids.md
│ │ │ ├── cerebrovascular-accident.md
│ │ │ ├── ckd.md
│ │ │ ├── congestive-heart-failure.md
│ │ │ ├── connective-tissue-disease.md
│ │ │ ├── copd.md
│ │ │ ├── dementia.md
│ │ │ ├── diabetes.md
│ │ │ ├── hemiplegia.md
│ │ │ ├── index.md
│ │ │ ├── leukemia.md
│ │ │ ├── liver-disease.md
│ │ │ ├── lymphoma.md
│ │ │ ├── myocardial-infarction.md
│ │ │ ├── peptic-ulcer-disease.md
│ │ │ ├── peripheral-vascular-disease.md
│ │ │ └── solid-tumor.md
│ │ ├── drugs.md
│ │ ├── index.md
│ │ ├── scores
│ │ │ ├── charlson.md
│ │ │ ├── elston-ellis.md
│ │ │ ├── emergency-ccmu.md
│ │ │ ├── emergency-gemsa.md
│ │ │ ├── emergency-priority.md
│ │ │ ├── index.md
│ │ │ └── sofa.md
│ │ ├── suicide_attempt.md
│ │ ├── tnm.md
│ │ └── umls.md
│ ├── qualifiers
│ │ ├── family.md
│ │ ├── history.md
│ │ ├── hypothesis.md
│ │ ├── index.md
│ │ ├── negation.md
│ │ └── reported-speech.md
│ └── trainable
│ │ ├── biaffine-dependency-parser.md
│ │ ├── embeddings
│ │ ├── span_pooler.md
│ │ ├── text_cnn.md
│ │ └── transformer.md
│ │ ├── extractive-qa.md
│ │ ├── index.md
│ │ ├── ner.md
│ │ ├── span-classifier.md
│ │ └── span-linker.md
├── references.bib
├── resources
│ └── sections.svg
├── scripts
│ ├── autorefs
│ │ ├── LICENSE
│ │ └── plugin.py
│ ├── bibtex.py
│ ├── cards.py
│ ├── clickable_snippets.py
│ ├── griffe_ext.py
│ └── plugin.py
├── tokenizers.md
├── tutorials
│ ├── aggregating-results.md
│ ├── detecting-dates.md
│ ├── endlines.md
│ ├── index.md
│ ├── make-a-training-script.md
│ ├── matching-a-terminology.md
│ ├── multiple-texts.md
│ ├── qualifying-entities.md
│ ├── reason.md
│ ├── spacy101.md
│ ├── training.md
│ ├── tuning.md
│ └── visualization.md
└── utilities
│ ├── connectors
│ ├── brat.md
│ ├── labeltool.md
│ ├── omop.md
│ └── overview.md
│ ├── evaluation.md
│ ├── index.md
│ ├── matchers.md
│ ├── regex.md
│ └── tests
│ ├── blocs.md
│ ├── examples.md
│ └── index.md
├── edsnlp
├── __init__.py
├── conjugator.py
├── connectors
│ ├── __init__.py
│ ├── brat.py
│ ├── labeltool.py
│ └── omop.py
├── core
│ ├── __init__.py
│ ├── pipeline.py
│ ├── registries.py
│ ├── stream.py
│ └── torch_component.py
├── data
│ ├── __init__.py
│ ├── base.py
│ ├── brat.py
│ ├── conll.py
│ ├── converters.py
│ ├── json.py
│ ├── pandas.py
│ ├── parquet.py
│ ├── polars.py
│ ├── spark.py
│ └── standoff.py
├── evaluate.py
├── extensions.py
├── language.py
├── matchers
│ ├── __init__.py
│ ├── phrase.pxd
│ ├── phrase.pyx
│ ├── regex.py
│ ├── simstring.py
│ └── utils
│ │ ├── __init__.py
│ │ ├── offset.py
│ │ └── text.py
├── metrics
│ ├── __init__.py
│ ├── dep_parsing.py
│ ├── ner.py
│ └── span_attributes.py
├── package.py
├── patch_spacy.py
├── pipes
│ ├── __init__.py
│ ├── base.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── contextual_matcher
│ │ │ ├── __init__.py
│ │ │ ├── contextual_matcher.py
│ │ │ ├── factory.py
│ │ │ └── models.py
│ │ ├── endlines
│ │ │ ├── __init__.py
│ │ │ ├── endlines.py
│ │ │ ├── factory.py
│ │ │ ├── functional.py
│ │ │ └── model.py
│ │ ├── matcher
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ └── matcher.py
│ │ ├── normalizer
│ │ │ ├── __init__.py
│ │ │ ├── accents
│ │ │ │ ├── __init__.py
│ │ │ │ ├── accents.py
│ │ │ │ ├── factory.py
│ │ │ │ └── patterns.py
│ │ │ ├── factory.py
│ │ │ ├── normalizer.py
│ │ │ ├── pollution
│ │ │ │ ├── __init__.py
│ │ │ │ ├── factory.py
│ │ │ │ ├── patterns.py
│ │ │ │ └── pollution.py
│ │ │ ├── quotes
│ │ │ │ ├── __init__.py
│ │ │ │ ├── factory.py
│ │ │ │ ├── patterns.py
│ │ │ │ └── quotes.py
│ │ │ ├── remove_lowercase
│ │ │ │ ├── __init__.py
│ │ │ │ └── factory.py
│ │ │ └── spaces
│ │ │ │ ├── __init__.py
│ │ │ │ ├── factory.py
│ │ │ │ └── spaces.py
│ │ ├── sentences
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ ├── fast_sentences.pxd
│ │ │ ├── fast_sentences.pyx
│ │ │ ├── sentences.py
│ │ │ └── terms.py
│ │ └── terminology
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ └── terminology.py
│ ├── misc
│ │ ├── __init__.py
│ │ ├── consultation_dates
│ │ │ ├── __init__.py
│ │ │ ├── consultation_dates.py
│ │ │ ├── factory.py
│ │ │ └── patterns.py
│ │ ├── dates
│ │ │ ├── __init__.py
│ │ │ ├── dates.py
│ │ │ ├── factory.py
│ │ │ ├── models.py
│ │ │ └── patterns
│ │ │ │ ├── __init__.py
│ │ │ │ ├── absolute.py
│ │ │ │ ├── atomic
│ │ │ │ ├── __init__.py
│ │ │ │ ├── days.py
│ │ │ │ ├── delimiters.py
│ │ │ │ ├── directions.py
│ │ │ │ ├── modes.py
│ │ │ │ ├── months.py
│ │ │ │ ├── numbers.py
│ │ │ │ ├── time.py
│ │ │ │ ├── units.py
│ │ │ │ └── years.py
│ │ │ │ ├── current.py
│ │ │ │ ├── duration.py
│ │ │ │ ├── false_positive.py
│ │ │ │ └── relative.py
│ │ ├── quantities
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ ├── patterns.py
│ │ │ └── quantities.py
│ │ ├── reason
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ ├── patterns.py
│ │ │ └── reason.py
│ │ ├── sections
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ ├── patterns.py
│ │ │ └── sections.py
│ │ ├── split
│ │ │ ├── __init__.py
│ │ │ └── split.py
│ │ └── tables
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ ├── patterns.py
│ │ │ └── tables.py
│ ├── ner
│ │ ├── __init__.py
│ │ ├── adicap
│ │ │ ├── __init__.py
│ │ │ ├── adicap.py
│ │ │ ├── factory.py
│ │ │ ├── models.py
│ │ │ └── patterns.py
│ │ ├── behaviors
│ │ │ ├── __init__.py
│ │ │ ├── alcohol
│ │ │ │ ├── __init__.py
│ │ │ │ ├── alcohol.py
│ │ │ │ ├── factory.py
│ │ │ │ └── patterns.py
│ │ │ └── tobacco
│ │ │ │ ├── __init__.py
│ │ │ │ ├── factory.py
│ │ │ │ ├── patterns.py
│ │ │ │ └── tobacco.py
│ │ ├── cim10
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ └── patterns.py
│ │ ├── covid
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ └── patterns.py
│ │ ├── disorders
│ │ │ ├── __init__.py
│ │ │ ├── aids
│ │ │ │ ├── __init__.py
│ │ │ │ ├── aids.py
│ │ │ │ ├── factory.py
│ │ │ │ └── patterns.py
│ │ │ ├── base.py
│ │ │ ├── cerebrovascular_accident
│ │ │ │ ├── __init__.py
│ │ │ │ ├── cerebrovascular_accident.py
│ │ │ │ ├── factory.py
│ │ │ │ └── patterns.py
│ │ │ ├── ckd
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ckd.py
│ │ │ │ ├── factory.py
│ │ │ │ └── patterns.py
│ │ │ ├── congestive_heart_failure
│ │ │ │ ├── __init__.py
│ │ │ │ ├── congestive_heart_failure.py
│ │ │ │ ├── factory.py
│ │ │ │ └── patterns.py
│ │ │ ├── connective_tissue_disease
│ │ │ │ ├── __init__.py
│ │ │ │ ├── connective_tissue_disease.py
│ │ │ │ ├── factory.py
│ │ │ │ └── patterns.py
│ │ │ ├── copd
│ │ │ │ ├── __init__.py
│ │ │ │ ├── copd.py
│ │ │ │ ├── factory.py
│ │ │ │ └── patterns.py
│ │ │ ├── dementia
│ │ │ │ ├── __init__.py
│ │ │ │ ├── dementia.py
│ │ │ │ ├── factory.py
│ │ │ │ └── patterns.py
│ │ │ ├── diabetes
│ │ │ │ ├── __init__.py
│ │ │ │ ├── diabetes.py
│ │ │ │ ├── factory.py
│ │ │ │ └── patterns.py
│ │ │ ├── hemiplegia
│ │ │ │ ├── __init__.py
│ │ │ │ ├── factory.py
│ │ │ │ ├── hemiplegia.py
│ │ │ │ └── patterns.py
│ │ │ ├── leukemia
│ │ │ │ ├── __init__.py
│ │ │ │ ├── factory.py
│ │ │ │ ├── leukemia.py
│ │ │ │ └── patterns.py
│ │ │ ├── liver_disease
│ │ │ │ ├── __init__.py
│ │ │ │ ├── factory.py
│ │ │ │ ├── liver_disease.py
│ │ │ │ └── patterns.py
│ │ │ ├── lymphoma
│ │ │ │ ├── __init__.py
│ │ │ │ ├── factory.py
│ │ │ │ ├── lymphoma.py
│ │ │ │ └── patterns.py
│ │ │ ├── myocardial_infarction
│ │ │ │ ├── __init__.py
│ │ │ │ ├── factory.py
│ │ │ │ ├── myocardial_infarction.py
│ │ │ │ └── patterns.py
│ │ │ ├── peptic_ulcer_disease
│ │ │ │ ├── __init__.py
│ │ │ │ ├── factory.py
│ │ │ │ ├── patterns.py
│ │ │ │ └── peptic_ulcer_disease.py
│ │ │ ├── peripheral_vascular_disease
│ │ │ │ ├── __init__.py
│ │ │ │ ├── factory.py
│ │ │ │ ├── patterns.py
│ │ │ │ └── peripheral_vascular_disease.py
│ │ │ ├── solid_tumor
│ │ │ │ ├── __init__.py
│ │ │ │ ├── factory.py
│ │ │ │ ├── patterns.py
│ │ │ │ └── solid_tumor.py
│ │ │ └── terms.py
│ │ ├── drugs
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ └── patterns.py
│ │ ├── scores
│ │ │ ├── __init__.py
│ │ │ ├── base_score.py
│ │ │ ├── charlson
│ │ │ │ ├── __init__.py
│ │ │ │ ├── factory.py
│ │ │ │ └── patterns.py
│ │ │ ├── elston_ellis
│ │ │ │ ├── __init__.py
│ │ │ │ ├── factory.py
│ │ │ │ └── patterns.py
│ │ │ ├── emergency
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ccmu
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── factory.py
│ │ │ │ │ └── patterns.py
│ │ │ │ ├── gemsa
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── factory.py
│ │ │ │ │ └── patterns.py
│ │ │ │ └── priority
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── factory.py
│ │ │ │ │ └── patterns.py
│ │ │ ├── factory.py
│ │ │ └── sofa
│ │ │ │ ├── __init__.py
│ │ │ │ ├── factory.py
│ │ │ │ ├── patterns.py
│ │ │ │ └── sofa.py
│ │ ├── suicide_attempt
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ ├── patterns.py
│ │ │ └── suicide_attempt.py
│ │ ├── tnm
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ ├── model.py
│ │ │ ├── patterns.py
│ │ │ └── tnm.py
│ │ └── umls
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ └── patterns.py
│ ├── qualifiers
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── family
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ ├── family.py
│ │ │ └── patterns.py
│ │ ├── history
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ ├── history.py
│ │ │ └── patterns.py
│ │ ├── hypothesis
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ ├── hypothesis.py
│ │ │ └── patterns.py
│ │ ├── negation
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ ├── negation.py
│ │ │ └── patterns.py
│ │ └── reported_speech
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ ├── patterns.py
│ │ │ └── reported_speech.py
│ ├── terminations.py
│ └── trainable
│ │ ├── __init__.py
│ │ ├── biaffine_dep_parser
│ │ ├── __init__.py
│ │ ├── biaffine_dep_parser.py
│ │ └── factory.py
│ │ ├── embeddings
│ │ ├── __init__.py
│ │ ├── span_pooler
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ └── span_pooler.py
│ │ ├── text_cnn
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ └── text_cnn.py
│ │ ├── transformer
│ │ │ ├── __init__.py
│ │ │ ├── factory.py
│ │ │ └── transformer.py
│ │ └── typing.py
│ │ ├── extractive_qa
│ │ ├── __init__.py
│ │ ├── extractive_qa.py
│ │ └── factory.py
│ │ ├── layers
│ │ ├── __init__.py
│ │ ├── crf.py
│ │ ├── metric.py
│ │ └── text_cnn.py
│ │ ├── ner_crf
│ │ ├── __init__.py
│ │ ├── factory.py
│ │ └── ner_crf.py
│ │ ├── span_classifier
│ │ ├── __init__.py
│ │ ├── factory.py
│ │ └── span_classifier.py
│ │ └── span_linker
│ │ ├── __init__.py
│ │ ├── factory.py
│ │ └── span_linker.py
├── processing
│ ├── __init__.py
│ ├── deprecated_pipe.py
│ ├── multiprocessing.py
│ ├── simple.py
│ └── spark.py
├── reducers.py
├── resources
│ ├── AVC.csv.gz
│ ├── adicap.json.gz
│ ├── cim10.csv.gz
│ ├── drugs.json
│ └── verbs.csv.gz
├── train.py
├── training
│ ├── __init__.py
│ ├── optimizer.py
│ └── trainer.py
├── tune.py
├── utils
│ ├── __init__.py
│ ├── batching.py
│ ├── bindings.py
│ ├── collections.py
│ ├── deprecation.py
│ ├── doc_to_text.py
│ ├── examples.py
│ ├── extensions.py
│ ├── file_system.py
│ ├── filter.py
│ ├── inclusion.py
│ ├── lazy_module.py
│ ├── numbers.py
│ ├── regex.py
│ ├── resources.py
│ ├── span_getters.py
│ ├── spark_dtypes.py
│ ├── stream_sentinels.py
│ ├── torch.py
│ └── typing.py
└── viz
│ └── __init__.py
├── mkdocs.yml
├── notebooks
├── README.md
├── connectors
│ ├── context.py
│ └── omop.md
├── context.py
├── dates
│ ├── context.py
│ ├── prototype.md
│ └── user-guide.md
├── endlines
│ └── endlines-example.md
├── example.txt
├── normalizer
│ ├── context.py
│ ├── profiling.md
│ └── prototype.md
├── pipeline.md
├── premier-pipeline.md
├── sections
│ ├── annotated_sections.csv
│ ├── context.py
│ ├── section-dataset.md
│ ├── sections.xlsx
│ └── testing.md
├── sentences
│ ├── context.py
│ └── sentences.md
├── tnm
│ └── prototype.md
├── tokenizer
│ ├── context.py
│ └── tokenizer.md
└── utilities
│ ├── brat.md
│ └── context.py
├── pyproject.toml
├── scripts
├── adicap.py
├── cim10.py
├── conjugate_verbs.py
├── context.py
└── serve.py
├── setup.py
└── tests
├── conftest.py
├── connectors
├── test_labeltool.py
└── test_omop.py
├── data
├── test_conll.py
├── test_converters.py
├── test_json.py
├── test_pandas.py
├── test_parquet.py
├── test_polars.py
├── test_spark.py
├── test_standoff.py
└── test_stream.py
├── extract_docs_code.py
├── helpers.py
├── matchers
├── test_phrase.py
├── test_regex.py
└── test_simstring.py
├── pipelines
├── core
│ ├── test_contextual_matcher.py
│ ├── test_endlines.py
│ ├── test_matcher.py
│ ├── test_normalisation.py
│ ├── test_sentences.py
│ └── test_terminology.py
├── misc
│ ├── test_consultation_date.py
│ ├── test_consultation_date_town.py
│ ├── test_dates.py
│ ├── test_quantities.py
│ ├── test_reason.py
│ ├── test_sections.py
│ ├── test_split.py
│ └── test_tables.py
├── ner
│ ├── disorders
│ │ ├── AIDS.py
│ │ ├── CKD.py
│ │ ├── COPD.py
│ │ ├── alcohol.py
│ │ ├── cerebrovascular_accident.py
│ │ ├── congestive_heart_failure.py
│ │ ├── connective_tissue_disease.py
│ │ ├── dementia.py
│ │ ├── diabetes.py
│ │ ├── hemiplegia.py
│ │ ├── leukemia.py
│ │ ├── liver_disease.py
│ │ ├── lymphoma.py
│ │ ├── myocardial_infarction.py
│ │ ├── peptic_ulcer_disease.py
│ │ ├── peripheral_vascular_disease.py
│ │ ├── solid_tumor.py
│ │ ├── test_all.py
│ │ └── tobacco.py
│ ├── test_adicap.py
│ ├── test_adicap_decoder.py
│ ├── test_cim10.py
│ ├── test_covid.py
│ ├── test_drugs.py
│ ├── test_score.py
│ ├── test_suicide_attempt.py
│ ├── test_tnm.py
│ ├── test_umls.py
│ └── test_value_extension.py
├── qualifiers
│ ├── conftest.py
│ ├── test_family.py
│ ├── test_history.py
│ ├── test_hypothesis.py
│ ├── test_negation.py
│ └── test_reported_speech.py
├── test_pipelines.py
└── trainable
│ ├── test_extractive_qa.py
│ ├── test_ner.py
│ ├── test_span_linker.py
│ ├── test_span_qualifier.py
│ └── test_transformer.py
├── processing
├── mp_simple_pipe.py
├── test_backends.py
└── test_processing.py
├── readme.md
├── resources
├── brat_data
│ └── subfolder
│ │ ├── doc-1.ann
│ │ ├── doc-1.txt
│ │ ├── doc-2.txt
│ │ └── doc-3.txt
├── docs.jsonl
└── docs.parquet
├── test_conjugator.py
├── test_docs.py
├── test_entrypoints.py
├── test_language.py
├── test_pipeline.py
├── test_reducers.py
├── test_scorers.py
├── test_span_args.py
├── training
├── dataset.jsonl
├── dataset
│ ├── annotation.conf
│ ├── sample-1.ann
│ ├── sample-1.txt
│ ├── sample-2.ann
│ └── sample-2.txt
├── dep_parser_config.yml
├── ner_qlf_diff_bert_config.yml
├── ner_qlf_same_bert_config.yml
├── qlf_config.yml
├── rhapsodie_sample.conllu
├── test_optimizer.py
└── test_train.py
├── tuning
├── config.cfg
├── config.yml
├── test_checkpoints
│ ├── single_phase_gpu_hour
│ │ └── study_.pkl
│ ├── single_phase_n_trials
│ │ └── study_.pkl
│ ├── two_phase_gpu_hour
│ │ ├── config.yml
│ │ ├── results_summary.txt
│ │ └── study_.pkl
│ └── two_phase_n_trials
│ │ ├── config.yml
│ │ ├── results_summary.txt
│ │ └── study_.pkl
├── test_end_to_end.py
├── test_tuning.py
└── test_update_config.py
└── utils
├── test_batching.py
├── test_bindings.py
├── test_collections.py
├── test_examples.py
├── test_filter.py
├── test_package.py
├── test_span_getters.py
├── test_spark_dtypes.py
└── test_typing.py
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 88
3 | exclude = .git,__pycache__,__init__.py,.mypy_cache,.pytest_cache,.venv,build
4 | per-file-ignores = __init__.py:F401
5 | ignore = W503, E203
6 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: "Bug Report"
3 | about: Use this template if you came across a bug or unexpected behaviour differing from the docs.
4 | ---
5 |
6 |
7 |
8 | ## Description
9 |
10 |
11 |
12 | ## How to reproduce the bug
13 |
14 |
15 |
16 | ```python
17 | import spacy
18 |
19 | nlp = spacy.blank("fr")
20 | nlp.add_pipe("eds.normalizer")
21 |
22 | # ...
23 | ```
24 |
25 | ## Your Environment
26 |
27 |
28 |
29 | - Operating System:
30 | - Python Version Used:
31 | - spaCy Version Used:
32 | - EDS-NLP Version Used:
33 | - Environment Information:
34 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: "Feature request"
3 | about: Use this template if you'd like EDS-NLP to add a new feature.
4 | title: "Feature request: [feature]"
5 | ---
6 |
7 | ## Feature type
8 |
9 |
10 |
11 | ## Description
12 |
13 |
14 |
--------------------------------------------------------------------------------
/.github/copilot-instructions.md:
--------------------------------------------------------------------------------
1 | First, when starting to develop, install the project with
2 |
3 | ```bash
4 | pip install -e ".[dev]"
5 | pre-commit install
6 | ```
7 |
8 | Then, when fixing an issue, add a new test to reproduce it. If the issue concerns an existing
9 | component, add the test to the corresponding test file, or create a new test file (only when needed, this should not be the most common scenario).
10 |
11 | Create a new branch (or checkout the auto-created branch for the issue).
12 |
13 | Then update the codebase to fix the issue, and run the new test to check that everything is working as expected.
14 |
15 | Update the changelog.md file with a concise explanation of the change/fix/new feature.
16 |
17 | Before committing, stash, checkout master and pull to ensure you have the latest version of master, then checkout the branch you were working on and rebase it on top of master.
18 | If the rebase has changed something to the codebase, rerun the edited tests to ensure everything is still working as expected.
19 |
20 | Finally, run git log to look at the commit messages and get an idea of what the commit messages should look like (concise, neutral, conventional commits messages).
21 |
22 | ```bash
23 | git log --no-pager
24 | ```
25 |
26 | Then commit the changes.
27 |
28 | !!! note
29 |
30 | Whenever you run a command, ensure that you do it without making it prompt the user for input (i.e., use --no-edit in git rebase, --no-pager, --yes, etc. when possible).
31 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ## Description
4 |
5 |
6 |
7 | ## Checklist
8 |
9 |
10 |
11 | - [ ] If this PR is a bug fix, the bug is documented in the test suite.
12 | - [ ] Changes were documented in the changelog (pending section).
13 | - [ ] If necessary, changes were made to the documentation (eg new pipeline).
14 |
--------------------------------------------------------------------------------
/.github/workflows/delete-preview-docs.yml:
--------------------------------------------------------------------------------
1 | name: Delete preview docs
2 |
3 | on:
4 | workflow_dispatch:
5 | delete:
6 |
7 | jobs:
8 | delete:
9 | name: Delete Vercel Project
10 | if: github.event.ref_type == 'branch'
11 | runs-on: ubuntu-latest
12 | steps:
13 | - run: |
14 | # Set up Vercel
15 | npm install --global vercel@latest
16 | # Pull Vercel environment
17 | vercel pull --yes --environment=preview --token=${{ secrets.VERCEL_TOKEN }}
18 | # Delete vercel project linked to this branch
19 | vercel remove edsnlp-${{ github.event.ref }} --yes --token=${{ secrets.VERCEL_TOKEN }}
20 |
--------------------------------------------------------------------------------
/.github/workflows/documentation.yml:
--------------------------------------------------------------------------------
1 | name: Documentation
2 |
3 | on:
4 | workflow_dispatch:
5 | push:
6 | branches: [master, dev]
7 |
8 | env:
9 | BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
10 | # UV_INDEX_STRATEGY: "unsafe-first-match"
11 | # UV_EXTRA_INDEX_URL: "https://download.pytorch.org/whl/cpu"
12 | PIP_EXTRA_INDEX_URL: "https://download.pytorch.org/whl/cpu"
13 |
14 | jobs:
15 | Documentation:
16 | runs-on: ubuntu-22.04
17 | steps:
18 | - uses: actions/checkout@v2
19 |
20 | - uses: actions/setup-python@v4
21 | with:
22 | python-version: "3.9"
23 | cache: 'pip'
24 |
25 | - run: echo WEEK=$(date +%V) >>$GITHUB_ENV
26 | shell: bash
27 |
28 | # - uses: hynek/setup-cached-uv@v1
29 | # with:
30 | # cache-suffix: -docs-${{ matrix.python-version }}-${{ env.WEEK }}
31 |
32 | - name: Install dependencies
33 | run: |
34 | pip install '.[docs]'
35 | # uv venv
36 | # uv pip install '.[docs]'
37 |
38 | - name: Set up Git
39 | run: |
40 | git config user.name ${{ github.actor }}
41 | git config user.email ${{ github.actor }}@users.noreply.github.com
42 |
43 | - name: Build documentation
44 | run: |
45 | git fetch origin gh-pages
46 | mike delete $BRANCH_NAME
47 | mike deploy --push $BRANCH_NAME
48 | # source .venv/bin/activate
49 |
--------------------------------------------------------------------------------
/.github/workflows/test-build.yml:
--------------------------------------------------------------------------------
1 | # This tries to build packages, and tests the packages.
2 | # It runs on every push to branches following the pattern v*.*.*.
3 | # It makes sure that everything will run when the version is released.
4 |
5 | name: Test Build
6 |
7 |
8 | on:
9 | workflow_dispatch:
10 | pull_request:
11 | branches:
12 | - v*.*.*
13 | - build-*
14 |
15 | jobs:
16 | build_wheels:
17 | name: Build wheels on ${{ matrix.os }}
18 | runs-on: ${{ matrix.os }}
19 | strategy:
20 | matrix:
21 | os: [ubuntu-22.04, windows-latest, macos-latest]
22 |
23 | steps:
24 | - uses: actions/checkout@v2
25 |
26 | - name: Build wheels
27 | # 2.4 is too low (can't build for macos), 2.16 is too high (OpenSSL issues)
28 | uses: pypa/cibuildwheel@v2.16.5
29 | env:
30 | CIBW_ARCHS_MACOS: "x86_64 arm64"
31 | CIBW_ENVIRONMENT: PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu
32 |
33 |
34 | build_sdist:
35 | name: Build source distribution
36 | runs-on: ubuntu-22.04
37 | steps:
38 | - uses: actions/checkout@v2
39 |
40 | - name: Build sdist
41 | run: pipx run build --sdist
42 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Python
2 | __pycache__/
3 |
4 | # Distribution / packaging
5 | init
6 | .Python
7 | env/
8 | venv/
9 | build/
10 | develop-eggs/
11 | dist/
12 | downloads/
13 | eggs/
14 | .eggs/
15 | lib/
16 | lib64/
17 | parts/
18 | sdist/
19 | var/
20 | site/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 | *.cpp
25 | *.so
26 | *.c
27 | public/
28 |
29 | # Unit test / coverage reports
30 | htmlcov/
31 | .tox/
32 | .coverage
33 | .coverage.*
34 | .cache
35 | nosetests.xml
36 | coverage.xml
37 | *,cover
38 | .hypothesis/
39 | .pytest_cache/
40 |
41 | # Documentation
42 | _build/
43 |
44 | # Notebooks
45 | .ipynb_checkpoints/
46 | *.ipynb
47 |
48 | # Data
49 | *.csv
50 | *.pickle
51 | *.txt
52 | *.xls
53 | *.xlsx
54 | *.tar.gz
55 | *.tsv
56 | *.ann
57 |
58 | # Editors
59 | .idea
60 | .vscode
61 |
62 | # Files
63 | .DS_Store
64 |
65 | # Environment
66 | .venv
67 |
68 | # Test resources
69 | !tests/resources/**/*
70 |
71 | # Generated docs
72 | docs/reference
73 | docs/changelog.md
74 | docs/contributing.md
75 | .vercel
76 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # See https://pre-commit.com for more information
2 | # See https://pre-commit.com/hooks.html for more hooks
3 | repos:
4 | - repo: https://github.com/pre-commit/pre-commit-hooks
5 | rev: v3.2.0
6 | hooks:
7 | - id: trailing-whitespace
8 | exclude: |
9 | (?x)^(
10 | tests/resources/.*|
11 | edsnlp/resources/.*
12 | )$
13 | - id: no-commit-to-branch
14 | - id: end-of-file-fixer
15 | - id: check-yaml
16 | args: ["--unsafe"]
17 | - id: check-toml
18 | - id: check-json
19 | - id: check-symlinks
20 | - id: check-added-large-files
21 | - id: detect-private-key
22 | # ruff
23 | - repo: https://github.com/charliermarsh/ruff-pre-commit
24 | # Ruff version.
25 | rev: 'v0.9.6'
26 | hooks:
27 | - id: ruff
28 | args: ['--config', 'pyproject.toml', '--fix', '--show-fixes']
29 | - id: ruff-format
30 | args: ['--config', 'pyproject.toml', '--diff']
31 | - id: ruff-format
32 | args: ['--config', 'pyproject.toml']
33 | - repo: https://github.com/asottile/blacken-docs
34 | rev: v1.10.0
35 | hooks:
36 | - id: blacken-docs
37 | additional_dependencies: [black==20.8b1]
38 | exclude: notebooks/
39 | - repo: https://github.com/econchick/interrogate
40 | rev: 237be78
41 | hooks:
42 | - id: interrogate
43 | args: ["--config=pyproject.toml"]
44 | pass_filenames: false
45 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | # This CITATION.cff file was generated with cffinit.
2 | # Visit https://bit.ly/cffinit to generate yours today!
3 |
4 | cff-version: 1.2.0
5 | title: >-
6 | EDS-NLP: efficient information extraction from
7 | French clinical notes
8 | message: If you use EDS-NLP, please cite us as below.
9 | type: software
10 | authors:
11 | - given-names: Perceval
12 | family-names: Wajsburt
13 | affiliation: Assistance Publique – Hôpitaux de Paris
14 | - given-names: Thomas
15 | family-names: Petit-Jean
16 | affiliation: Assistance Publique – Hôpitaux de Paris
17 | - given-names: Basile
18 | family-names: Dura
19 | orcid: "https://orcid.org/0000-0002-8315-4050"
20 | affiliation: Assistance Publique – Hôpitaux de Paris
21 | - given-names: Ariel
22 | family-names: Cohen
23 | orcid: "https://orcid.org/0000-0002-2550-9773"
24 | affiliation: Assistance Publique – Hôpitaux de Paris
25 | - given-names: Charline
26 | family-names: Jean
27 | affiliation: Assistance Publique – Hôpitaux de Paris
28 | - given-names: Romain
29 | family-names: Bey
30 | affiliation: Assistance Publique – Hôpitaux de Paris
31 | repository-code: "https://github.com/aphp/edsnlp"
32 | url: "http://aphp.github.io/edsnlp"
33 | abstract: >-
34 | EDS-NLP provides a set of spaCy components that are
35 | used to extract information from clinical notes
36 | written in French.
37 | keywords:
38 | - NLP
39 | - clinical
40 | license: BSD-3-Clause
41 | year: 2022
42 | doi: 10.5281/zenodo.6424993
43 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2021 Assistance Publique - Hôpitaux de Paris
2 |
3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
4 |
5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
6 |
7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
8 |
9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
10 |
11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 |
2 | .ONESHELL:
3 | SHELL:=/bin/bash
4 |
5 | .PHONY: create-env install documentation test
6 |
7 | default:
8 | @echo "Call a specific subcommand: create-env,install,documentation,test"
9 |
10 | .venv:
11 | python -m venv .venv
12 |
13 | create-env: .venv
14 |
15 | install : .venv
16 | . .venv/bin/activate
17 | pip install -e '.[dev,setup]'
18 | python scripts/conjugate_verbs.py
19 | pip install -e .
20 | pre-commit install
21 |
22 | documentation: .venv
23 | . .venv/bin/activate
24 | pip install -e '.[docs]'
25 | mkdocs serve
26 |
27 | test: .venv
28 | . .venv/bin/activate
29 | python -m pytest
30 |
--------------------------------------------------------------------------------
/demo/requirements.txt:
--------------------------------------------------------------------------------
1 | pydantic-core==2.14.4
2 | git+https://github.com/aphp/edsnlp.git
3 | streamlit
4 |
--------------------------------------------------------------------------------
/docs/advanced-tutorials/index.md:
--------------------------------------------------------------------------------
1 | # Advanced use cases
2 |
3 | In this section, we review a few advanced use cases:
4 |
5 | - Adding pre-computed word vectors to spaCy
6 | - Deploying your spaCy pipeline as an API
7 | - Creating your own component
8 |
--------------------------------------------------------------------------------
/docs/assets/fragments/aids-examples.md:
--------------------------------------------------------------------------------
1 | === "SIDA"
2 | ```python
3 | text = "Patient atteint du VIH au stade SIDA."
4 | doc = nlp(text)
5 | spans = doc.spans["aids"]
6 |
7 | spans
8 | # Out: [VIH au stade SIDA]
9 | ```
10 |
11 |
12 |
13 | === "VIH"
14 | ```python
15 | text = "Patient atteint du VIH."
16 | doc = nlp(text)
17 | spans = doc.spans["aids"]
18 |
19 | spans
20 | # Out: []
21 | ```
22 |
23 |
24 |
25 | === "Coinfection"
26 | ```python
27 | text = "Il y a un VIH avec coinfection pneumocystose"
28 | doc = nlp(text)
29 | spans = doc.spans["aids"]
30 |
31 | spans
32 | # Out: [VIH]
33 |
34 | span = spans[0]
35 |
36 | span._.assigned
37 | # Out: {'opportunist': [coinfection, pneumocystose]}
38 | ```
39 |
40 |
41 |
42 | === "VIH stade SIDA"
43 | ```python
44 | text = "Présence d'un VIH stade C"
45 | doc = nlp(text)
46 | spans = doc.spans["aids"]
47 |
48 | spans
49 | # Out: [VIH]
50 |
51 | span = spans[0]
52 |
53 | span._.assigned
54 | # Out: {'stage': [C]}
55 | ```
56 |
--------------------------------------------------------------------------------
/docs/assets/fragments/congestive-heart-failure-examples.md:
--------------------------------------------------------------------------------
1 |
2 | === "1"
3 | ```python
4 | text = "Présence d'un oedème pulmonaire"
5 | doc = nlp(text)
6 | spans = doc.spans["congestive_heart_failure"]
7 |
8 | spans
9 | # Out: [oedème pulmonaire]
10 | ```
11 |
12 | === "2"
13 | ```python
14 | text = "Le patient est équipé d'un pace-maker"
15 | doc = nlp(text)
16 | spans = doc.spans["congestive_heart_failure"]
17 |
18 | spans
19 | # Out: [pace-maker]
20 | ```
21 |
22 | === "3"
23 | ```python
24 | text = "Un cardiopathie non décompensée"
25 | doc = nlp(text)
26 | spans = doc.spans["congestive_heart_failure"]
27 |
28 | spans
29 | # Out: []
30 | ```
31 |
32 | === "4"
33 | ```python
34 | text = "Insuffisance cardiaque"
35 | doc = nlp(text)
36 | spans = doc.spans["congestive_heart_failure"]
37 |
38 | spans
39 | # Out: [Insuffisance cardiaque]
40 | ```
41 |
42 | === "5"
43 | ```python
44 | text = "Insuffisance cardiaque minime"
45 | doc = nlp(text)
46 | spans = doc.spans["congestive_heart_failure"]
47 |
48 | spans
49 | # Out: []
50 | ```
51 |
--------------------------------------------------------------------------------
/docs/assets/fragments/connective-tissue-disease-examples.md:
--------------------------------------------------------------------------------
1 | === "1"
2 | ```python
3 | text = "Présence d'une sclérodermie."
4 | doc = nlp(text)
5 | spans = doc.spans["connective_tissue_disease"]
6 |
7 | spans
8 | # Out: [sclérodermie]
9 | ```
10 |
11 |
12 |
13 | === "2"
14 | ```python
15 | text = "Patient atteint d'un lupus."
16 | doc = nlp(text)
17 | spans = doc.spans["connective_tissue_disease"]
18 |
19 | spans
20 | # Out: [lupus]
21 | ```
22 |
23 |
24 |
25 | === "3"
26 | ```python
27 | text = "Présence d'anticoagulants lupiques,"
28 | doc = nlp(text)
29 | spans = doc.spans["connective_tissue_disease"]
30 |
31 | spans
32 | # Out: []
33 | ```
34 |
35 |
36 |
37 | === "4"
38 | ```python
39 | text = "Il y a une MICI."
40 | doc = nlp(text)
41 | spans = doc.spans["connective_tissue_disease"]
42 |
43 | spans
44 | # Out: [MICI]
45 | ```
46 |
47 |
48 |
49 | === "5"
50 | ```python
51 | text = "Syndrome de Raynaud"
52 | doc = nlp(text)
53 | spans = doc.spans["connective_tissue_disease"]
54 |
55 | spans
56 | # Out: [Raynaud]
57 | ```
58 |
--------------------------------------------------------------------------------
/docs/assets/fragments/copd-examples.md:
--------------------------------------------------------------------------------
1 | === "1"
2 | ```python
3 | text = "Une fibrose interstitielle diffuse idiopathique"
4 | doc = nlp(text)
5 | spans = doc.spans["copd"]
6 |
7 | spans
8 | # Out: [fibrose interstitielle diffuse idiopathique]
9 | ```
10 |
11 |
12 |
13 | === "2"
14 | ```python
15 | text = "Patient atteint de pneumoconiose"
16 | doc = nlp(text)
17 | spans = doc.spans["copd"]
18 |
19 | spans
20 | # Out: [pneumoconiose]
21 | ```
22 |
23 |
24 |
25 | === "3"
26 | ```python
27 | text = "Présence d'une HTAP."
28 | doc = nlp(text)
29 | spans = doc.spans["copd"]
30 |
31 | spans
32 | # Out: [HTAP]
33 | ```
34 |
35 |
36 |
37 | === "4"
38 | ```python
39 | text = "On voit une hypertension pulmonaire minime"
40 | doc = nlp(text)
41 | spans = doc.spans["copd"]
42 |
43 | spans
44 | # Out: []
45 | ```
46 |
47 |
48 |
49 | === "5"
50 | ```python
51 | text = "La patiente a été mis sous oxygénorequérance"
52 | doc = nlp(text)
53 | spans = doc.spans["copd"]
54 |
55 | spans
56 | # Out: []
57 | ```
58 |
59 |
60 |
61 | === "6"
62 | ```python
63 | text = "La patiente est sous oxygénorequérance au long cours"
64 | doc = nlp(text)
65 | spans = doc.spans["copd"]
66 |
67 | spans
68 | # Out: [oxygénorequérance au long cours]
69 |
70 | span = spans[0]
71 |
72 | span._.assigned
73 | # Out: {'long': [long cours]}
74 | ```
75 |
--------------------------------------------------------------------------------
/docs/assets/fragments/dementia-examples.md:
--------------------------------------------------------------------------------
1 | === "1"
2 | ```python
3 | text = "D'importants déficits cognitifs"
4 | doc = nlp(text)
5 | spans = doc.spans["dementia"]
6 |
7 | spans
8 | # Out: [déficits cognitifs]
9 | ```
10 |
11 |
12 |
13 | === "2"
14 | ```python
15 | text = "Patient atteint de démence"
16 | doc = nlp(text)
17 | spans = doc.spans["dementia"]
18 |
19 | spans
20 | # Out: [démence]
21 | ```
22 |
23 |
24 |
25 | === "3"
26 | ```python
27 | text = "On retrouve des anti-SLA"
28 | doc = nlp(text)
29 | spans = doc.spans["dementia"]
30 |
31 | spans
32 | # Out: []
33 | ```
34 |
35 |
36 |
37 | === "4"
38 | ```python
39 | text = "Une maladie de Charcot"
40 | doc = nlp(text)
41 | spans = doc.spans["dementia"]
42 |
43 | spans
44 | # Out: [maladie de Charcot]
45 | ```
46 |
--------------------------------------------------------------------------------
/docs/assets/fragments/hemiplegia-examples.md:
--------------------------------------------------------------------------------
1 | === "1"
2 | ```python
3 | text = "Patient hémiplégique"
4 | doc = nlp(text)
5 | spans = doc.spans["hemiplegia"]
6 |
7 | spans
8 | # Out: [hémiplégique]
9 | ```
10 |
11 |
12 |
13 | === "2"
14 | ```python
15 | text = "Paralysie des membres inférieurs"
16 | doc = nlp(text)
17 | spans = doc.spans["hemiplegia"]
18 |
19 | spans
20 | # Out: [Paralysie des membres]
21 | ```
22 |
23 |
24 |
25 | === "3"
26 | ```python
27 | text = "Patient en LIS"
28 | doc = nlp(text)
29 | spans = doc.spans["hemiplegia"]
30 |
31 | spans
32 | # Out: [LIS]
33 | ```
34 |
--------------------------------------------------------------------------------
/docs/assets/fragments/leukemia-examples.md:
--------------------------------------------------------------------------------
1 | === "1"
2 | ```python
3 | text = "Sydrome myéloprolifératif"
4 | doc = nlp(text)
5 | spans = doc.spans["leukemia"]
6 |
7 | spans
8 | # Out: [myéloprolifératif]
9 | ```
10 |
11 |
12 |
13 | === "2"
14 | ```python
15 | text = "Sydrome myéloprolifératif bénin"
16 | doc = nlp(text)
17 | spans = doc.spans["leukemia"]
18 |
19 | spans
20 | # Out: []
21 | ```
22 |
23 |
24 |
25 | === "3"
26 | ```python
27 | text = "Patient atteint d'une LAM"
28 | doc = nlp(text)
29 | spans = doc.spans["leukemia"]
30 |
31 | spans
32 | # Out: [LAM]
33 | ```
34 |
35 |
36 |
37 | === "4"
38 | ```python
39 | text = "Une maladie de Vaquez"
40 | doc = nlp(text)
41 | spans = doc.spans["leukemia"]
42 |
43 | spans
44 | # Out: [Vaquez]
45 | ```
46 |
--------------------------------------------------------------------------------
/docs/assets/fragments/liver-disease-examples.md:
--------------------------------------------------------------------------------
1 | === "1"
2 | ```python
3 | text = "Il y a une fibrose hépatique"
4 | doc = nlp(text)
5 | spans = doc.spans["liver_disease"]
6 |
7 | spans
8 | # Out: [fibrose hépatique]
9 | ```
10 |
11 |
12 |
13 | === "2"
14 | ```python
15 | text = "Une hépatite B chronique"
16 | doc = nlp(text)
17 | spans = doc.spans["liver_disease"]
18 |
19 | spans
20 | # Out: [hépatite B chronique]
21 | ```
22 |
23 |
24 |
25 | === "3"
26 | ```python
27 | text = "Le patient consulte pour une cirrhose"
28 | doc = nlp(text)
29 | spans = doc.spans["liver_disease"]
30 |
31 | spans
32 | # Out: [cirrhose]
33 |
34 | span = spans[0]
35 |
36 | span._.detailed_status
37 | # Out: MODERATE_TO_SEVERE
38 | ```
39 |
40 |
41 |
42 | === "4"
43 | ```python
44 | text = "Greffe hépatique."
45 | doc = nlp(text)
46 | spans = doc.spans["liver_disease"]
47 |
48 | spans
49 | # Out: [Greffe hépatique]
50 |
51 | span = spans[0]
52 |
53 | span._.detailed_status
54 | # Out: MODERATE_TO_SEVERE
55 | ```
56 |
--------------------------------------------------------------------------------
/docs/assets/fragments/lymphoma-examples.md:
--------------------------------------------------------------------------------
1 | === "1"
2 | ```python
3 | text = "Un lymphome de Hodgkin."
4 | doc = nlp(text)
5 | spans = doc.spans["lymphoma"]
6 |
7 | spans
8 | # Out: [lymphome de Hodgkin]
9 | ```
10 |
11 |
12 |
13 | === "2"
14 | ```python
15 | text = "Atteint d'un Waldenstörm"
16 | doc = nlp(text)
17 | spans = doc.spans["lymphoma"]
18 |
19 | spans
20 | # Out: [Waldenstörm]
21 | ```
22 |
23 |
24 |
25 | === "3"
26 | ```python
27 | text = "Un LAGC"
28 | doc = nlp(text)
29 | spans = doc.spans["lymphoma"]
30 |
31 | spans
32 | # Out: [LAGC]
33 | ```
34 |
35 |
36 |
37 | === "4"
38 | ```python
39 | text = "anti LAGC: 10^4/mL"
40 | doc = nlp(text)
41 | spans = doc.spans["lymphoma"]
42 |
43 | spans
44 | # Out: []
45 | ```
46 |
--------------------------------------------------------------------------------
/docs/assets/fragments/myocardial-infarction-examples.md:
--------------------------------------------------------------------------------
1 | === "1"
2 | ```python
3 | text = "Une cardiopathie ischémique"
4 | doc = nlp(text)
5 | spans = doc.spans["myocardial_infarction"]
6 |
7 | spans
8 | # Out: [cardiopathie ischémique]
9 | ```
10 |
11 |
12 |
13 | === "2"
14 | ```python
15 | text = "Une cardiopathie non-ischémique"
16 | doc = nlp(text)
17 | spans = doc.spans["myocardial_infarction"]
18 |
19 | spans
20 | # Out: []
21 | ```
22 |
23 |
24 |
25 | === "3"
26 | ```python
27 | text = "Présence d'un stent sur la marginale"
28 | doc = nlp(text)
29 | spans = doc.spans["myocardial_infarction"]
30 |
31 | spans
32 | # Out: [stent sur la marginale]
33 |
34 | span = spans[0]
35 |
36 | span._.assigned
37 | # Out: {'heart_localized': [marginale]}
38 | ```
39 |
40 |
41 |
42 | === "4"
43 | ```python
44 | text = "Présence d'un stent périphérique"
45 | doc = nlp(text)
46 | spans = doc.spans["myocardial_infarction"]
47 |
48 | spans
49 | # Out: []
50 | ```
51 |
52 |
53 |
54 | === "5"
55 | ```python
56 | text = "infarctus du myocarde"
57 | doc = nlp(text)
58 | spans = doc.spans["myocardial_infarction"]
59 |
60 | spans
61 | # Out: [infarctus du myocarde]
62 |
63 | span = spans[0]
64 |
65 | span._.assigned
66 | # Out: {'heart_localized': [myocarde]}
67 | ```
68 |
--------------------------------------------------------------------------------
/docs/assets/fragments/peptic-ulcer-disease-examples.md:
--------------------------------------------------------------------------------
1 | === "1"
2 | ```python
3 | text = "Beaucoup d'ulcères gastriques"
4 | doc = nlp(text)
5 | spans = doc.spans["peptic_ulcer_disease"]
6 |
7 | spans
8 | # Out: [ulcères gastriques]
9 | ```
10 |
11 |
12 |
13 | === "2"
14 | ```python
15 | text = "Présence d'UGD"
16 | doc = nlp(text)
17 | spans = doc.spans["peptic_ulcer_disease"]
18 |
19 | spans
20 | # Out: [UGD]
21 | ```
22 |
23 |
24 |
25 | === "3"
26 | ```python
27 | text = "La patient à des ulcères"
28 | doc = nlp(text)
29 | spans = doc.spans["peptic_ulcer_disease"]
30 |
31 | spans
32 | # Out: []
33 | ```
34 |
35 |
36 |
37 | === "4"
38 | ```python
39 | text = "Au niveau gastrique: blabla blabla blabla blabla blabla quelques ulcères"
40 | doc = nlp(text)
41 | spans = doc.spans["peptic_ulcer_disease"]
42 |
43 | spans
44 | # Out: [gastrique: blabla blabla blabla blabla blabla quelques ulcères]
45 |
46 | span = spans[0]
47 |
48 | span._.assigned
49 | # Out: {'is_peptic': [gastrique]}
50 | ```
51 |
--------------------------------------------------------------------------------
/docs/assets/images/class_span_linker.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/docs/assets/images/class_span_linker.png
--------------------------------------------------------------------------------
/docs/assets/images/hybrid-pipeline-example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/docs/assets/images/hybrid-pipeline-example.png
--------------------------------------------------------------------------------
/docs/assets/images/model-parallelism.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/docs/assets/images/model-parallelism.png
--------------------------------------------------------------------------------
/docs/assets/images/multiprocessing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/docs/assets/images/multiprocessing.png
--------------------------------------------------------------------------------
/docs/assets/images/sharing-components.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/docs/assets/images/sharing-components.png
--------------------------------------------------------------------------------
/docs/assets/images/synonym_span_linker.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/docs/assets/images/synonym_span_linker.png
--------------------------------------------------------------------------------
/docs/assets/overrides/main.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block announce %}
4 | Check out the new Model Training tutorial!
5 | {% endblock %}
6 |
--------------------------------------------------------------------------------
/docs/assets/templates/python/material/docstring/examples.html:
--------------------------------------------------------------------------------
1 | {{ "# Examples\n"|convert_markdown(heading_level, html_id) }}
2 | {% for section_type, sub_section in section.value %}
3 | {% if section_type.value == "text" %}
4 | {{ sub_section|convert_markdown(heading_level, html_id) }}
5 | {% elif section_type.value == "examples" %}
6 | {{ sub_section|convert_markdown(heading_level, html_id) }}
7 | {% endif %}
8 | {% endfor %}
9 |
--------------------------------------------------------------------------------
/docs/data/json.md:
--------------------------------------------------------------------------------
1 | # JSON
2 |
3 | ??? abstract "TLDR"
4 |
5 | ```{ .python .no-check }
6 | import edsnlp
7 |
8 | stream = edsnlp.data.read_json(path, converter="omop")
9 | stream = stream.map_pipeline(nlp)
10 | res = stream.to_json(path, converter="omop")
11 | # or equivalently
12 | edsnlp.data.to_json(stream, path, converter="omop")
13 | ```
14 |
15 | We provide methods to read and write documents (raw or annotated) from and to json files.
16 |
17 | As an example, imagine that we have the following document that uses the OMOP schema
18 |
19 | ```{ title="data.jsonl" }
20 | { "note_id": 0, "note_text": "Le patient ...", "note_datetime": "2021-10-23", "entities": [...] }
21 | { "note_id": 1, "note_text": "Autre doc ...", "note_datetime": "2022-12-24", "entities": [] }
22 | ...
23 | ```
24 |
25 | You could also have multiple `.json` files in a directory; the reader will read them all.
26 |
27 | ## Reading JSON files {: #edsnlp.data.json.read_json }
28 |
29 | ::: edsnlp.data.json.read_json
30 | options:
31 | heading_level: 3
32 | show_source: false
33 | show_toc: false
34 | show_bases: false
35 |
36 | ## Writing JSON files {: #edsnlp.data.json.write_json }
37 |
38 | ::: edsnlp.data.json.write_json
39 | options:
40 | heading_level: 3
41 | show_source: false
42 | show_toc: false
43 | show_bases: false
44 |
--------------------------------------------------------------------------------
/docs/data/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/docs/data/overview.png
--------------------------------------------------------------------------------
/docs/data/polars.md:
--------------------------------------------------------------------------------
1 | # Polars
2 |
3 | ??? abstract "TLDR"
4 |
5 | ```{ .python .no-check }
6 | import edsnlp
7 |
8 | stream = edsnlp.data.from_polars(df, converter="omop")
9 | stream = stream.map_pipeline(nlp)
10 | res = stream.to_polars(converter="omop")
11 | # or equivalently
12 | edsnlp.data.to_polars(stream, converter="omop")
13 | ```
14 |
15 | We provide methods to read and write documents (raw or annotated) from and to Polars DataFrames.
16 |
17 | As an example, imagine that we have the following OMOP dataframe (we'll name it `note_df`)
18 |
19 | | note_id | note_text | note_datetime |
20 | |--------:|:----------------------------------------------|:--------------|
21 | | 0 | Le patient est admis pour une pneumopathie... | 2021-10-23 |
22 |
23 | ## Reading from a Polars Dataframe {: #edsnlp.data.polars.from_polars }
24 |
25 | ::: edsnlp.data.polars.from_polars
26 | options:
27 | heading_level: 3
28 | show_source: false
29 | show_toc: false
30 | show_bases: false
31 |
32 |
33 | ## Writing to a Polars DataFrame {: #edsnlp.data.polars.to_polars }
34 |
35 | ::: edsnlp.data.polars.to_polars
36 | options:
37 | heading_level: 3
38 | show_source: false
39 | show_toc: false
40 | show_bases: false
41 |
--------------------------------------------------------------------------------
/docs/data/standoff.md:
--------------------------------------------------------------------------------
1 | # BRAT and Standoff
2 |
3 | ??? abstract "TLDR"
4 |
5 | ```{ .python .no-check }
6 | import edsnlp
7 |
8 | stream = edsnlp.data.read_standoff(path)
9 | stream = stream.map_pipeline(nlp)
10 | res = stream.write_standoff(path)
11 | # or equivalently
12 | edsnlp.data.write_standoff(stream, path)
13 | ```
14 |
15 | You can easily integrate [BRAT](https://brat.nlplab.org/) into your project by using EDS-NLP's BRAT reader and writer.
16 |
17 | BRAT annotations are in the [standoff format](https://brat.nlplab.org/standoff.html). Consider the following document:
18 |
19 | ```{ title="doc.txt" }
20 | Le patient est admis pour une pneumopathie au coronavirus.
21 | On lui prescrit du paracétamol.
22 | ```
23 |
24 | BRAT annotations are stored in a separate file formatted as follows:
25 |
26 | ```{ title="doc.ann" }
27 | T1 Patient 4 11 patient
28 | T2 Disease 31 58 pneumopathie au coronavirus
29 | T3 Drug 79 90 paracétamol
30 | ```
31 |
32 | ## Reading Standoff files {: #edsnlp.data.standoff.read_standoff }
33 |
34 | ::: edsnlp.data.standoff.read_standoff
35 | options:
36 | heading_level: 3
37 | show_source: false
38 | show_toc: false
39 | show_bases: false
40 |
41 | ## Writing Standoff files {: #edsnlp.data.standoff.write_standoff }
42 |
43 | ::: edsnlp.data.standoff.write_standoff
44 | options:
45 | heading_level: 3
46 | show_source: false
47 | show_toc: false
48 | show_bases: false
49 |
--------------------------------------------------------------------------------
/docs/pipes/core/endlines.md:
--------------------------------------------------------------------------------
1 | # Endlines {: #edsnlp.pipes.core.endlines.factory.create_component }
2 |
3 | ::: edsnlp.pipes.core.endlines.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/core/index.md:
--------------------------------------------------------------------------------
1 | # Core Components
2 |
3 | This section deals with "core" functionalities offered by EDS-NLP:
4 |
5 | - Generic matchers against regular expressions and lists of terms
6 | - Text cleaning
7 | - Sentence boundaries detection
8 |
9 | ## Available components
10 |
11 |
12 |
13 | | Component | Description |
14 | |-------------------------|-------------------------------------------------|
15 | | `eds.normalizer` | Non-destructive input text normalisation |
16 | | `eds.sentences` | Better sentence boundary detection |
17 | | `eds.matcher` | A simple yet powerful entity extractor |
18 | | `eds.terminology` | A simple yet powerful terminology matcher |
19 | | `eds.contextual_matcher` | A conditional entity extractor |
20 | | `eds.endlines` | An unsupervised model to classify each end line |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/docs/pipes/core/matcher.md:
--------------------------------------------------------------------------------
1 | # Matcher {: #edsnlp.pipes.core.matcher.factory.create_component }
2 |
3 | ::: edsnlp.pipes.core.matcher.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/core/sentences.md:
--------------------------------------------------------------------------------
1 | # Sentences {: #edsnlp.pipes.core.sentences.factory.create_component }
2 |
3 | ::: edsnlp.pipes.core.sentences.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/core/terminology.md:
--------------------------------------------------------------------------------
1 | # Terminology {: #edsnlp.pipes.core.terminology.factory.create_component }
2 |
3 | ::: edsnlp.pipes.core.terminology.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/misc/consultation-dates.md:
--------------------------------------------------------------------------------
1 | # Consultation dates {: #edsnlp.pipes.misc.consultation_dates.factory.create_component }
2 |
3 | ::: edsnlp.pipes.misc.consultation_dates.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/misc/dates.md:
--------------------------------------------------------------------------------
1 | # Dates {: #edsnlp.pipes.misc.dates.factory.create_component }
2 |
3 | ::: edsnlp.pipes.misc.dates.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/misc/index.md:
--------------------------------------------------------------------------------
1 | # Miscellaneous
2 |
3 | This section regroups components that extract information that can be used by other components, but that have little medical value in themselves.
4 |
5 | For instance, the date detection and normalisation pipeline falls in this category.
6 |
7 | ## Available components
8 |
9 |
10 |
11 | | Component | Description |
12 | |--------------------------|---------------------------------------------|
13 | | `eds.dates` | Date extraction and normalisation |
14 | | `eds.consultation_dates` | Identify consultation dates |
15 | | `eds.quantities` | Quantity extraction and normalisation |
16 | | `eds.sections` | Section detection |
17 | | `eds.reason` | Rule-based hospitalisation reason detection |
18 | | `eds.tables` | Tables detection |
19 | | `eds.split` | Doc splitting |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/docs/pipes/misc/quantities.md:
--------------------------------------------------------------------------------
1 | # Quantities {: #edsnlp.pipes.misc.quantities.factory.create_component }
2 |
3 | ::: edsnlp.pipes.misc.quantities.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/misc/reason.md:
--------------------------------------------------------------------------------
1 | # Reasons {: #edsnlp.pipes.misc.reason.factory.create_component }
2 |
3 | ::: edsnlp.pipes.misc.reason.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/misc/sections.md:
--------------------------------------------------------------------------------
1 | # Sections {: #edsnlp.pipes.misc.sections.factory.create_component }
2 |
3 | ::: edsnlp.pipes.misc.sections.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/misc/split.md:
--------------------------------------------------------------------------------
1 | # Split {: #edsnlp.pipes.misc.split.split.Split }
2 |
3 | ::: edsnlp.pipes.misc.split.split.Split
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 | skip_parameters: ["nlp", "name"]
10 |
--------------------------------------------------------------------------------
/docs/pipes/misc/tables.md:
--------------------------------------------------------------------------------
1 | # Tables {: #edsnlp.pipes.misc.tables.factory.create_component }
2 |
3 | ::: edsnlp.pipes.misc.tables.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/adicap.md:
--------------------------------------------------------------------------------
1 | # Adicap {: #edsnlp.pipes.ner.adicap.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.adicap.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/behaviors/alcohol.md:
--------------------------------------------------------------------------------
1 | # Alcohol consumption {: #edsnlp.pipes.ner.behaviors.alcohol.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.behaviors.alcohol.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 |
--------------------------------------------------------------------------------
/docs/pipes/ner/behaviors/tobacco.md:
--------------------------------------------------------------------------------
1 | # Tobacco consumption {: #edsnlp.pipes.ner.behaviors.tobacco.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.behaviors.tobacco.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 |
--------------------------------------------------------------------------------
/docs/pipes/ner/cim10.md:
--------------------------------------------------------------------------------
1 | # CIM10 {: #edsnlp.pipes.ner.cim10.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.cim10.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/covid.md:
--------------------------------------------------------------------------------
1 | # COVID {: #edsnlp.pipes.ner.covid.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.covid.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/disorders/aids.md:
--------------------------------------------------------------------------------
1 | # AIDS {: #edsnlp.pipes.ner.disorders.aids.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.disorders.aids.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/disorders/cerebrovascular-accident.md:
--------------------------------------------------------------------------------
1 | # Cerebrovascular accident {: #edsnlp.pipes.ner.disorders.cerebrovascular_accident.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.disorders.cerebrovascular_accident.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/disorders/ckd.md:
--------------------------------------------------------------------------------
1 | # CKD {: #edsnlp.pipes.ner.disorders.ckd.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.disorders.ckd.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/disorders/congestive-heart-failure.md:
--------------------------------------------------------------------------------
1 | # Congestive heart failure {: #edsnlp.pipes.ner.disorders.congestive_heart_failure.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.disorders.congestive_heart_failure.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/disorders/connective-tissue-disease.md:
--------------------------------------------------------------------------------
1 | # Connective tissue disease {: #edsnlp.pipes.ner.disorders.connective_tissue_disease.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.disorders.connective_tissue_disease.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/disorders/copd.md:
--------------------------------------------------------------------------------
1 | # COPD {: #edsnlp.pipes.ner.disorders.copd.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.disorders.copd.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/disorders/dementia.md:
--------------------------------------------------------------------------------
1 | # Dementia {: #edsnlp.pipes.ner.disorders.dementia.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.disorders.dementia.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/disorders/diabetes.md:
--------------------------------------------------------------------------------
1 | # Diabetes {: #edsnlp.pipes.ner.disorders.diabetes.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.disorders.diabetes.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/disorders/hemiplegia.md:
--------------------------------------------------------------------------------
1 | # Hemiplegia {: #edsnlp.pipes.ner.disorders.hemiplegia.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.disorders.hemiplegia.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/disorders/leukemia.md:
--------------------------------------------------------------------------------
1 | # Leukemia {: #edsnlp.pipes.ner.disorders.leukemia.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.disorders.leukemia.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/disorders/liver-disease.md:
--------------------------------------------------------------------------------
1 | # Liver disease {: #edsnlp.pipes.ner.disorders.liver_disease.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.disorders.liver_disease.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/disorders/lymphoma.md:
--------------------------------------------------------------------------------
1 | # Lymphoma {: #edsnlp.pipes.ner.disorders.lymphoma.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.disorders.lymphoma.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/disorders/myocardial-infarction.md:
--------------------------------------------------------------------------------
1 | # Myocardial infarction {: #edsnlp.pipes.ner.disorders.myocardial_infarction.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.disorders.myocardial_infarction.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/disorders/peptic-ulcer-disease.md:
--------------------------------------------------------------------------------
1 | # Peptic ulcer disease {: #edsnlp.pipes.ner.disorders.peptic_ulcer_disease.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.disorders.peptic_ulcer_disease.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/disorders/peripheral-vascular-disease.md:
--------------------------------------------------------------------------------
1 | # Peripheral vascular disease {: #edsnlp.pipes.ner.disorders.peripheral_vascular_disease.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.disorders.peripheral_vascular_disease.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/disorders/solid-tumor.md:
--------------------------------------------------------------------------------
1 | # Solid tumor {: #edsnlp.pipes.ner.disorders.solid_tumor.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.disorders.solid_tumor.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/drugs.md:
--------------------------------------------------------------------------------
1 | # Drugs {: #edsnlp.pipes.ner.drugs.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.drugs.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/scores/charlson.md:
--------------------------------------------------------------------------------
1 | # Charlson {: #edsnlp.pipes.ner.scores.charlson.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.scores.charlson.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/scores/elston-ellis.md:
--------------------------------------------------------------------------------
1 | # Elston-Ellis {: #edsnlp.pipes.ner.scores.elston_ellis.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.scores.elston_ellis.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/scores/emergency-ccmu.md:
--------------------------------------------------------------------------------
1 | # Emergency CCMU {: #edsnlp.pipes.ner.scores.emergency.ccmu.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.scores.emergency.ccmu.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/scores/emergency-gemsa.md:
--------------------------------------------------------------------------------
1 | # Emergency GEMSA {: #edsnlp.pipes.ner.scores.emergency.gemsa.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.scores.emergency.gemsa.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/scores/emergency-priority.md:
--------------------------------------------------------------------------------
1 | # Emergency Priority {: #edsnlp.pipes.ner.scores.emergency.priority.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.scores.emergency.priority.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/scores/sofa.md:
--------------------------------------------------------------------------------
1 | # SOFA {: #edsnlp.pipes.ner.scores.sofa.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.scores.sofa.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/suicide_attempt.md:
--------------------------------------------------------------------------------
1 | # Suicide Attempt {: #edsnlp.pipes.ner.suicide_attempt.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.suicide_attempt.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: true
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/tnm.md:
--------------------------------------------------------------------------------
1 | # TNM {: #edsnlp.pipes.ner.tnm.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.tnm.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/ner/umls.md:
--------------------------------------------------------------------------------
1 | # UMLS {: #edsnlp.pipes.ner.umls.factory.create_component }
2 |
3 | ::: edsnlp.pipes.ner.umls.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/qualifiers/family.md:
--------------------------------------------------------------------------------
1 | # Family Context {: #edsnlp.pipes.qualifiers.family.factory.create_component }
2 |
3 | ::: edsnlp.pipes.qualifiers.family.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/qualifiers/history.md:
--------------------------------------------------------------------------------
1 | # Medical History {: #edsnlp.pipes.qualifiers.history.factory.create_component }
2 |
3 | ::: edsnlp.pipes.qualifiers.history.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/qualifiers/hypothesis.md:
--------------------------------------------------------------------------------
1 | # Hypothesis {: #edsnlp.pipes.qualifiers.hypothesis.factory.create_component }
2 |
3 | ::: edsnlp.pipes.qualifiers.hypothesis.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/qualifiers/negation.md:
--------------------------------------------------------------------------------
1 | # Negation {: #edsnlp.pipes.qualifiers.negation.factory.create_component }
2 |
3 | ::: edsnlp.pipes.qualifiers.negation.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/qualifiers/reported-speech.md:
--------------------------------------------------------------------------------
1 | # Reported Speech {: #edsnlp.pipes.qualifiers.reported_speech.factory.create_component }
2 |
3 | ::: edsnlp.pipes.qualifiers.reported_speech.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/trainable/biaffine-dependency-parser.md:
--------------------------------------------------------------------------------
1 | # Trainable Biaffine Dependency Parser {: #edsnlp.pipes.trainable.biaffine_dep_parser.factory.create_component }
2 |
3 | ::: edsnlp.pipes.trainable.biaffine_dep_parser.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/trainable/embeddings/span_pooler.md:
--------------------------------------------------------------------------------
1 | # Span Pooler {: #edsnlp.pipes.trainable.embeddings.span_pooler.factory.create_component }
2 |
3 | ::: edsnlp.pipes.trainable.embeddings.span_pooler.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/trainable/embeddings/text_cnn.md:
--------------------------------------------------------------------------------
1 | # Text CNN {: #edsnlp.pipes.trainable.embeddings.text_cnn.factory.create_component }
2 |
3 | ::: edsnlp.pipes.trainable.embeddings.text_cnn.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/trainable/embeddings/transformer.md:
--------------------------------------------------------------------------------
1 | # Transformer {: #edsnlp.pipes.trainable.embeddings.transformer.factory.create_component }
2 |
3 | ::: edsnlp.pipes.trainable.embeddings.transformer.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/trainable/extractive-qa.md:
--------------------------------------------------------------------------------
1 | # Extractive Question Answering {: #edsnlp.pipes.trainable.extractive_qa.factory.create_component }
2 |
3 | ::: edsnlp.pipes.trainable.extractive_qa.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/trainable/ner.md:
--------------------------------------------------------------------------------
1 | # Trainable NER {: #edsnlp.pipes.trainable.ner_crf.factory.create_component }
2 |
3 | ::: edsnlp.pipes.trainable.ner_crf.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/trainable/span-classifier.md:
--------------------------------------------------------------------------------
1 | # Trainable Span Classifier {: #edsnlp.pipes.trainable.span_classifier.factory.create_component }
2 |
3 | ::: edsnlp.pipes.trainable.span_classifier.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/pipes/trainable/span-linker.md:
--------------------------------------------------------------------------------
1 | # Trainable Span Linker {: #edsnlp.pipes.trainable.span_linker.factory.create_component }
2 |
3 | ::: edsnlp.pipes.trainable.span_linker.factory.create_component
4 | options:
5 | heading_level: 2
6 | show_bases: false
7 | show_source: false
8 | only_class_level: true
9 |
--------------------------------------------------------------------------------
/docs/scripts/autorefs/LICENSE:
--------------------------------------------------------------------------------
1 | ISC License
2 |
3 | Copyright (c) 2019, Oleh Prypin
4 | Copyright (c) 2019, Timothée Mazzucotelli
5 |
6 | Permission to use, copy, modify, and/or distribute this software for any
7 | purpose with or without fee is hereby granted, provided that the above
8 | copyright notice and this permission notice appear in all copies.
9 |
10 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 |
--------------------------------------------------------------------------------
/docs/utilities/connectors/brat.md:
--------------------------------------------------------------------------------
1 | # BRAT Connector
2 |
3 | BRAT is currently the only supported in-text annotation editor at EDS. BRAT annotations are in the [standoff format](https://brat.nlplab.org/standoff.html). Consider the following document:
4 |
5 | ```
6 | Le patient est admis pour une pneumopathie au coronavirus.
7 | On lui prescrit du paracétamol.
8 | ```
9 |
10 | It could be annotated as follows:
11 |
12 | ```
13 | T1 Patient 4 11 patient
14 | T2 Disease 31 58 pneumopathie au coronavirus
15 | T3 Drug 79 90 paracétamol
16 | ```
17 |
18 | The point of the BRAT connector is to go from the standoff annotation format to an annotated spaCy document:
19 |
20 | ```{ .python .no-check }
21 | import edsnlp
22 | from edsnlp.connectors.brat import BratConnector
23 |
24 | # Instantiate the connector
25 | brat = BratConnector("path/to/brat")
26 |
27 | # Instantiate the spacy pipeline
28 | nlp = edsnlp.blank("eds")
29 |
30 | # Convert all BRAT files to a list of documents
31 | docs = brat.brat2docs(nlp)
32 | doc = docs[0]
33 |
34 | doc.ents
35 | # Out: [patient, pneumopathie au coronavirus, paracétamol]
36 |
37 | doc.ents[0].label_
38 | # Out: Patient
39 | ```
40 |
41 | The connector can also go the other way around, enabling pre-annotations and an ersatz of active learning.
42 |
--------------------------------------------------------------------------------
/docs/utilities/connectors/labeltool.md:
--------------------------------------------------------------------------------
1 | # LabelTool Connector
2 |
3 | LabelTool is an in-house module enabling rapid annotation of pre-extracted entities.
4 |
5 | We provide a ready-to-use function that converts a list of annotated spaCy documents into a `pandas` DataFrame that is readable by LabelTool.
6 |
7 | ```python
8 | import edsnlp, edsnlp.pipes as eds
9 |
10 | from edsnlp.connectors.labeltool import docs2labeltool
11 |
12 | corpus = [
13 | "Ceci est un document médical.",
14 | "Le patient n'est pas malade.",
15 | ]
16 |
17 | # Instantiate the spacy pipeline
18 | nlp = edsnlp.blank("fr")
19 | nlp.add_pipe(eds.sentences())
20 | nlp.add_pipe(eds.matcher(terms=dict(medical="médical", malade="malade")))
21 | nlp.add_pipe(eds.negation())
22 |
23 | # Apply the pipeline to the corpus of documents
24 | docs = nlp.pipe(corpus)
25 |
26 | df = docs2labeltool(docs, extensions=["negation"])
27 | ```
28 |
29 | The results:
30 |
31 | | note_id | note_text | start | end | label | lexical_variant | negation |
32 | | ------- | ----------------------------- | ----- | --- | ------- | --------------- | -------- |
33 | | 0 | Ceci est un document médical. | 21 | 28 | medical | médical | False |
34 | | 1 | Le patient n'est pas malade. | 21 | 27 | malade | malade | True |
35 |
--------------------------------------------------------------------------------
/docs/utilities/connectors/overview.md:
--------------------------------------------------------------------------------
1 | # Overview of connectors
2 |
3 | EDS-NLP provides a series of connectors that convert back and forth between various formats and the spaCy representation.
4 |
5 | We provide the following connectors:
6 |
7 | - [BRAT](./brat.md)
8 | - [OMOP](./omop.md)
9 |
10 |
--------------------------------------------------------------------------------
/docs/utilities/evaluation.md:
--------------------------------------------------------------------------------
1 | # Pipeline evaluation
2 |
--------------------------------------------------------------------------------
/docs/utilities/index.md:
--------------------------------------------------------------------------------
1 | # Utilities
2 |
3 | EDS-NLP provides a few utilities to deploy pipelines, process RegExps, etc.
4 |
--------------------------------------------------------------------------------
/docs/utilities/regex.md:
--------------------------------------------------------------------------------
1 | # Work with RegExp
2 |
--------------------------------------------------------------------------------
/docs/utilities/tests/blocs.md:
--------------------------------------------------------------------------------
1 | # Testing Code Blocks
2 |
3 | We created a utility that scans through the documentation, extracts code blocks and executes them to check that everything is indeed functional.
4 |
5 | There is more! Whenever the utility comes across an example (denoted by `# Out: `, see example below), an `assert` statement is dynamically added to the snippet to check that the output matches.
6 |
7 | For instance:
8 |
9 | ```python
10 | a = 1
11 |
12 | a
13 | # Out: 1
14 | ```
15 |
16 | Is transformed into:
17 |
18 | ```python
19 | a = 1
20 |
21 | v = a
22 | assert repr(v) == "1"
23 | ```
24 |
25 | We can disable code checking for a specific code block by adding a `.no-check` class to the code block:
26 |
27 | ````md
28 | ```python { .no-check }
29 | test = undeclared_function(42)
30 | ```
31 | ````
32 |
33 | Visit the source code of [test_docs.py](https://github.com/aphp/edsnlp/blob/master/tests/test_docs.py) for more information.
34 |
--------------------------------------------------------------------------------
/docs/utilities/tests/examples.md:
--------------------------------------------------------------------------------
1 | # Creating Examples
2 |
3 | Testing a NER/qualifier pipeline can be a hassle. We created a utility to simplify that process.
4 |
5 | Using the [`parse_example`][edsnlp.utils.examples.parse_example] method, you can define a full example in a human-readable way:
6 |
7 | ```python
8 | from edsnlp.utils.examples import parse_example
9 |
10 | example = "Absence d'<ent negated=true>image osseuse d'allure évolutive</ent>."
11 |
12 | text, entities = parse_example(example)
13 |
14 | text
15 | # Out: "Absence d'image osseuse d'allure évolutive."
16 |
17 | entities
18 | # Out: [Entity(start_char=10, end_char=42, modifiers=[Modifier(key='negated', value=True)])]
19 | ```
20 |
21 | Entities are defined using the `<ent>` tag. You can encode complex information by adding keys into the tag (see example above). The `parse_example` method strips the text of the tags, and outputs a list of `Entity` objects that contain:
22 |
23 | - the character indices of the entity ;
24 | - custom user-defined "modifiers".
25 |
26 | See the [dedicated reference page][edsnlp.utils.examples.parse_example] for more information.
27 |
--------------------------------------------------------------------------------
/docs/utilities/tests/index.md:
--------------------------------------------------------------------------------
1 | # Tests Utilities
2 |
3 | We provide a few testing utilities that simplify the process of:
4 |
5 | - creating testing examples for NLP pipelines;
6 | - testing documentation code blocks.
7 |
--------------------------------------------------------------------------------
/edsnlp/connectors/__init__.py:
--------------------------------------------------------------------------------
1 | from .brat import BratConnector
2 | from .omop import OmopConnector
3 |
--------------------------------------------------------------------------------
/edsnlp/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .registries import registry
2 | from .pipeline import PipelineProtocol
3 |
--------------------------------------------------------------------------------
/edsnlp/data/__init__.py:
--------------------------------------------------------------------------------
1 | from typing import TYPE_CHECKING
2 | from edsnlp.utils.lazy_module import lazify
3 |
4 | lazify()
5 |
6 | if TYPE_CHECKING:
7 | from .base import from_iterable, to_iterable
8 | from .standoff import read_standoff, write_standoff
9 | from .brat import read_brat, write_brat
10 | from .conll import read_conll
11 | from .json import read_json, write_json
12 | from .parquet import read_parquet, write_parquet
13 | from .spark import from_spark, to_spark
14 | from .pandas import from_pandas, to_pandas
15 | from .polars import from_polars, to_polars
16 | from .converters import get_dict2doc_converter, get_doc2dict_converter
17 |
--------------------------------------------------------------------------------
/edsnlp/data/brat.py:
--------------------------------------------------------------------------------
1 | from edsnlp.data.standoff import (
2 | dump_standoff_file,
3 | parse_standoff_file,
4 | read_standoff,
5 | write_standoff,
6 | )
7 |
8 | load_from_brat = parse_standoff_file
9 | export_to_brat = dump_standoff_file
10 |
11 | read_brat = read_standoff
12 | write_brat = write_standoff
13 |
--------------------------------------------------------------------------------
/edsnlp/extensions.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from datetime import date, datetime
3 |
4 | from dateutil.parser import parse as parse_date
5 | from spacy.tokens import Doc
6 |
7 | if not Doc.has_extension("note_id"):
8 | Doc.set_extension("note_id", default=None)
9 |
10 |
11 | def set_note_datetime(doc, dt):
12 | try:
13 | if type(dt) is datetime:
14 | pass
15 | elif isinstance(dt, str):
16 | dt = parse_date(dt)
17 | elif isinstance(dt, (int, float)):
18 | dt = datetime.fromtimestamp(dt)
19 | elif isinstance(dt, date):
20 | dt = datetime(dt.year, dt.month, dt.day)
21 | elif dt is None:
22 | pass
23 | key = doc._._get_key("note_datetime")
24 | doc.doc.user_data[key] = dt
25 | return
26 | except Exception:
27 | pass
28 |
29 | warnings.warn(f"Cannot cast {dt} as a note datetime", UserWarning)
30 |
31 |
32 | def get_note_datetime(doc):
33 | key = doc._._get_key("note_datetime")
34 | return doc.user_data.get(key, None)
35 |
36 |
37 | if not Doc.has_extension("note_datetime"):
38 | Doc.set_extension(
39 | "note_datetime",
40 | getter=get_note_datetime,
41 | setter=set_note_datetime,
42 | )
43 |
44 | if not Doc.has_extension("birth_datetime"):
45 | Doc.set_extension("birth_datetime", default=None)
46 |
--------------------------------------------------------------------------------
/edsnlp/matchers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/matchers/__init__.py
--------------------------------------------------------------------------------
/edsnlp/matchers/phrase.pxd:
--------------------------------------------------------------------------------
1 | from libcpp.vector cimport vector
2 | from spacy.matcher.phrasematcher cimport PhraseMatcher
3 | from spacy.structs cimport SpanC
4 | from spacy.tokens.doc cimport Doc
5 | from spacy.tokens.span cimport Span
6 | from spacy.typedefs cimport attr_t
7 |
8 |
9 | cdef class EDSPhraseMatcher(PhraseMatcher):
10 | cdef attr_t space_hash
11 | cdef attr_t excluded_hash
12 |
13 | cdef void find_matches(self, Doc doc, int start_idx, int end_idx, vector[SpanC] *matches) nogil
14 |
--------------------------------------------------------------------------------
/edsnlp/matchers/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, List, Union
2 |
3 | ListOrStr = Union[List[str], str]
4 | DictOrPattern = Union[str, List[str], Dict[str, Union[str, List[str]]]]
5 | Patterns = Dict[str, DictOrPattern]
6 |
7 |
8 | def normalize_token_attr(attr):
9 | if attr.startswith("doc.") or attr.startswith("span."):
10 | return None
11 | attr = attr.replace("token.", "")
12 | lower = attr.replace("_", "").lower()
13 | return "text" if lower == "orth" else lower
14 |
15 |
16 | ATTRIBUTES = {
17 | "LOWER": "lower_",
18 | "TEXT": "text",
19 | "NORM": "norm_",
20 | "SHAPE": "shape_",
21 | }
22 |
23 | from .offset import alignment # noqa: E402, F401
24 | from .text import get_text # noqa: E402, F401
25 |
--------------------------------------------------------------------------------
/edsnlp/matchers/utils/offset.py:
--------------------------------------------------------------------------------
1 | from edsnlp.utils.doc_to_text import get_char_offsets as alignment # noqa: E402, F401
2 |
--------------------------------------------------------------------------------
/edsnlp/matchers/utils/text.py:
--------------------------------------------------------------------------------
1 | from edsnlp.utils.doc_to_text import get_text # noqa: E402, F401
2 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/core/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/core/contextual_matcher/__init__.py:
--------------------------------------------------------------------------------
1 | from .contextual_matcher import ContextualMatcher
2 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/contextual_matcher/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 | from edsnlp.pipes.core.contextual_matcher import ContextualMatcher
3 |
4 | DEFAULT_CONFIG = dict(
5 | assign_as_span=False,
6 | alignment_mode="expand",
7 | attr="NORM",
8 | regex_flags=0,
9 | ignore_excluded=False,
10 | ignore_space_tokens=False,
11 | include_assigned=False,
12 | label_name=None,
13 | label=None,
14 | span_setter={"ents": True},
15 | )
16 |
17 | create_component = registry.factory.register(
18 | "eds.contextual_matcher",
19 | deprecated=["eds.contextual-matcher", "contextual-matcher"],
20 | )(ContextualMatcher)
21 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/endlines/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/core/endlines/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/core/endlines/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .endlines import EndLinesMatcher
4 |
5 | DEFAULT_CONFIG = dict(
6 | model_path=None,
7 | )
8 |
9 | create_component = registry.factory.register(
10 | "eds.endlines",
11 | assigns=["doc.ents", "doc.spans"],
12 | deprecated=["spaces"],
13 | )(EndLinesMatcher)
14 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/endlines/functional.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 | import pandas as pd
5 |
6 |
def get_dir_path(file):
    """
    Return the absolute directory containing ``file``.

    Parameters
    ----------
    file : str
        Path of a file (typically ``__file__``). Symlinks are resolved
        through ``os.path.realpath``.

    Returns
    -------
    str
        Absolute path of the directory containing ``file``.
    """
    return os.path.dirname(os.path.realpath(file))


def build_path(file, relative_path):
    """
    Build an absolute path from a reference file and a relative path.

    Parameters
    ----------
    file : str
        Main file from which we are calling. It could be ``__file__``.
    relative_path : str
        Relative path from the main file to the desired output.

    Returns
    -------
    str
        Absolute, normalized path to the target.
    """
    dir_path = get_dir_path(file)
    return os.path.abspath(os.path.join(dir_path, relative_path))
29 |
30 |
def _convert_series_to_array(s: pd.Series) -> np.ndarray:
    """
    Convert a pandas Series of ``n`` elements to an array of shape ``(n, 1)``.

    Parameters
    ----------
    s : pd.Series
        Input series.

    Returns
    -------
    np.ndarray
        Column vector of dtype ``object`` holding the series values.
    """
    values = s.to_numpy()
    return values.reshape(-1, 1).astype("O")
44 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/matcher/__init__.py:
--------------------------------------------------------------------------------
1 | from .matcher import GenericMatcher
2 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/matcher/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .matcher import GenericMatcher
4 |
5 | DEFAULT_CONFIG = dict(
6 | terms=None,
7 | regex=None,
8 | attr="TEXT",
9 | ignore_excluded=False,
10 | ignore_space_tokens=False,
11 | term_matcher="exact",
12 | term_matcher_config={},
13 | span_setter={"ents": True},
14 | )
15 |
16 | create_component = registry.factory.register(
17 | "eds.matcher",
18 | assigns=["doc.ents", "doc.spans"],
19 | deprecated=["matcher"],
20 | )(GenericMatcher)
21 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/normalizer/__init__.py:
--------------------------------------------------------------------------------
1 | from spacy.tokens import Token
2 |
# Register the custom Token extensions used by the normalizer pipes,
# guarding against double registration on repeated imports.
if not Token.has_extension("excluded"):
    Token.set_extension("excluded", default=False)


def excluded_or_space_getter(t):
    # A token is skippable when spaCy marks it as whitespace or when a
    # normalizer pipe tagged it as EXCLUDED.
    return t.tag_ == "EXCLUDED" or t.is_space


if not Token.has_extension("excluded_or_space"):
    Token.set_extension("excluded_or_space", getter=excluded_or_space_getter)
16 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/normalizer/accents/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/core/normalizer/accents/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/core/normalizer/accents/factory.py:
--------------------------------------------------------------------------------
from edsnlp.core import registry

from . import patterns
from .accents import AccentsConverter

# Default parameters for the eds.accents factory.
DEFAULT_CONFIG = {
    "accents": patterns.accents,
}

create_component = registry.factory.register(
    "eds.accents",
    assigns=["token.norm"],
    deprecated=["accents"],
)(AccentsConverter)
15 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/normalizer/accents/patterns.py:
--------------------------------------------------------------------------------
1 | from typing import List, Tuple
2 |
# Accentuated characters: each entry maps a group of accented variants to
# its unaccented replacement character.
accents: List[Tuple[str, str]] = [
    ("ç", "c"),
    ("àáâä", "a"),
    ("èéêë", "e"),
    ("ìíîï", "i"),
    ("òóôö", "o"),
    ("ùúûü", "u"),
]
# Mirror every mapping in uppercase.
accents.extend([(source.upper(), target.upper()) for source, target in accents])
14 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/normalizer/pollution/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/core/normalizer/pollution/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/core/normalizer/pollution/factory.py:
--------------------------------------------------------------------------------
from edsnlp.core import registry

from .patterns import default_enabled
from .pollution import PollutionTagger

# Default parameters for the eds.pollution factory.
DEFAULT_CONFIG = {
    "pollution": default_enabled,
}

create_component = registry.factory.register(
    "eds.pollution",
    assigns=["doc.spans"],
    deprecated=["pollution"],
)(PollutionTagger)
15 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/normalizer/quotes/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/core/normalizer/quotes/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/core/normalizer/quotes/factory.py:
--------------------------------------------------------------------------------
from edsnlp.core import registry

from .patterns import quotes_and_apostrophes
from .quotes import QuotesConverter

# Default parameters for the eds.quotes factory.
DEFAULT_CONFIG = {
    "quotes": quotes_and_apostrophes,
}

create_component = registry.factory.register(
    "eds.quotes",
    assigns=["token.norm"],
    deprecated=["quotes"],
)(QuotesConverter)
15 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/normalizer/quotes/patterns.py:
--------------------------------------------------------------------------------
1 | from typing import List, Tuple
2 |
# Source : https://util.unicode.org/UnicodeJsps/character.jsp?a=02EE
# FIX: the first entry had been corrupted into a bare '"""' sequence (which
# opens a triple-quoted string and breaks the module's syntax); it is the
# fullwidth quotation mark U+FF02.
quotes: List[str] = [
    "＂",
    "〃",
    "ײ",
    "᳓",
    "″",
    "״",
    "‶",
    "˶",
    "ʺ",
    "“",
    "”",
    "˝",
    "‟",
]

# Source : https://util.unicode.org/UnicodeJsps/character.jsp?a=0027
apostrophes: List[str] = [
    "`",
    "΄",
    "'",
    "ˈ",
    "ˊ",
    "ᑊ",
    "ˋ",
    "ꞌ",
    "ᛌ",
    "𖽒",
    "𖽑",
    "‘",
    "’",
    "י",
    "՚",
    "‛",
    "՝",
    "`",
    "`",
    "′",
    "׳",
    "´",
    "ʹ",
    "˴",
    "ߴ",
    "‵",
    "ߵ",
    "ʹ",
    "ʻ",
    "ʼ",
    "´",
    "᾽",
    "ʽ",
    "῾",
    "ʾ",
    "᾿",
]

# Replacement pairs consumed by the converter: every listed variant is
# normalized to the plain ASCII double quote / apostrophe.
quotes_and_apostrophes: List[Tuple[str, str]] = [
    ("".join(quotes), '"'),
    ("".join(apostrophes), "'"),
]
64 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/normalizer/remove_lowercase/__init__.py:
--------------------------------------------------------------------------------
1 | from .factory import create_component
2 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/normalizer/remove_lowercase/factory.py:
--------------------------------------------------------------------------------
1 | from spacy.tokens import Doc
2 |
3 | from edsnlp.core import PipelineProtocol, registry
4 |
5 |
def remove_lowercase(doc: Doc):
    """
    Restore case information on the `NORM` custom attribute.

    Copies each token's verbatim text into ``token.norm_``. Should always be
    applied first, before any other normalization pipe.

    Parameters
    ----------
    doc : Doc
        The spaCy `Doc` object.

    Returns
    -------
    Doc
        The document, with case put back in `NORM`.
    """
    for tok in doc:
        tok.norm_ = tok.text
    return doc
25 |
26 |
@registry.factory.register(
    "eds.remove_lowercase",
    assigns=["token.norm"],
    deprecated=[
        "remove-lowercase",
        "eds.remove-lowercase",
    ],
)
def create_component(
    nlp: PipelineProtocol,
    name: str,
):
    """
    Factory for the ``eds.remove_lowercase`` pipe.

    Add case on the `NORM` custom attribute. Should always be applied first.

    Parameters
    ----------
    nlp : PipelineProtocol
        The pipeline object.
    name : str
        The name of the component.

    Returns
    -------
    Callable[[Doc], Doc]
        The stateless ``remove_lowercase`` function used as the pipe.
    """
    return remove_lowercase  # pragma: no cover
50 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/normalizer/spaces/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/core/normalizer/spaces/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/core/normalizer/spaces/factory.py:
--------------------------------------------------------------------------------
from edsnlp.core import registry

from .spaces import SpacesTagger

# Default parameters for the eds.spaces factory.
DEFAULT_CONFIG = {"newline": True}

create_component = registry.factory.register(
    "eds.spaces",
    assigns=["token.tag"],
    deprecated=["spaces"],
)(SpacesTagger)
12 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/normalizer/spaces/spaces.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from spacy.tokens import Doc
4 |
5 | from edsnlp.core import PipelineProtocol
6 | from edsnlp.pipes.base import BaseComponent
7 |
8 |
class SpacesTagger(BaseComponent):
    """
    Tag whitespace-only tokens with the "SPACE" tag.

    We assign "SPACE" to `token.tag` to be used by optimized components
    such as the EDSPhraseMatcher.

    Parameters
    ----------
    nlp : Optional[PipelineProtocol]
        The pipeline object.
    name : Optional[str]
        The component name.
    newline : bool
        Whether to update the newline tokens too
    """

    def __init__(
        self,
        nlp: Optional[PipelineProtocol] = None,
        name: Optional[str] = "spaces",
        *,
        newline: bool = True,
    ):
        super().__init__(nlp, name)
        self.newline = newline

    def __call__(self, doc: Doc) -> Doc:
        """
        Tag every whitespace-only token of the document, in place.

        Parameters
        ----------
        doc: Doc

        Returns
        -------
        doc: Doc
        """
        # Pre-hash the tag once; token.tag stores the hash directly.
        space_tag = doc.vocab.strings["SPACE"]
        for token in doc:
            if not token.text.strip():
                token.tag = space_tag
        return doc
52 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/sentences/__init__.py:
--------------------------------------------------------------------------------
1 | from .sentences import SentenceSegmenter
2 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/sentences/factory.py:
--------------------------------------------------------------------------------
from edsnlp.core import registry

from .sentences import SentenceSegmenter

# CONSISTENCY: import registry from edsnlp.core, as every sibling factory
# module in this package does (the previous `from edsnlp import registry`
# reached the same object through the package root re-export).
create_component = registry.factory.register(
    "eds.sentences",
    assigns=["token.is_sent_start"],
    deprecated=["sentences"],
)(SentenceSegmenter)
10 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/sentences/fast_sentences.pxd:
--------------------------------------------------------------------------------
from libcpp cimport bool
from libcpp.set cimport set
from spacy.tokens.doc cimport Doc
from spacy.typedefs cimport attr_t

# Cython declaration file: these declarations must stay in sync with the
# matching definitions in the .pyx implementation module.
cdef class SentenceSegmenter(object):
    cdef str name

cdef class FastSentenceSegmenter(object):
    cdef bool ignore_excluded
    # attr_t fields are spaCy StringStore hashes, letting the nogil loop
    # compare integers instead of Python strings.
    cdef attr_t newline_hash
    cdef attr_t excluded_hash
    cdef attr_t endline_hash
    cdef set[attr_t] punct_chars_hash
    cdef set[attr_t] capitalized_shapes_hash
    # NOTE(review): presumably requires a capitalized token to confirm a
    # sentence start when set — confirm against the .pyx implementation.
    cdef bool check_capitalized
    cdef int min_newline_count

    # Core routine: runs without the GIL and mutates the doc in place.
    cdef void process(self, Doc doc) nogil
20 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/sentences/terms.py:
--------------------------------------------------------------------------------
1 | # Default punctuation defined for the sentencizer : https://spacy.io/api/sentencizer
2 | punctuation = {
3 | "!",
4 | ".",
5 | "?",
6 | "܂",
7 | "‼",
8 | "‽",
9 | "⁇",
10 | "⁈",
11 | "⁉",
12 | "﹖",
13 | "﹗",
14 | "!",
15 | ".",
16 | "?",
17 | }
18 |
--------------------------------------------------------------------------------
/edsnlp/pipes/core/terminology/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/core/terminology/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/core/terminology/factory.py:
--------------------------------------------------------------------------------
from edsnlp.core import registry

from .terminology import TerminologyMatcher

# Default parameters for the eds.terminology factory.
DEFAULT_CONFIG = {
    "terms": None,
    "regex": None,
    "attr": "TEXT",
    "ignore_excluded": False,
    "ignore_space_tokens": False,
    "term_matcher": "exact",
    "term_matcher_config": None,
    "span_setter": {"ents": True},
}

create_component = registry.factory.register(
    "eds.terminology",
    assigns=["doc.ents", "doc.spans"],
    deprecated=["terminology"],
)(TerminologyMatcher)
21 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/misc/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/consultation_dates/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/misc/consultation_dates/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/consultation_dates/factory.py:
--------------------------------------------------------------------------------
from edsnlp.core import registry

from .consultation_dates import ConsultationDatesMatcher

# Default parameters for the eds.consultation_dates factory.
# NOTE(review): the key really is spelled ``ignore_spacy_tokens`` here,
# unlike the ``ignore_space_tokens`` spelling used by sibling factories —
# presumably matching the ConsultationDatesMatcher signature; verify before
# renaming.
DEFAULT_CONFIG = {
    "consultation_mention": True,
    "town_mention": False,
    "document_date_mention": False,
    "attr": "NORM",
    "ignore_excluded": False,
    "ignore_spacy_tokens": False,
    "label": "consultation_date",
    "span_setter": {"ents": True, "consultation_dates": True},
}

create_component = registry.factory.register(
    "eds.consultation_dates",
    assigns=["doc.spans", "doc.ents"],
    deprecated=["consultation_dates"],
)(ConsultationDatesMatcher)
21 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/consultation_dates/patterns.py:
--------------------------------------------------------------------------------
# Phrases hinting that a consultation or examination is being mentioned.
consultation_mention = [
    r"rendez-vous pris",
    r"consultation",
    r"consultation.{1,8}examen",
    r"\bcs\b",
    r"examen clinique",
    r"de compte rendu",
    r"date de l'examen",
    r"examen realise le",
    r"date de la visite",
]

# City names (presumably hospital locations appearing in letter headers).
town_mention = [
    r"paris",
    r"kremlin.bicetre",
    r"creteil",
    r"boulogne.billancourt",
    r"villejuif",
    r"clamart",
    r"bobigny",
    r"clichy",
    r"ivry.sur.seine",
    r"issy.les.moulineaux",
    r"draveil",
    r"limeil",
    r"champcueil",
    r"roche.guyon",
    r"bondy",
    r"colombes",
    r"hendaye",
    r"berck.sur.mer",
    r"labruyere",
    r"garches",
    r"sevran",
    r"hyeres",
]

# Phrases introducing the date the document itself was produced or signed.
document_date_mention = [
    r"imprime le",
    r"signe electroniquement",
    r"signe le",
    r"saisi le",
    r"dicte le",
    r"tape le",
    r"date de reference",
    r"date\s*:",
    r"dactylographie le",
    r"date du rapport",
]
50 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/dates/__init__.py:
--------------------------------------------------------------------------------
1 | from .dates import DatesMatcher
2 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/dates/factory.py:
--------------------------------------------------------------------------------
from edsnlp.core import registry

from .dates import DatesMatcher

# Default parameters for the eds.dates factory.
DEFAULT_CONFIG = {
    "absolute": None,
    "relative": None,
    "duration": None,
    "false_positive": None,
    "on_ents_only": False,
    "span_getter": None,
    "merge_mode": "intersect",
    "detect_periods": False,
    "detect_time": True,
    "period_proximity_threshold": 3,
    "as_ents": False,
    "attr": "LOWER",
    "date_label": "date",
    "duration_label": "duration",
    "period_label": "period",
    "span_setter": {
        "dates": ["date"],
        "durations": ["duration"],
        "periods": ["period"],
    },
}

create_component = registry.factory.register(
    "eds.dates",
    assigns=["doc.spans", "doc.ents"],
    deprecated=["dates"],
)(DatesMatcher)
33 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/dates/patterns/__init__.py:
--------------------------------------------------------------------------------
1 | from .absolute import absolute_pattern, absolute_pattern_with_time
2 | from .current import current_pattern
3 | from .duration import duration_pattern
4 | from .false_positive import false_positive_pattern
5 | from .relative import relative_pattern
6 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/dates/patterns/atomic/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/misc/dates/patterns/atomic/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/dates/patterns/atomic/delimiters.py:
--------------------------------------------------------------------------------
1 | from edsnlp.utils.regex import make_pattern
2 |
# Core date-component delimiters: a slash, or a hyphen / minus sign.
raw_delimiters = [r"\/", r"[-−]"]
# Extended set: also accept a dot or any whitespace run.
delimiters = [*raw_delimiters, r"\.", r"[^\S]+"]

raw_delimiter_pattern = make_pattern(raw_delimiters)
raw_delimiter_with_spaces_pattern = make_pattern([*raw_delimiters, r"[^\S]+"])
delimiter_pattern = make_pattern(delimiters)
9 |
# NOTE(review): the lines below are corrupted extraction residue. The tail
# of delimiters.py (the `ante_num_pattern` definition) was truncated and
# fused with the beginning of the sibling directions.py module, whose
# header, imports and `preceding_directions = [` opener are missing. The
# regex strings also lost their named groups: `(?P<name>` was stripped down
# to `(?P` / `(?`, presumably by an HTML-tag-removal pass. Restore both
# files from the upstream repository; do not run this module as-is.
ante_num_pattern = (
    f"(?depuis|depuis\s+le|il\s+y\s+a|à)",
    r"(?Pdans)",
]

following_directions = [
    r"(?Pprochaine?s?|suivante?s?|plus\s+tard)",
    r"(?Pderni[eè]re?s?|passée?s?|pr[ée]c[ée]dente?s?|plus\s+t[ôo]t)",
]

preceding_direction_pattern = make_pattern(preceding_directions, with_breaks=True)
following_direction_pattern = make_pattern(following_directions, with_breaks=True)
15 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/dates/patterns/atomic/modes.py:
--------------------------------------------------------------------------------
1 | from edsnlp.utils.regex import make_pattern
2 |
# NOTE(review): the named groups below look mangled — `(?Pdepuis` was almost
# certainly `(?P<name>depuis` before an HTML-tag-stripping pass removed the
# `<name>` part. As written these strings are invalid regexes; restore the
# group names from the upstream repository before use.
modes = [
    r"(?Pdepuis|depuis\s+le|[àa]\s+partir\s+d[eu]|du)",
    r"(?Pjusqu'[àa]u?|au)",
]

mode_pattern = make_pattern(modes, with_breaks=True)
9 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/dates/patterns/atomic/months.py:
--------------------------------------------------------------------------------
1 | from edsnlp.utils.regex import make_pattern
2 |
# NOTE(review): every entry lost its `(?P<...>` group name (stripped down to
# `(?P`), making these invalid regexes; restore the names from upstream.
letter_months = [
    r"(?Pjanvier|janv\.?)",
    r"(?Pf[ée]vrier|f[ée]v\.?)",
    r"(?Pmars|mar\.?)",
    r"(?Pavril|avr\.?)",
    r"(?Pmai)",
    r"(?Pjuin)",
    r"(?Pjuillet|juill?\.?)",
    r"(?Pao[uû]t)",
    r"(?Pseptembre|sept?\.?)",
    r"(?Poctobre|oct\.?)",
    r"(?Pnovembre|nov\.?)",
    r"(?Pd[ée]cembre|d[ée]c\.?)",
]


letter_month_pattern = make_pattern(letter_months, with_breaks=True)

# NOTE(review): corrupted below — several source lines were collapsed into
# one, `lz_numeric_month_pattern` is referenced before being defined, and
# the group names are missing. Restore from upstream; this would raise a
# NameError at import time as written.
numeric_month_pattern = r"(?{numeric_month_pattern})"
lz_numeric_month_pattern = f"(?P{lz_numeric_month_pattern})"
month_pattern = f"({letter_month_pattern}|{numeric_month_pattern})"
27 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/dates/patterns/atomic/time.py:
--------------------------------------------------------------------------------
# NOTE(review): these patterns are corrupted — the `(?<!\d)` lookbehind and
# the `(?P<hour>` / `(?P<minute>` / `(?P<second>` group names were stripped
# (everything between `<` and `>` is gone), leaving invalid regexes such as
# `(?0?[0-9]...`. Restore from the upstream repository before use.
hour_pattern = r"(?0?[0-9]|1\d|2[0-3])(?!\d)"
lz_hour_pattern = r"(?0[1-9]|[12]\d|3[01])(?!\d)"

minute_pattern = r"(?0?[1-9]|[1-5]\d)(?!\d)"
lz_minute_pattern = r"(?0[0-9]|[1-5]\d)(?!\d)"

second_pattern = r"(?0?[1-9]|[1-5]\d)(?!\d)"
lz_second_pattern = r"(?0[0-9]|[1-5]\d)(?!\d)"

# The time pattern is always optional
time_pattern = (
    r"(\s.{,3}"
    + f"{hour_pattern}[h:]({lz_minute_pattern})?"
    + f"((:|m|min){lz_second_pattern})?"
    + ")?"
)
17 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/dates/patterns/atomic/units.py:
--------------------------------------------------------------------------------
1 | from edsnlp.utils.regex import make_pattern
2 |
# NOTE(review): the unit group names were stripped (`(?Pans?` was presumably
# `(?P<name>ans?` etc.); as written these are invalid regexes. Restore the
# group names from the upstream repository before use.
units = [
    r"(?Pans?|ann[ée]es?)",
    r"(?Psemestres?)",
    r"(?Ptrimestres?)",
    r"(?Pmois)",
    r"(?Psemaines?)",
    r"(?Pjours?|journ[ée]es?)",
    r"(?Ph|heures?)",
    r"(?Pmin|minutes?)",
    r"(?Psec|secondes?|s)",
]

unit_pattern = make_pattern(units, with_breaks=True)
16 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/dates/patterns/atomic/years.py:
--------------------------------------------------------------------------------
1 | from datetime import date
2 | from typing import List
3 |
4 | from edsnlp.utils.regex import make_pattern
5 |
# Explicit four-digit years: the whole 20th century as a single regex,
# plus every literal year from 2000 up to next year (inclusive).
year_patterns: List[str] = [r"19\d\d"] + [
    str(year) for year in range(2000, date.today().year + 2)
]
9 |
full_year_pattern = make_pattern(year_patterns, name="year")
year_pattern = make_pattern(year_patterns + [r"\d\d"], name="year")

# NOTE(review): corrupted extraction below — the tail of years.py (a
# redefinition of `full_year_pattern` with digit boundaries) was truncated
# and fused with the beginning of the sibling current.py module, whose
# header, imports and `current_patterns = [` opener are missing; the
# `(?P<...>` group names were also stripped. As written this is a
# SyntaxError; restore both files from the upstream repository.
full_year_pattern = r"(?cette\s+ann[ée]e)(?![-\s]l[àa])",
    r"(?Pce\s+jour|aujourd['\s]?hui)",
    r"(?Pcette\s+semaine|ces\sjours[-\s]ci)",
    r"(?Pce\smois([-\s]ci)?)",
]

current_pattern = make_pattern(current_patterns, with_breaks=True)
13 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/dates/patterns/duration.py:
--------------------------------------------------------------------------------
from .atomic import numbers, units

# Words introducing a duration.
cue_pattern = r"(pendant|durant|pdt)"

# A duration: a cue word, up to three filler characters, then a number
# immediately followed (modulo whitespace) by a unit.
duration_pattern = [
    rf"{cue_pattern}.{{,3}}{numbers.number_pattern}\s*{units.unit_pattern}"
]
8 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/dates/patterns/false_positive.py:
--------------------------------------------------------------------------------
from edsnlp.utils.regex import make_pattern

from .atomic.delimiters import delimiters

# Pagination artefacts such as "1/3".
page_patterns = [r"\d\/\d"]

# Phone numbers: at least three delimited two-digit groups plus a final pair.
phone_patterns = [rf"(\d\d{delimiter}){{3,}}\d\d" for delimiter in delimiters]

false_positive_pattern = make_pattern(page_patterns + phone_patterns)
12 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/quantities/__init__.py:
--------------------------------------------------------------------------------
1 | from edsnlp.pipes.misc.quantities.quantities import QuantitiesMatcher
2 | from edsnlp.pipes.misc.quantities.patterns import *
3 |
4 | from . import factory
5 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/quantities/factory.py:
--------------------------------------------------------------------------------
from edsnlp.core import registry

from . import patterns
from .quantities import QuantitiesMatcher

# Default parameters for the eds.quantities factory.
DEFAULT_CONFIG = {
    "quantities": list(patterns.common_quantities.keys()),
    "units_config": patterns.units_config,
    "number_terms": patterns.number_terms,
    "number_regex": patterns.number_regex,
    "stopwords": patterns.stopwords,
    "unit_divisors": patterns.unit_divisors,
    "ignore_excluded": True,
    "compose_units": True,
    "attr": "NORM",
    "extract_ranges": False,
    "range_patterns": patterns.range_patterns,
    "after_snippet_limit": 6,
    "before_snippet_limit": 10,
    "span_getter": None,
    "merge_mode": "intersect",
    "as_ents": False,
    "span_setter": None,
}

# Register under "eds.quantities"; the former names "eds.measures" and
# "eds.measurements" remain usable but are deprecated.
create_component = registry.factory.register(
    "eds.quantities",
    assigns=["doc.spans", "doc.ents"],
    deprecated=["eds.measures", "eds.measurements"],
)(QuantitiesMatcher)
31 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/reason/__init__.py:
--------------------------------------------------------------------------------
1 | from .patterns import reasons
2 | from .reason import ReasonMatcher
3 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/reason/factory.py:
--------------------------------------------------------------------------------
from edsnlp.core import registry

from .reason import ReasonMatcher

# Default parameters for the eds.reason factory.
DEFAULT_CONFIG = {
    "reasons": None,
    "attr": "TEXT",
    "use_sections": False,
    "ignore_excluded": False,
}

create_component = registry.factory.register(
    "eds.reason",
    assigns=["doc.spans", "doc.ents"],
    deprecated=["reason"],
)(ReasonMatcher)
17 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/reason/patterns.py:
--------------------------------------------------------------------------------
# Regexes catching sentences that state the reason for hospitalisation.
reasons = {
    "reasons": [
        r"(?i)motif de l.?hospitalisation : .+",
        r"(?i)hospitalis[ée].?.*(pour|. cause|suite [àa]).+",
        r"(?i)(consulte|prise en charge"
        r"(?!\set\svous\sassurer\sun\straitement\sadapté)).*pour.+",
        r"(?i)motif\sd.hospitalisation\s:.+",
        r"(?i)au total\s?\:?\s?\n?.+",
        r"(?i)motif\sde\sla\sconsultation",
        r"(?i)motif\sd.admission",
        r"(?i)conclusion\smedicale",
    ]
}

# Section titles that typically contain the admission reason.
sections_reason = ["motif", "conclusion"]

# Sections in which detected reasons should be discarded.
section_exclude = [
    "antécédents",
    "antécédents familiaux",
    "histoire de la maladie",
]
24 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/sections/__init__.py:
--------------------------------------------------------------------------------
from .sections import SectionsMatcher

# Legacy alias kept for backward compatibility with older imports.
Sections = SectionsMatcher
4 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/sections/factory.py:
--------------------------------------------------------------------------------
from edsnlp.core import registry

from .patterns import sections
from .sections import SectionsMatcher

# Default parameters for the eds.sections factory.
DEFAULT_CONFIG = {
    "sections": sections,
    "add_patterns": True,
    "attr": "NORM",
    "ignore_excluded": True,
}

create_component = registry.factory.register(
    "eds.sections",
    assigns=["doc.spans", "doc.ents"],
    deprecated=["sections"],
)(SectionsMatcher)
18 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/split/__init__.py:
--------------------------------------------------------------------------------
1 | from .split import Split
2 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/tables/__init__.py:
--------------------------------------------------------------------------------
1 | from .tables import TablesMatcher
2 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/tables/factory.py:
--------------------------------------------------------------------------------
from edsnlp.core import registry
from edsnlp.pipes.misc.tables import TablesMatcher

# Default parameters for the eds.tables factory.
DEFAULT_CONFIG = {
    "tables_pattern": None,
    "sep_pattern": None,
    "attr": "TEXT",
    "ignore_excluded": True,
}

create_component = registry.factory.register(
    "eds.tables",
    assigns=["doc.spans", "doc.ents"],
    deprecated=["tables"],
)(TablesMatcher)
16 |
--------------------------------------------------------------------------------
/edsnlp/pipes/misc/tables/patterns.py:
--------------------------------------------------------------------------------
# Characters that may separate table cells.
sep = ["¦", "|"]
# Row template: an optional leading separator, one or more "cell + separator"
# groups, a trailing cell, an optional separator and a newline — repeated at
# least {n} times ({{{n},}} renders as the quantifier "{n,}" once formatted).
regex_template = [
    r"(?:{sep}?(?:[^{sep}\n]*{sep})+[^{sep}\n]*{sep}?\n){{{n},}}"
]
3 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/adicap/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/adicap/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/adicap/factory.py:
--------------------------------------------------------------------------------
from edsnlp.core import registry

from .adicap import AdicapMatcher
from .patterns import adicap_prefix, base_code

# Default parameters for the eds.adicap factory.
DEFAULT_CONFIG = {
    "pattern": base_code,
    "prefix": adicap_prefix,
    "window": 500,
    "attr": "TEXT",
    "label": "adicap",
    "span_setter": {"ents": True, "adicap": True},
}

create_component = registry.factory.register(
    "eds.adicap",
    assigns=["doc.ents", "doc.spans"],
)(AdicapMatcher)
19 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/adicap/models.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import pydantic
4 |
5 |
class AdicapCode(pydantic.BaseModel):
    """
    Structured representation of a decoded ADICAP code.

    Attributes hold the decoded label of each code dimension; every field
    except the raw ``code`` is optional.
    """

    # Raw ADICAP code string
    code: str
    sampling_mode: Optional[str] = None
    technic: Optional[str] = None
    organ: Optional[str] = None
    pathology: Optional[str] = None
    pathology_type: Optional[str] = None
    behaviour_type: Optional[str] = None

    def norm(self) -> str:
        """Normalized form of the code: the raw code string itself."""
        return self.code

    def __str__(self):
        return self.norm()

    # Pydantic v1 compatibility: expose the v2 ``model_dump`` API.
    # FIX: compare the major version numerically — the previous string
    # comparison (``pydantic.VERSION < "2"``) is lexicographic and would
    # misclassify versions such as "10.0".
    if int(pydantic.VERSION.split(".")[0]) < 2:
        model_dump = pydantic.BaseModel.dict
23 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/adicap/patterns.py:
--------------------------------------------------------------------------------
1 | """
2 | Source :
3 | https://esante.gouv.fr/sites/default/files/media_entity/documents/cgts_sem_adicap_fiche-detaillee.pdf
4 | """
5 |
6 |
7 | # d1_4 = r"[A-Z]{4}"
8 | d1_4 = r"[A-Z]\.?[A-Z]\.?[A-Z]{2}\.?"
9 | d5_8_v1 = r"\d{4}"
10 | d5_8_v2 = r"\d{4}|[A-Z][0-9A-Z][A-Z][0-9]"
11 | d5_8_v3 = r"[0-9A-Z][0-9][09A-Z][0-9]"
12 | d5_8_v4 = r"0[A-Z][0-9]{2}"
13 |
14 |
15 | adicap_prefix = r"(?i)(codification|adicap)"
16 | base_code = (
17 | r"("
18 | + d1_4
19 | + r"(?:"
20 | + d5_8_v1
21 | + r"|"
22 | + d5_8_v2
23 | + r"|"
24 | + d5_8_v3
25 | + r"|"
26 | + d5_8_v4
27 | + r"))"
28 | )
29 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/behaviors/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/behaviors/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/behaviors/alcohol/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/behaviors/alcohol/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/behaviors/alcohol/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .alcohol import AlcoholMatcher
4 | from .patterns import default_patterns
5 |
# Default arguments for the ``eds.alcohol`` pipeline factory.
DEFAULT_CONFIG = {
    "patterns": default_patterns,
    "label": "alcohol",
    "span_setter": {"ents": True, "alcohol": True},
}

# Register the matcher so ``nlp.add_pipe("eds.alcohol")`` resolves to it.
_factory = registry.factory.register(
    "eds.alcohol",
    assigns=["doc.ents", "doc.spans"],
)
create_component = _factory(AlcoholMatcher)
16 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/behaviors/alcohol/patterns.py:
--------------------------------------------------------------------------------
1 | default_pattern = dict(
2 | source="alcohol",
3 | regex=[
4 | r"\balco[ol]",
5 | r"\bethyl",
6 | r"(? Dict[str, List[str]]:
9 | df = pd.read_csv(BASE_DIR / "resources" / "cim10.csv.gz")
10 |
11 | df["code_pattern"] = df["code"]
12 | df["code_point"] = df["code"].str[:2] + "." + df["code"].str[2:]
13 | df["code_space"] = df["code"].str[0] + " " + df["code"].str[1:]
14 | df["code_space_point"] = (
15 | df["code"].str[0] + " " + df["code"].str[1] + "." + df["code"].str[2:]
16 | )
17 |
18 | df = pd.concat(
19 | [
20 | df[["code", "short"]].rename(columns={"short": "patterns"}),
21 | df[["code", "long"]].rename(columns={"long": "patterns"}),
22 | df[["code", "code_pattern"]].rename(columns={"code_pattern": "patterns"}),
23 | df[["code", "code_point"]].rename(columns={"code_point": "patterns"}),
24 | df[["code", "code_space"]].rename(columns={"code_space": "patterns"}),
25 | df[["code", "code_space_point"]].rename(
26 | columns={"code_space_point": "patterns"}
27 | ),
28 | ]
29 | )
30 |
31 | patterns = df.groupby("code")["patterns"].agg(list).to_dict()
32 |
33 | return patterns
34 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/covid/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/covid/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/covid/patterns.py:
--------------------------------------------------------------------------------
1 | from edsnlp.utils.regex import make_pattern
2 |
# Spelling variants of the COVID-19 / SARS-CoV-2 disease name
# (optional hyphen or space separators, optional "19" suffix).
covid = [
    r"covid([-\s]?19)?",
    r"sars[-\s]?cov[-\s]?2",
    r"corona[-\s]?virus",
]

# Disease words that may introduce the virus name
# (e.g. "pneumopathie a covid").
diseases = [r"pneumopathies?", r"infections?"]

# Final pattern: an optional "<disease> a/au" prefix followed by one of the
# COVID spellings.
patterns = [r"(" + make_pattern(diseases) + r"\s[àa]u?\s)?" + make_pattern(covid)]
12 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/disorders/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/aids/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/disorders/aids/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/aids/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp import registry
2 |
3 | from .aids import AIDSMatcher
4 | from .patterns import default_patterns
5 |
# Default arguments for the ``eds.aids`` pipeline factory.
DEFAULT_CONFIG = {
    "patterns": default_patterns,
    "label": "aids",
    "span_setter": {"ents": True, "aids": True},
}

# Register the matcher so ``nlp.add_pipe("eds.aids")`` resolves to it;
# the legacy "eds.AIDS" name is kept as a deprecated alias.
_factory = registry.factory.register(
    "eds.aids",
    assigns=["doc.ents", "doc.spans"],
    deprecated=["eds.AIDS"],
)
create_component = _factory(AIDSMatcher)
17 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/aids/patterns.py:
--------------------------------------------------------------------------------
# Direct mentions of AIDS ("sida"), optionally preceded by "VIH ... stade".
aids = dict(
    source="aids",
    regex=[
        r"(vih.{1,5}stade.{1,5})?\bsida\b",
    ],
    regex_attr="NORM",
)

# HIV mentions; the "assign" groups below capture nearby context
# (opportunistic infections, CDC stage) consumed by the AIDS matcher pipe.
hiv = dict(
    source="hiv",
    regex=[
        r"\bhiv\b",
        r"\bvih\b",
    ],
    # Discard mentions appearing in serology / sampling contexts.
    exclude=dict(
        regex=["serologie", "prelevement"],
        window=(-20, 20),
        limit_to_sentence=False,
    ),
    assign=[
        dict(
            name="opportunist",
            # Opportunistic infections suggesting an AIDS stage; rejected when
            # followed within 20 characters by "non" or a "0" count.
            # NOTE(review): "cytomégalovirus" keeps an accent although
            # regex_attr is "NORM" — confirm NORM preserves accents here.
            regex=r"("
            + r"|".join(
                [
                    r"kapo[sz]i",
                    r"toxoplasmose",
                    r"meningo.?encephalite.toxo",
                    r"pneumocystose",
                    r"\bpep\b",
                    r"pneumocystis",
                    r"cryptococcose",
                    r"cytomégalovirus",
                    r"myobact",
                    r"opportunist",
                    r"co.?infect",
                ]
            )
            + ")"
            + r"(?!.{0,20}(?:non|0))",
            window=(-10, 30),
            limit_to_sentence=False,
        ),
        dict(
            name="stage",
            # CDC classification stage B or C near the mention.
            regex=r"stade.{0,5}\b(b|c)\b",
            window=10,
        ),
    ],
    regex_attr="NORM",
)

# Patterns used by default by the AIDS matcher.
default_patterns = [
    aids,
    hiv,
]
57 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/cerebrovascular_accident/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/disorders/cerebrovascular_accident/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/cerebrovascular_accident/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .cerebrovascular_accident import CerebrovascularAccidentMatcher
4 | from .patterns import default_patterns
5 |
# Default arguments for the ``eds.cerebrovascular_accident`` factory.
DEFAULT_CONFIG = {
    "patterns": default_patterns,
    "label": "cerebrovascular_accident",
    "span_setter": {"ents": True, "cerebrovascular_accident": True},
}

# Register the matcher under its factory name.
_factory = registry.factory.register(
    "eds.cerebrovascular_accident",
    assigns=["doc.ents", "doc.spans"],
)
create_component = _factory(CerebrovascularAccidentMatcher)
16 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/ckd/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/disorders/ckd/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/ckd/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp import registry
2 |
3 | from .ckd import CKDMatcher
4 | from .patterns import default_patterns
5 |
# Default arguments for the ``eds.ckd`` pipeline factory.
DEFAULT_CONFIG = {
    "patterns": default_patterns,
    "label": "ckd",
    "span_setter": {"ents": True, "ckd": True},
}

# Register the matcher; the legacy "eds.CKD" name is kept as a
# deprecated alias.
_factory = registry.factory.register(
    "eds.ckd",
    assigns=["doc.ents", "doc.spans"],
    deprecated=["eds.CKD"],
)
create_component = _factory(CKDMatcher)
17 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/congestive_heart_failure/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/disorders/congestive_heart_failure/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/congestive_heart_failure/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .congestive_heart_failure import CongestiveHeartFailureMatcher
4 | from .patterns import default_patterns
5 |
# Default arguments for the ``eds.congestive_heart_failure`` factory.
DEFAULT_CONFIG = {
    "patterns": default_patterns,
    "label": "congestive_heart_failure",
    "span_setter": {"ents": True, "congestive_heart_failure": True},
}

# Register the matcher under its factory name.
_factory = registry.factory.register(
    "eds.congestive_heart_failure",
    assigns=["doc.ents", "doc.spans"],
)
create_component = _factory(CongestiveHeartFailureMatcher)
16 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/connective_tissue_disease/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/disorders/connective_tissue_disease/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/connective_tissue_disease/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .connective_tissue_disease import ConnectiveTissueDiseaseMatcher
4 | from .patterns import default_patterns
5 |
# Default arguments for the ``eds.connective_tissue_disease`` factory.
DEFAULT_CONFIG = {
    "patterns": default_patterns,
    "label": "connective_tissue_disease",
    "span_setter": {"ents": True, "connective_tissue_disease": True},
}

# Register the matcher under its factory name.
_factory = registry.factory.register(
    "eds.connective_tissue_disease",
    assigns=["doc.ents", "doc.spans"],
)
create_component = _factory(ConnectiveTissueDiseaseMatcher)
16 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/copd/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/disorders/copd/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/copd/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp import registry
2 |
3 | from .copd import COPDMatcher
4 | from .patterns import default_patterns
5 |
# Default arguments for the ``eds.copd`` pipeline factory.
DEFAULT_CONFIG = {
    "patterns": default_patterns,
    "label": "copd",
    "span_setter": {"ents": True, "copd": True},
}

# Register the matcher; the legacy "eds.COPD" name is kept as a
# deprecated alias.
_factory = registry.factory.register(
    "eds.copd",
    assigns=["doc.ents", "doc.spans"],
    deprecated=["eds.COPD"],
)
create_component = _factory(COPDMatcher)
17 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/dementia/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/disorders/dementia/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/dementia/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .dementia import DementiaMatcher
4 | from .patterns import default_patterns
5 |
# Default arguments for the ``eds.dementia`` pipeline factory.
DEFAULT_CONFIG = {
    "patterns": default_patterns,
    "label": "dementia",
    "span_setter": {"ents": True, "dementia": True},
}

# Register the matcher under its factory name.
_factory = registry.factory.register(
    "eds.dementia",
    assigns=["doc.ents", "doc.spans"],
)
create_component = _factory(DementiaMatcher)
16 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/diabetes/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/disorders/diabetes/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/diabetes/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .diabetes import DiabetesMatcher
4 | from .patterns import default_patterns
5 |
# Default arguments for the ``eds.diabetes`` pipeline factory.
DEFAULT_CONFIG = {
    "patterns": default_patterns,
    "label": "diabetes",
    "span_setter": {"ents": True, "diabetes": True},
}

# Register the matcher under its factory name.
_factory = registry.factory.register(
    "eds.diabetes",
    assigns=["doc.ents", "doc.spans"],
)
create_component = _factory(DiabetesMatcher)
16 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/hemiplegia/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/disorders/hemiplegia/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/hemiplegia/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .hemiplegia import HemiplegiaMatcher
4 | from .patterns import default_patterns
5 |
# Default arguments for the ``eds.hemiplegia`` pipeline factory.
DEFAULT_CONFIG = {
    "patterns": default_patterns,
    "label": "hemiplegia",
    "span_setter": {"ents": True, "hemiplegia": True},
}

# Register the matcher under its factory name.
_factory = registry.factory.register(
    "eds.hemiplegia",
    assigns=["doc.ents", "doc.spans"],
)
create_component = _factory(HemiplegiaMatcher)
16 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/hemiplegia/patterns.py:
--------------------------------------------------------------------------------
# Textual mentions of hemi/para/tetra/quadri-plegia and related paralysis
# phrasings, matched on the normalized text.
main_pattern = {
    "source": "main",
    "regex": [
        r"hemiplegi",
        r"tetraplegi",
        r"quadriplegi",
        r"paraplegi",
        r"neuropathie.{1,25}motrice.{1,30}type [5V]",
        r"charcot.?marie.?tooth",
        r"locked.?in",
        r"syndrome.{1,5}(enfermement|verrouillage)|(desafferen)",
        r"paralysie.{1,10}hemicorps",
        r"paralysie.{1,10}jambe",
        r"paralysie.{1,10}membre",
        r"paralysie.{1,10}cote",
        r"paralysie.{1,5}cerebrale.{1,5}spastique",
    ],
    "regex_attr": "NORM",
}

# Upper-case acronyms, matched on the raw text (case-sensitive) to limit
# false positives.
acronym = {
    "source": "acronym",
    "regex": [
        r"\bLIS\b",
        r"\bNMSH\b",
    ],
    "regex_attr": "TEXT",
}

# Patterns used by default by the hemiplegia matcher.
default_patterns = [main_pattern, acronym]
34 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/leukemia/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/disorders/leukemia/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/leukemia/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .leukemia import LeukemiaMatcher
4 | from .patterns import default_patterns
5 |
# Default arguments for the ``eds.leukemia`` pipeline factory.
DEFAULT_CONFIG = {
    "patterns": default_patterns,
    "label": "leukemia",
    "span_setter": {"ents": True, "leukemia": True},
}

# Register the matcher under its factory name.
_factory = registry.factory.register(
    "eds.leukemia",
    assigns=["doc.ents", "doc.spans"],
)
create_component = _factory(LeukemiaMatcher)
16 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/liver_disease/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/disorders/liver_disease/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/liver_disease/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .liver_disease import LiverDiseaseMatcher
4 | from .patterns import default_patterns
5 |
# Default arguments for the ``eds.liver_disease`` pipeline factory.
DEFAULT_CONFIG = {
    "patterns": default_patterns,
    "label": "liver_disease",
    "span_setter": {"ents": True, "liver_disease": True},
}

# Register the matcher under its factory name.
_factory = registry.factory.register(
    "eds.liver_disease",
    assigns=["doc.ents", "doc.spans"],
)
create_component = _factory(LiverDiseaseMatcher)
16 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/lymphoma/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/disorders/lymphoma/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/lymphoma/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .lymphoma import LymphomaMatcher
4 | from .patterns import default_patterns
5 |
# Default arguments for the ``eds.lymphoma`` pipeline factory.
DEFAULT_CONFIG = {
    "patterns": default_patterns,
    "label": "lymphoma",
    "span_setter": {"ents": True, "lymphoma": True},
}

# Register the matcher under its factory name.
_factory = registry.factory.register(
    "eds.lymphoma",
    assigns=["doc.ents", "doc.spans"],
)
create_component = _factory(LymphomaMatcher)
16 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/myocardial_infarction/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/disorders/myocardial_infarction/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/myocardial_infarction/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .myocardial_infarction import MyocardialInfarctionMatcher
4 | from .patterns import default_patterns
5 |
# Default arguments for the ``eds.myocardial_infarction`` factory.
DEFAULT_CONFIG = {
    "patterns": default_patterns,
    "label": "myocardial_infarction",
    "span_setter": {"ents": True, "myocardial_infarction": True},
}

# Register the matcher under its factory name.
_factory = registry.factory.register(
    "eds.myocardial_infarction",
    assigns=["doc.ents", "doc.spans"],
)
create_component = _factory(MyocardialInfarctionMatcher)
16 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/myocardial_infarction/patterns.py:
--------------------------------------------------------------------------------
1 | from ..terms import HEART
2 |
# Direct coronary-disease mentions, matched on the normalized text.
main_pattern = {
    "source": "main",
    "regex": [
        r"coronaropathie",
        r"angor.{1,5}instable",
        r"cardiopathie(?!.{0,20}non).{0,20}(ischem|arteriosc)",
        r"cardio.?myopathie(?!.{0,20}non).{0,20}(ischem|arteriosc)",
        r"ischemi.{1,15}myocard",
        r"syndrome.{1,5}corona.{1,10}aigu",
        r"syndrome.{1,5}corona.{1,10}st",
        r"pontage.{1,5}mammaire",
    ],
    "regex_attr": "NORM",
}

# Procedure / lesion terms that only qualify when a nearby mention
# localizes them to the heart (captured by the "heart_localized" group).
with_localization = {
    "source": "with_localization",
    "regex": [
        r"\bstent",
        r"endoprothese",
        r"pontage",
        r"anevr[iy]sme",
        "infarctus",
        r"angioplasti",
    ],
    "assign": [
        {
            "name": "heart_localized",
            "regex": "({})".format(r"|".join(HEART)),
            "window": (-10, 10),
        },
    ],
    "regex_attr": "NORM",
}

# Acronyms (idm, sca, atl) with an optional ST-segment qualifier captured
# right after the mention.
acronym = {
    "source": "acronym",
    "regex": [
        r"\bidm\b",
        r"\bsca\b",
        r"\batl\b",
    ],
    "regex_attr": "NORM",
    "assign": {
        "name": "segment",
        "regex": r"st([+-])",
        "window": 2,
    },
}


# Patterns used by default by the myocardial infarction matcher.
default_patterns = [main_pattern, with_localization, acronym]
59 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/peptic_ulcer_disease/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/disorders/peptic_ulcer_disease/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/peptic_ulcer_disease/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .patterns import default_patterns
4 | from .peptic_ulcer_disease import PepticUlcerDiseaseMatcher
5 |
# Default arguments for the ``eds.peptic_ulcer_disease`` factory.
DEFAULT_CONFIG = {
    "patterns": default_patterns,
    "label": "peptic_ulcer_disease",
    "span_setter": {"ents": True, "peptic_ulcer_disease": True},
}

# Register the matcher under its factory name.
_factory = registry.factory.register(
    "eds.peptic_ulcer_disease",
    assigns=["doc.ents", "doc.spans"],
)
create_component = _factory(PepticUlcerDiseaseMatcher)
16 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/peptic_ulcer_disease/patterns.py:
--------------------------------------------------------------------------------
# Explicit peptic ulcer mentions (site-qualified "ulcere ..." phrases and
# ulcerated gastritis variants), matched on the normalized text.
main_pattern = {
    "source": "main",
    "regex": [
        r"ulcere.{1,10}gastr",
        r"ulcere.{1,10}duoden",
        r"ulcere.{1,10}antra",
        r"ulcere.{1,10}pept",
        r"ulcere.{1,10}estomac",
        r"ulcere.{1,10}curling",
        r"ulcere.{1,10}bulb",
        r"(œ|oe)sophagites.{1,5}pepti.{1,10}ulcer",
        r"gastrite.{1,20}ulcer",
        r"antrite.{1,5}ulcer",
    ],
    "regex_attr": "NORM",
}

# Acronym matched on the raw text (case-sensitive).
acronym = {
    "source": "acronym",
    "regex": [
        r"\bUGD\b",
    ],
    "regex_attr": "TEXT",
}

# Bare "ulcere" mentions: qualified as peptic only when a gastric/digestive
# context word is found nearby (captured by "is_peptic").
generic = {
    "source": "generic",
    "regex": r"ulcere",
    "regex_attr": "NORM",
    "assign": {
        "name": "is_peptic",
        "regex": r"\b(gastr|digest)",
        "window": (-20, 20),
        "limit_to_sentence": False,
    },
}

# Patterns used by default by the peptic ulcer disease matcher.
default_patterns = [main_pattern, acronym, generic]
43 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/peripheral_vascular_disease/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/disorders/peripheral_vascular_disease/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/peripheral_vascular_disease/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp import registry
2 |
3 | from .patterns import default_patterns
4 | from .peripheral_vascular_disease import PeripheralVascularDiseaseMatcher
5 |
# Default arguments for the ``eds.peripheral_vascular_disease`` factory.
DEFAULT_CONFIG = {
    "patterns": default_patterns,
    "label": "peripheral_vascular_disease",
    "span_setter": {"ents": True, "peripheral_vascular_disease": True},
}

# Register the matcher under its factory name.
_factory = registry.factory.register(
    "eds.peripheral_vascular_disease",
    assigns=["doc.ents", "doc.spans"],
)
create_component = _factory(PeripheralVascularDiseaseMatcher)
16 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/solid_tumor/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/disorders/solid_tumor/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/solid_tumor/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .patterns import default_patterns
4 | from .solid_tumor import SolidTumorMatcher
5 |
# Default arguments for the ``eds.solid_tumor`` pipeline factory.
# ``use_tnm`` disables TNM staging extraction by default.
DEFAULT_CONFIG = {
    "patterns": default_patterns,
    "use_tnm": False,
    "label": "solid_tumor",
    "span_setter": {"ents": True, "solid_tumor": True},
}

# Register the matcher under its factory name.
_factory = registry.factory.register(
    "eds.solid_tumor",
    assigns=["doc.ents", "doc.spans"],
)
create_component = _factory(SolidTumorMatcher)
17 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/disorders/terms.py:
--------------------------------------------------------------------------------
# Regex fragments identifying the heart / coronary territory, used by
# disorder patterns (e.g. myocardial infarction) to localize a mention.
HEART = [
    r"cardi",
    r"coronari",
    r"coronair",
    # NOTE(review): the acronyms below look like French coronary-artery
    # abbreviations (cd = coronaire droite, iva/ivp/ivg/ivd = branches
    # interventriculaires, cx = circonflexe, mg = marginale,
    # rvp = retroventriculaire posterieure) — confirm with a clinician.
    r"\bcd\b",
    r"\biva\d?\b",
    r"\bivp\d?\b",
    r"\bivg\d?\b",
    r"\bivd\d?\b",
    r"intra.?va\b",
    r"intra.?vp\b",
    r"intra.?vg\b",
    r"intra.?vd\b",
    r"circonflexe",
    r"\bcx\b",
    r"marginale",
    r"\bmg\b",
    r"\bdiago",
    r"\brvp\b",
    r"myocard",
    "apical",
    "septal",
    "ventricul",
    "coeur",
    "cœur",
    "auriculaire",
    "parietal",
    "septum",
]

# Regex fragments identifying the brain / cerebrovascular territory.
BRAIN = [
    r"cerveau",
    r"cereb",
    r"cran",
    r"v4",
    r"m1",
    r"aica",
    r"\bpica",
    r"basilaire",
    r"polygone de willis",
    r"cercle de willis",
    r"sylvien",
    r"arachnoi",
    r"meninge",
    r"dura(?:l|ux)",
    r"puncti",
    r"front",
    r"tempo",
    r"occipi",
    r"parieta",
    # r"segment", Too generic
]

# Regex fragments identifying peripheral limbs / extremities.
PERIPHERAL = [
    "pied",
    "main",
    # NOTE(review): mi/mig/mid presumably stand for "membre inferieur"
    # (gauche / droit) — confirm.
    r"\bmi\b",
    r"\bmig\b",
    r"\bmid\b",
    "membre",
    "jambe",
    "bras",
    "doigt",
    "digital",
    "orteil",
]

# Regex fragments flagging an asymptomatic / non-decompensated condition.
ASYMPTOMATIC = [
    r"asympto",
    r"sans.decompens",
    r"non.decompens",
]
73 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/drugs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/drugs/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/drugs/patterns.py:
--------------------------------------------------------------------------------
1 | import json
2 | from typing import Dict, List
3 |
4 | from edsnlp import BASE_DIR
5 |
# Path to the packaged drug-name dictionary.
drugs_file = BASE_DIR / "resources" / "drugs.json"


def get_patterns() -> Dict[str, List[str]]:
    """
    Load the drug patterns dictionary shipped with the package.

    Returns
    -------
    Dict[str, List[str]]
        Mapping from drug identifier to its list of textual variants.
    """
    # Explicit UTF-8: relying on the locale default encoding (e.g. cp1252
    # on Windows) would garble accented drug names in the JSON resource.
    with open(drugs_file, "r", encoding="utf-8") as f:
        return json.load(f)
12 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/scores/__init__.py:
--------------------------------------------------------------------------------
1 | from edsnlp.pipes.ner.scores.base_score import SimpleScoreMatcher
2 |
# NOTE(review): alias presumably kept for backward compatibility with code
# importing the component under its former ``Score`` name — confirm before
# removing.
Score = SimpleScoreMatcher
4 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/scores/charlson/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/scores/charlson/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/scores/charlson/patterns.py:
--------------------------------------------------------------------------------
1 | from typing import Union
2 |
3 | import spacy
4 |
# Trigger term for the Charlson comorbidity index.
regex = [r"charlson"]

# Capture the first integer following the trigger (possibly separated by
# newlines or non-word characters).
value_extract = r"^.*?[\n\W]*?(\d+)"

# Name under which the normalization callable is registered in spaCy's
# ``misc`` registry; referenced from the score matcher configuration.
score_normalization_str = "score_normalization.charlson"


@spacy.registry.misc(score_normalization_str)
def score_normalization(extracted_score: Union[str, None]):
    """
    Charlson score normalization.
    If available, returns the integer value of the Charlson score.

    Returns ``None`` (implicitly or via the except branch) when the value
    is missing, non-numeric, or outside the accepted 0-29 range.
    """
    score_range = list(range(0, 30))
    try:
        if (extracted_score is not None) and (int(extracted_score) in score_range):
            return int(extracted_score)
    except ValueError:
        # Non-numeric capture: yield no score instead of crashing.
        return None
24 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/scores/elston_ellis/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/scores/elston_ellis/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/scores/elston_ellis/patterns.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import Union
3 |
4 | import spacy
5 |
# Trigger terms: "Elston & Ellis" spelling variants or the "EE" acronym.
regex = [r"[Ee]lston (& |et |and )?[Ee]llis", r"\b[Ee]{2}\b"]

# A parenthesized triple of 0-3 grades (e.g. "(3+3+2)").
pattern1 = r"[^\d\(\)]*[0-3]"
pattern2 = r".{0,2}[\+,]"
value_extract = rf"(?s).(\({pattern1}{pattern2}{pattern1}{pattern2}{pattern1}\))"

# Name under which the normalization callable is registered in spaCy's
# ``misc`` registry; referenced from the score matcher configuration.
score_normalization_str = "score_normalization.elstonellis"


@spacy.registry.misc(score_normalization_str)
def score_normalization(extracted_score: Union[str, None]):
    """
    Elston and Ellis score normalization.
    If available, returns the integer value of the Elston and Ellis score.

    The digits 0-3 found in the extraction are summed and mapped to grade
    1 (sum <= 5), 2 (sum <= 7) or 3 (sum >= 8). Returns ``None`` when no
    score could be extracted.
    """
    # Guard against a missing capture: the previous implementation passed
    # None straight to re.findall, raising an uncaught TypeError although
    # the signature admits None.
    if extracted_score is None:
        return None
    try:
        total = 0
        for digit in re.findall(r"[0-3]", extracted_score):
            total += int(digit)
    except ValueError:
        return None
    if total <= 5:
        return 1
    elif total <= 7:
        return 2
    else:
        return 3
37 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/scores/emergency/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/scores/emergency/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/scores/emergency/ccmu/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/scores/emergency/ccmu/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/scores/emergency/ccmu/patterns.py:
--------------------------------------------------------------------------------
1 | from typing import Union
2 |
3 | import spacy
4 |
# Trigger term for the CCMU emergency severity score.
regex = [r"\bccmu\b"]

# Capture the first integer following the trigger (possibly separated by
# newlines or non-word characters).
value_extract = r"^.*?[\n\W]*?(\d+)"

# Name under which the normalization callable is registered in spaCy's
# ``misc`` registry; referenced from the score matcher configuration.
score_normalization_str = "score_normalization.ccmu"


@spacy.registry.misc(score_normalization_str)
def score_normalization(extracted_score: Union[str, None]):
    """
    CCMU score normalization.
    If available, returns the integer value of the CCMU score.

    Returns ``None`` when the value is missing, non-numeric, or outside
    the accepted 1-5 range.
    """
    score_range = [1, 2, 3, 4, 5]
    try:
        if (extracted_score is not None) and (int(extracted_score) in score_range):
            return int(extracted_score)
    except ValueError:
        # Mirror the charlson normalizer: a non-numeric capture yields no
        # score instead of raising and crashing the pipeline.
        return None
21 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/scores/emergency/gemsa/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/scores/emergency/gemsa/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/scores/emergency/gemsa/patterns.py:
--------------------------------------------------------------------------------
1 | from typing import Union
2 |
3 | import spacy
4 |
5 | regex = [r"\bgemsa\b"]
6 |
7 | value_extract = r"^.*?[\n\W]*?(\d+)"
8 |
9 | score_normalization_str = "score_normalization.gemsa"
10 |
11 |
@spacy.registry.misc(score_normalization_str)
def score_normalization(extracted_score: Union[str, None]):
    """
    GEMSA score normalization.

    Returns the extracted value as an ``int`` when it falls in the valid
    GEMSA range (1 to 6 inclusive); otherwise returns ``None``.
    """
    if extracted_score is None:
        return None
    value = int(extracted_score)
    return value if 1 <= value <= 6 else None
21 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/scores/emergency/priority/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/scores/emergency/priority/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/scores/emergency/priority/patterns.py:
--------------------------------------------------------------------------------
1 | from typing import Union
2 |
3 | import spacy
4 |
# Trigger pattern: the standalone token "priorite" (unaccented, NORM attr).
regex = [r"\bpriorite\b"]

# Captures the first integer following the trigger.
value_extract = r"^.*?[\n\W]*?(\d+)"

# Name under which the normalization function is registered in spaCy's misc registry.
score_normalization_str = "score_normalization.priority"
10 |
11 |
@spacy.registry.misc(score_normalization_str)
def score_normalization(extracted_score: Union[str, None]):
    """
    Priority score normalization.

    Returns the extracted value as an ``int`` when it falls in the valid
    priority range (0 to 5 inclusive); otherwise returns ``None``.
    """
    if extracted_score is None:
        return None
    value = int(extracted_score)
    return value if 0 <= value <= 5 else None
21 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/scores/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 | from edsnlp.pipes.ner.scores.base_score import SimpleScoreMatcher
3 |
# Default parameters for the generic score matcher.
DEFAULT_CONFIG = {
    "regex": None,
    "attr": "NORM",
    "value_extract": None,
    "score_normalization": None,
    "window": 7,
    "ignore_excluded": False,
    "ignore_space_tokens": False,
    "flags": 0,
    "span_setter": {"ents": True},
}

# Register the matcher under "eds.score" ("score" kept as a deprecated alias).
_register = registry.factory.register(
    "eds.score",
    assigns=["doc.ents", "doc.spans"],
    deprecated=["score"],
)
create_component = _register(SimpleScoreMatcher)
21 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/scores/sofa/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/scores/sofa/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/scores/sofa/patterns.py:
--------------------------------------------------------------------------------
1 | from typing import Union
2 |
3 | import spacy
4 |
# Trigger pattern: the standalone token "sofa".
regex = [r"\bsofa\b"]

# NOTE(review): this pattern does not appear to be used in this module —
# confirm whether it is consumed elsewhere before removing.
digits = r"[^\d]*(\d*)"

# Named sub-patterns: how the score was measured (maximum, at 24h,
# at admission) and the score value itself.
value_extract = [
    dict(
        name="method_max",
        regex=r"(max)",
        reduce_mode="keep_first",
    ),
    dict(
        name="method_24h",
        regex=r"(24h)",
        reduce_mode="keep_first",
    ),
    dict(
        name="method_adm",
        regex=r"(admission)",
        reduce_mode="keep_first",
    ),
    dict(
        name="value",
        # First integer not followed by "h" or another digit (avoids "24h").
        regex=r"^.*?[\n\W]*?(\d+)(?![h0-9])",
    ),
]

# Name under which the normalization function is registered in spaCy's misc registry.
score_normalization_str = "score_normalization.sofa"
32 |
33 |
@spacy.registry.misc(score_normalization_str)
def score_normalization(extracted_score: Union[str, None]):
    """
    SOFA score normalization.

    Returns the extracted value as an ``int`` when it falls in the valid
    SOFA range (0 to 29 inclusive); otherwise returns ``None``.
    """
    if extracted_score is None:
        return None
    value = int(extracted_score)
    return value if 0 <= value <= 29 else None
43 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/suicide_attempt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/suicide_attempt/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/suicide_attempt/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 | from edsnlp.pipes.ner.suicide_attempt.suicide_attempt import SuicideAttemptMatcher
3 |
# Register the suicide attempt matcher under the "eds.suicide_attempt" name.
_register = registry.factory.register(
    "eds.suicide_attempt",
    assigns=["doc.ents", "doc.spans"],
)
create_component = _register(SuicideAttemptMatcher)
8 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/tnm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/tnm/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/tnm/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .patterns import tnm_pattern
4 | from .tnm import TNMMatcher
5 |
# Default parameters for the TNM matcher.
DEFAULT_CONFIG = {
    "pattern": tnm_pattern,
    "attr": "TEXT",
    "label": "tnm",
    "span_setter": {"ents": True, "tnm": True},
}

# Register the matcher under "eds.tnm" ("eds.TNM" kept as a deprecated alias).
_register = registry.factory.register(
    "eds.tnm",
    assigns=["doc.ents", "doc.spans"],
    deprecated=["eds.TNM"],
)
create_component = _register(TNMMatcher)
18 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/tnm/patterns.py:
--------------------------------------------------------------------------------
# NOTE(review): the named-group names below were restored — the "(?P<name>"
# parts had been stripped in this copy, leaving invalid "(?P[...]" syntax.
# Confirm the names match the groupdict keys expected by tnm.py before merging.

# Optional prefix qualifying how the stage was assessed (c, p, yp, r, a, u, ...).
prefix_pattern = r"(?P<prefix>[cpPyraum]p?)"
# T: primary tumour extent, with optional specification and parenthesized suffix.
tumour_pattern = r"T\s?(?P<tumour>([0-4o]|is))?(?P<tumour_specification>[abcdx]|mi)?"
tumour_pattern += r"(?:\((?P<tumour_suffix>[^()]{1,10})\))?"
# N: regional lymph-node involvement.
node_pattern = r"(\s{,2}\/?\s{,2}([cpPyraum]p?)?\s{,2}N\s?(?P<node>[0-3o]|x)"
node_pattern += (
    r"(?P<node_specification>[abcdx]|mi)?(?:\((?P<node_suffix>[^()]{1,10})\))?)"
)

# M: distant metastasis.
metastasis_pattern = (
    r"(\s{,2}\/?\s{,2}([cpPyraum]p?)?\s{,2}M\s?(?P<metastasis>([01o]|x))x?)"
)
# R: completeness of the surgical resection.
resection_completeness = r"(\s{,2}\/?\s{,2}R\s?(?P<resection_completeness>[012]))"

# Classification system and edition year, e.g. "(UICC 2017)".
version_pattern = (
    r"\(?(?P<version>uicc|accj|tnm|UICC|ACCJ|TNM)"
    r"\s+([éeE]ditions|[éeE]d\.?)?\s{,2}?"
    r"(?P<version_year>\d{4}|\d{2})\)?"
)

# Up to 5 arbitrary characters (including newlines) between TNM parts.
spacer = r"(.|\n){1,5}"

# Full pattern: optional version context (lookbehind), then prefix + T,
# optionally followed by N, M, R and a trailing version, anchored on
# word boundaries.
tnm_pattern = f"(?<={version_pattern}{spacer})?"
tnm_pattern += prefix_pattern + r"\s{,2}?" + f"({tumour_pattern})"
tnm_pattern += r"(\s{,2}" + f"{node_pattern})?"
tnm_pattern += r"(\s{,2}" + f"{metastasis_pattern})?"
tnm_pattern += r"(\s{,2}" + f"{resection_completeness})?"
tnm_pattern += f"({spacer}{version_pattern})?"
tnm_pattern = r"(?:\b|^)" + tnm_pattern + r"(?:\b|$)"
29 |
--------------------------------------------------------------------------------
/edsnlp/pipes/ner/umls/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/ner/umls/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/qualifiers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/qualifiers/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/qualifiers/family/__init__.py:
--------------------------------------------------------------------------------
1 | from .family import FamilyContextQualifier
2 |
3 | FamilyContext = FamilyContextQualifier
4 |
--------------------------------------------------------------------------------
/edsnlp/pipes/qualifiers/family/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .family import FamilyContextQualifier
4 |
# Default parameters for the family context qualifier.
DEFAULT_CONFIG = {
    "attr": "NORM",
    "family": None,
    "termination": None,
    "use_sections": True,
    "span_getter": None,
    "on_ents_only": True,
    "explain": False,
}

# Register the qualifier under "eds.family" ("family" kept as a deprecated alias).
_register = registry.factory.register(
    "eds.family",
    assigns=["span._.family"],
    deprecated=["family"],
)
create_component = _register(FamilyContextQualifier)
20 |
--------------------------------------------------------------------------------
/edsnlp/pipes/qualifiers/family/patterns.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
# French kinship and family-context terms: an entity near one of these is
# qualified as concerning a family member rather than the patient.
family: List[str] = [
    "aïeul",
    "aïeux",
    "antécédent familial",
    "antécédents familiaux",
    "arrière-grand-mère",
    "arrière-grand-père",
    "arrière-grands-parents",
    "cousin",
    "cousine",
    "cousines",
    "cousins",
    "enfant",
    "enfants",
    "épouse",
    "époux",
    "familial",
    "familiale",
    "familiales",
    "familiaux",
    "famille",
    "fiancé",
    "fiancée",
    "fils",
    "fille",
    "filles",
    "frère",
    "frères",
    "grand-mère",
    "grand-père",
    "grands-parents",
    "maman",
    "mari",
    "mère",
    "oncle",
    "papa",
    "parent",
    "parents",
    "père",
    "soeur",
    "sœur",
    "sœurs",
    "soeurs",
    "tante",
    "neveu",
    "neveux",
    "nièce",
    "nièces",
]
52 |
--------------------------------------------------------------------------------
/edsnlp/pipes/qualifiers/history/__init__.py:
--------------------------------------------------------------------------------
1 | from .history import HistoryQualifier
2 |
3 | History = HistoryQualifier
4 |
--------------------------------------------------------------------------------
/edsnlp/pipes/qualifiers/history/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .history import HistoryQualifier
4 |
# Default parameters for the history qualifier.
DEFAULT_CONFIG = {
    "history": None,
    "termination": None,
    "use_sections": False,
    "use_dates": False,
    "attr": "NORM",
    "history_limit": 14,
    "closest_dates_only": True,
    "exclude_birthdate": True,
    "span_getter": None,
    "on_ents_only": True,
    "explain": False,
}

# Register under "eds.history"; older names are kept as deprecated aliases.
_register = registry.factory.register(
    "eds.history",
    assigns=["span._.history"],
    deprecated=[
        "history",
        "antecedents",
        "eds.antecedents",
    ],
)
create_component = _register(HistoryQualifier)
28 |
--------------------------------------------------------------------------------
/edsnlp/pipes/qualifiers/history/patterns.py:
--------------------------------------------------------------------------------
# Trigger terms marking a mention as belonging to the patient's medical history.
# NOTE(review): "tacds" looks like a transposition of "atcds" — presumably kept
# on purpose to catch a frequent typo in clinical notes; confirm before removing.
history = [
    "antécédents",
    "atcd",
    "atcds",
    "tacds",
    "antécédent",
]

# Section titles whose content is treated as history.
sections_history = [
    "antécédents",
    "antécédents familiaux",
    "histoire de la maladie",
]
14 |
--------------------------------------------------------------------------------
/edsnlp/pipes/qualifiers/hypothesis/__init__.py:
--------------------------------------------------------------------------------
1 | from .hypothesis import HypothesisQualifier
2 |
3 | Hypothesis = HypothesisQualifier
4 |
--------------------------------------------------------------------------------
/edsnlp/pipes/qualifiers/hypothesis/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .hypothesis import HypothesisQualifier
4 |
# Default parameters for the hypothesis qualifier.
DEFAULT_CONFIG = {
    "pseudo": None,
    "preceding": None,
    "following": None,
    "verbs_eds": None,
    "verbs_hyp": None,
    "termination": None,
    "attr": "NORM",
    "span_getter": None,
    "on_ents_only": True,
    "within_ents": False,
    "explain": False,
}

# Register under "eds.hypothesis" ("hypothesis" kept as a deprecated alias).
_register = registry.factory.register(
    "eds.hypothesis",
    assigns=["span._.hypothesis"],
    deprecated=["hypothesis"],
)
create_component = _register(HypothesisQualifier)
24 |
--------------------------------------------------------------------------------
/edsnlp/pipes/qualifiers/negation/__init__.py:
--------------------------------------------------------------------------------
1 | from .negation import NegationQualifier
2 |
3 | Negation = NegationQualifier
4 |
--------------------------------------------------------------------------------
/edsnlp/pipes/qualifiers/negation/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .negation import NegationQualifier
4 |
# Default parameters for the negation qualifier.
DEFAULT_CONFIG = {
    "pseudo": None,
    "preceding": None,
    "preceding_regex": None,
    "following": None,
    "verbs": None,
    "termination": None,
    "attr": "NORM",
    "span_getter": None,
    "on_ents_only": True,
    "within_ents": False,
    "explain": False,
}

# Register under "eds.negation" ("negation" kept as a deprecated alias).
_register = registry.factory.register(
    "eds.negation",
    assigns=["span._.negation"],
    deprecated=["negation"],
)
create_component = _register(NegationQualifier)
24 |
--------------------------------------------------------------------------------
/edsnlp/pipes/qualifiers/reported_speech/__init__.py:
--------------------------------------------------------------------------------
1 | from .reported_speech import ReportedSpeechQualifier
2 |
3 | ReportedSpeech = ReportedSpeechQualifier
4 |
--------------------------------------------------------------------------------
/edsnlp/pipes/qualifiers/reported_speech/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import registry
2 |
3 | from .reported_speech import ReportedSpeechQualifier
4 |
# Default parameters for the reported speech qualifier.
DEFAULT_CONFIG = {
    "pseudo": None,
    "preceding": None,
    "following": None,
    "quotation": None,
    "verbs": None,
    "attr": "NORM",
    "span_getter": None,
    "on_ents_only": True,
    "within_ents": False,
    "explain": False,
}

# Register under "eds.reported_speech"; older names kept as deprecated aliases.
_register = registry.factory.register(
    "eds.reported_speech",
    assigns=["span._.reported_speech"],
    deprecated=[
        "reported_speech",
        "rspeech",
    ],
)
create_component = _register(ReportedSpeechQualifier)
26 |
--------------------------------------------------------------------------------
/edsnlp/pipes/qualifiers/reported_speech/patterns.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
# Speech verbs (infinitives): a nearby form of one of these marks a statement
# as reported by the patient rather than asserted by the clinician.
# Fix: removed duplicated entries ("décrire", "nier", "vouloir" each
# appeared twice), which added nothing to the matcher.
verbs: List[str] = [
    # 'admettre', > False positive: "admis à l'hopital"
    "affirmer",
    "ajouter",
    "assurer",
    "confirmer",
    "demander",
    "dire",
    "déclarer",
    "décrire",
    "démontrer",
    "expliquer",
    "faire remarquer",
    "indiquer",
    "informer",
    "insinuer",
    "insister",
    "jurer",
    "nier",
    "noter",
    "objecter",
    "observer",
    "parler",
    "promettre",
    "préciser",
    "prétendre",
    "prévenir",
    "raconter",
    "rappeler",
    "rapporter",
    "reconnaître",
    "réfuter",
    "répliquer",
    "répondre",
    "répéter",
    "révéler",
    "se plaindre",
    "souhaiter",
    "souligner",
    "supplier",
    "verbaliser",
    "vouloir",
]

# Expressions after the entity that mark reported speech.
following: List[str] = [r"d'après le patient", r"d'après la patiente"]

# Expressions before the entity that mark reported speech.
preceding: List[str] = [
    r"pas de critique de",
    r"crainte de",
    r"menace de",
    r"insiste sur le fait que",
    r"d'après le patient",
    r"d'après la patiente",
    r"peur de",
]
# Text between double quotes or French angle quotes.
quotation: str = r"(\".+\")|(\«.+\»)"
62 |
--------------------------------------------------------------------------------
/edsnlp/pipes/terminations.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
# Terms and punctuation that end the scope of a qualifier pattern
# (negation, hypothesis, family, ...): conjunctions, subordinators
# and sentence delimiters.
# Fix: removed a duplicated "pourtant" entry, which added nothing.
termination: List[str] = [
    "et",
    "bien que",
    "même si",
    "mais",
    "or",
    "alors que",
    "sauf",
    "cependant",
    "pourtant",
    "cause de",
    "source de",
    "hormis",
    "car",
    "parce que",
    "puisque",
    "ni",
    "en raison de",
    "qui",
    "que",
    "ainsi que",
    "avec",
    "toutefois",
    "en dehors",
    "dans le cadre",
    "du fait",
    ".",
    ",",
    ";",
    "...",
    "…",
    "(",
    ")",
    '"',
]
39 |
--------------------------------------------------------------------------------
/edsnlp/pipes/trainable/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/trainable/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/trainable/biaffine_dep_parser/__init__.py:
--------------------------------------------------------------------------------
1 | from .factory import create_component
2 |
--------------------------------------------------------------------------------
/edsnlp/pipes/trainable/biaffine_dep_parser/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp import registry
2 |
3 | from .biaffine_dep_parser import TrainableBiaffineDependencyParser
4 |
# Register the trainable biaffine dependency parser.
_register = registry.factory.register(
    "eds.biaffine_dep_parser",
    assigns=["token.head", "token.dep"],
)
create_component = _register(TrainableBiaffineDependencyParser)
9 |
--------------------------------------------------------------------------------
/edsnlp/pipes/trainable/embeddings/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/trainable/embeddings/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/trainable/embeddings/span_pooler/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/trainable/embeddings/span_pooler/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/trainable/embeddings/span_pooler/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp import registry
2 |
3 | from .span_pooler import SpanPooler
4 |
# Register the span pooler embedding component.
_register = registry.factory.register(
    "eds.span_pooler",
    assigns=[],
    deprecated=[],
)
create_component = _register(SpanPooler)
10 |
--------------------------------------------------------------------------------
/edsnlp/pipes/trainable/embeddings/text_cnn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/trainable/embeddings/text_cnn/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/trainable/embeddings/text_cnn/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp import registry
2 |
3 | from .text_cnn import TextCnnEncoder
4 |
# Register the text CNN encoder component.
_register = registry.factory.register(
    "eds.text_cnn",
    assigns=[],
    deprecated=[],
)
create_component = _register(TextCnnEncoder)
10 |
--------------------------------------------------------------------------------
/edsnlp/pipes/trainable/embeddings/transformer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/trainable/embeddings/transformer/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/trainable/embeddings/transformer/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp import registry
2 |
3 | from .transformer import Transformer
4 |
# Register the transformer embedding component.
_register = registry.factory.register(
    "eds.transformer",
    assigns=[],
    deprecated=[],
)
create_component = _register(Transformer)
10 |
--------------------------------------------------------------------------------
/edsnlp/pipes/trainable/extractive_qa/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/trainable/extractive_qa/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/trainable/extractive_qa/factory.py:
--------------------------------------------------------------------------------
1 | from typing import TYPE_CHECKING
2 |
3 | from edsnlp import registry
4 |
5 | from .extractive_qa import TrainableExtractiveQA
6 |
# Register the trainable extractive question-answering component.
_register = registry.factory.register(
    "eds.extractive_qa",
    assigns=[],
    deprecated=[],
)
create_component = _register(TrainableExtractiveQA)

if TYPE_CHECKING:
    # Help static type checkers resolve the factory to the underlying class.
    create_component = TrainableExtractiveQA
15 |
--------------------------------------------------------------------------------
/edsnlp/pipes/trainable/layers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/pipes/trainable/layers/__init__.py
--------------------------------------------------------------------------------
/edsnlp/pipes/trainable/ner_crf/__init__.py:
--------------------------------------------------------------------------------
1 | from .factory import create_component
2 |
--------------------------------------------------------------------------------
/edsnlp/pipes/trainable/ner_crf/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp import registry
2 |
3 | from .ner_crf import TrainableNerCrf
4 |
# Register the trainable CRF-based NER; old "nested_ner" names are deprecated aliases.
_register = registry.factory.register(
    "eds.ner_crf",
    assigns=["doc.ents", "doc.spans"],
    deprecated=[
        "eds.nested_ner",
        "nested_ner",
    ],
)
create_component = _register(TrainableNerCrf)
13 |
--------------------------------------------------------------------------------
/edsnlp/pipes/trainable/span_classifier/__init__.py:
--------------------------------------------------------------------------------
1 | from .factory import create_component
2 |
--------------------------------------------------------------------------------
/edsnlp/pipes/trainable/span_classifier/factory.py:
--------------------------------------------------------------------------------
1 | from edsnlp import registry
2 |
3 | from .span_classifier import TrainableSpanClassifier
4 |
# Register the trainable span classifier ("eds.span_qualifier" is the old name).
_register = registry.factory.register(
    "eds.span_classifier",
    assigns=[],
    deprecated=["eds.span_qualifier"],
)
create_component = _register(TrainableSpanClassifier)
10 |
--------------------------------------------------------------------------------
/edsnlp/pipes/trainable/span_linker/__init__.py:
--------------------------------------------------------------------------------
1 | from .factory import create_component
2 |
--------------------------------------------------------------------------------
/edsnlp/pipes/trainable/span_linker/factory.py:
--------------------------------------------------------------------------------
1 | from typing import TYPE_CHECKING
2 |
3 | from edsnlp import registry
4 |
5 | from .span_linker import TrainableSpanLinker
6 |
# Register the trainable span linker component.
_register = registry.factory.register(
    "eds.span_linker",
    assigns=[],
    deprecated=[],
)
create_component = _register(TrainableSpanLinker)

if TYPE_CHECKING:
    # Help static type checkers resolve the factory to the underlying class.
    create_component = TrainableSpanLinker
15 |
--------------------------------------------------------------------------------
/edsnlp/processing/__init__.py:
--------------------------------------------------------------------------------
from typing import TYPE_CHECKING

from edsnlp.utils.lazy_module import lazify

# Turn this module into a lazily-loaded module. The imports in the
# TYPE_CHECKING block below are not executed at runtime; presumably lazify()
# introspects them to know which attributes to resolve on first access —
# confirm in edsnlp.utils.lazy_module before restructuring this file.
lazify()

if TYPE_CHECKING:
    from .deprecated_pipe import pipe  # DEPRECATED
    from .spark import execute_spark_backend
    from .simple import execute_simple_backend
    from .multiprocessing import execute_multiprocessing_backend
12 |
--------------------------------------------------------------------------------
/edsnlp/resources/AVC.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/resources/AVC.csv.gz
--------------------------------------------------------------------------------
/edsnlp/resources/adicap.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/resources/adicap.json.gz
--------------------------------------------------------------------------------
/edsnlp/resources/cim10.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/resources/cim10.csv.gz
--------------------------------------------------------------------------------
/edsnlp/resources/verbs.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/resources/verbs.csv.gz
--------------------------------------------------------------------------------
/edsnlp/train.py:
--------------------------------------------------------------------------------
from confit import Cli

from edsnlp.training.trainer import *  # noqa: F403
from edsnlp.training.trainer import registry, train

# Command-line application exposing the training entry point.
app = Cli(pretty_exceptions_show_locals=False)
# Register `train` as the "train" sub-command, resolving config references
# against the edsnlp registry.
train_command = app.command(name="train", registry=registry)(train)

if __name__ == "__main__":
    app()
11 |
--------------------------------------------------------------------------------
/edsnlp/training/__init__.py:
--------------------------------------------------------------------------------
1 | from .trainer import train, GenericScorer, TrainingData
2 | from .optimizer import ScheduledOptimizer, LinearSchedule
3 |
--------------------------------------------------------------------------------
/edsnlp/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/edsnlp/utils/__init__.py
--------------------------------------------------------------------------------
/edsnlp/utils/deprecation.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from typing import Any, Union
3 |
4 | from confit import VisibleDeprecationWarning
5 | from spacy.tokens import Doc, Span, Token
6 |
7 |
def deprecated_extension(name: str, new_name: str) -> None:
    """
    Emit a ``VisibleDeprecationWarning`` telling users to replace the
    extension ``name`` with ``new_name``.
    """
    message = (
        f'The extension "{name}" is deprecated and will be '
        "removed in a future version. "
        f'Please use "{new_name}" instead.'
    )
    warnings.warn(message, VisibleDeprecationWarning)
16 |
17 |
class deprecated_getter_factory:
    """
    Callable getter proxying a deprecated extension to its new name,
    emitting a deprecation warning on every access.
    """

    def __init__(self, name: str, new_name: str):
        self.name = name
        self.new_name = new_name

    def __call__(self, toklike: Union[Token, Span, Doc]) -> Any:
        kind = type(toklike).__name__
        deprecated_extension(f"{kind}._.{self.name}", f"{kind}._.{self.new_name}")
        # Delegate to the extension registered under the new name.
        return getattr(toklike._, self.new_name)
30 |
--------------------------------------------------------------------------------
/edsnlp/utils/extensions.py:
--------------------------------------------------------------------------------
1 | import functools
2 | from typing import Any, List
3 |
4 |
def rgetattr(obj: Any, attr: str, *args: Any) -> Any:
    """
    Get an attribute recursively, following dots in ``attr``.

    Fix: the ``*args`` annotation was ``List[Any]``, which describes the whole
    tuple rather than each element; each positional extra is ``Any``.

    Parameters
    ----------
    obj : Any
        An object
    attr : str
        The name of the attribute to get. Can contain dots.
    *args : Any
        Optional default value returned when an attribute is missing
        (mirrors the built-in :func:`getattr`).

    Returns
    -------
    Any
        The resolved attribute. Resolution short-circuits to ``None`` as soon
        as an intermediate value is ``None``, regardless of the default.
    """

    def _getattr(obj, attr):
        # Short-circuit on None so "a.b.c" yields None when "a.b" is None.
        return None if obj is None else getattr(obj, attr, *args)

    return functools.reduce(_getattr, [obj] + attr.split("."))
21 |
--------------------------------------------------------------------------------
/edsnlp/utils/inclusion.py:
--------------------------------------------------------------------------------
1 | from spacy.tokens import Span
2 |
3 |
def check_inclusion(span: Span, start: int, end: int) -> bool:
    """
    Checks whether the span overlaps the boundaries.

    Parameters
    ----------
    span : Span
        Span to check.
    start : int
        Start of the boundary
    end : int
        End of the boundary

    Returns
    -------
    bool
        Whether the span overlaps the boundaries.
    """
    # Overlap iff the span starts before the boundary ends
    # and ends after the boundary starts.
    return span.start < end and span.end > start
26 |
27 |
def check_sent_inclusion(span: Span, start: int, end: int) -> bool:
    """
    Checks whether the span's sentence overlaps the boundaries.

    Parameters
    ----------
    span : Span
        Span to check.
    start : int
        Start of the boundary
    end : int
        End of the boundary

    Returns
    -------
    bool
        Whether the span's sentence overlaps the boundaries.
    """
    # Same overlap test as check_inclusion, applied to the enclosing sentence.
    sentence = span.sent
    return sentence.start < end and sentence.end > start
49 |
--------------------------------------------------------------------------------
/edsnlp/utils/numbers.py:
--------------------------------------------------------------------------------
1 | from typing import Union
2 |
3 | from spacy.tokens import Span
4 |
5 | from edsnlp.matchers.utils import get_text
6 |
# Values 0 through 10, aligned index-wise with DIGITS_STR.
DIGITS_VALUE = list(range(11))
# Textual spellings for each digit: French words and lowercase roman numerals.
DIGITS_STR = [
    ["zero"],
    ["un", "une", "i"],
    ["deux", "ii"],
    ["trois", "iii"],
    ["quatre", "iv"],
    ["cinq", "v"],
    ["six", "vi"],
    ["sept", "vii"],
    ["huit", "viii"],
    ["neuf", "ix"],
    ["dix", "x"],
]

# Maps each spelling to its integer value, e.g. "trois" -> 3, "iv" -> 4.
DIGITS_MAPPINGS = {
    string: digit for digit, strings in enumerate(DIGITS_STR) for string in strings
}
25 |
26 |
def parse_digit(s: Union[str, Span], **kwargs):
    """
    Parse a digit expressed either as an arabic numeral or as a textual
    spelling listed in ``DIGITS_MAPPINGS`` (French words / roman numerals,
    zero to ten).

    Accepts a raw string or a spaCy ``Span`` (whose text is extracted with
    ``get_text``, honouring the ``attr`` and ``ignore_excluded`` kwargs).
    Returns the corresponding ``int``, or ``None`` when unrecognised.
    """
    if isinstance(s, Span):
        text = get_text(
            s,
            attr=kwargs.get("attr", "TEXT"),
            ignore_excluded=kwargs.get("ignore_excluded", True),
        )
    else:
        text = s
    text = text.lower().strip()
    try:
        return int(text)
    except ValueError:
        # Not a plain number: fall back to the textual spellings.
        return DIGITS_MAPPINGS.get(text, None)
42 |
--------------------------------------------------------------------------------
/edsnlp/utils/stream_sentinels.py:
--------------------------------------------------------------------------------
class StreamSentinel:
    """Base marker object interleaved in data streams to signal events."""

    pass


class FragmentEndSentinel(StreamSentinel):
    """Signals the end of a named stream fragment."""

    kind = "fragment"

    def __init__(self, name: str):
        # Name of the fragment that just ended.
        self.name = name


class DatasetEndSentinel(StreamSentinel):
    """Signals the end of the whole dataset.

    Singleton: the object may be passed to other processes (pickled and
    depickled), and every construction — including unpickling, which goes
    through ``__new__`` — must yield the very same object.
    """

    kind = "dataset"
    instance = None

    def __new__(cls, *args, **kwargs):
        existing = cls.instance
        if existing is None:
            existing = cls.instance = super().__new__(cls)
        return existing


DATASET_END_SENTINEL = DatasetEndSentinel()
26 |
--------------------------------------------------------------------------------
/edsnlp/viz/__init__.py:
--------------------------------------------------------------------------------
1 | from .quick_examples import QuickExample
2 |
--------------------------------------------------------------------------------
/notebooks/README.md:
--------------------------------------------------------------------------------
1 | # Notebooks
2 |
3 | Check out the pipeline notebook to experiment with baseline components written in spaCy.
4 |
--------------------------------------------------------------------------------
/notebooks/connectors/context.py:
--------------------------------------------------------------------------------
import os
import sys

# Make the repository root (two levels up from this file) importable from
# the notebooks.
_here = os.path.dirname(os.path.abspath(__file__))
REPO_PATH = os.path.abspath(os.path.join(_here, "..", ".."))
sys.path.insert(0, REPO_PATH)
6 |
--------------------------------------------------------------------------------
/notebooks/context.py:
--------------------------------------------------------------------------------
import os
import sys

# Make the repository root (one level up from this file) importable from the
# notebooks.
# Fix: the original passed the literal string "__file__" to os.path.dirname,
# which yields "" and resolves the path relative to the current working
# directory instead of this file's location.
REPO_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
sys.path.insert(0, REPO_PATH)
6 |
--------------------------------------------------------------------------------
/notebooks/dates/context.py:
--------------------------------------------------------------------------------
import os
import sys

# Make the repository root (two levels up from this file) importable from
# the notebooks.
_here = os.path.dirname(os.path.abspath(__file__))
REPO_PATH = os.path.abspath(os.path.join(_here, "..", ".."))
sys.path.insert(0, REPO_PATH)
6 |
--------------------------------------------------------------------------------
/notebooks/example.txt:
--------------------------------------------------------------------------------
1 | Motif :
2 | Le patient est admis le 29 août pour des difficultés respiratoires.
3 |
4 | Antécédents familiaux :
5 | Le père est asthmatique, sans traitement particulier.
6 |
7 | HISTOIRE DE LA MALADIE
8 | Le patient dit avoir de la toux depuis trois jours. Elle a empiré jusqu'à nécessiter un passage aux urgences.
9 |
10 | Conclusion
11 | Possible infection au coronavirus
12 |
--------------------------------------------------------------------------------
/notebooks/normalizer/context.py:
--------------------------------------------------------------------------------
import os
import sys

# Make the repository root (two levels up from this file) importable from
# the notebooks.
_here = os.path.dirname(os.path.abspath(__file__))
REPO_PATH = os.path.abspath(os.path.join(_here, "..", ".."))
sys.path.insert(0, REPO_PATH)
6 |
--------------------------------------------------------------------------------
/notebooks/sections/context.py:
--------------------------------------------------------------------------------
import os
import sys

# Make the repository root (two levels up from this file) importable from the
# notebooks.
# Fix: the original passed the literal string "__file__" to os.path.dirname,
# which yields "" and resolves the path relative to the current working
# directory instead of this file's location.
REPO_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
sys.path.insert(0, REPO_PATH)
6 |
--------------------------------------------------------------------------------
/notebooks/sections/sections.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/notebooks/sections/sections.xlsx
--------------------------------------------------------------------------------
/notebooks/sentences/context.py:
--------------------------------------------------------------------------------
import os
import sys

# Make the repository root (two levels up from this file) importable from the
# notebooks.
# Fix: the original passed the literal string "__file__" to os.path.dirname,
# which yields "" and resolves the path relative to the current working
# directory instead of this file's location.
REPO_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
sys.path.insert(0, REPO_PATH)
6 |
--------------------------------------------------------------------------------
/notebooks/tnm/prototype.md:
--------------------------------------------------------------------------------
1 | ---
2 | jupyter:
3 | jupytext:
4 | formats: md,ipynb
5 | text_representation:
6 | extension: .md
7 | format_name: markdown
8 | format_version: '1.3'
9 | jupytext_version: 1.13.0
10 | kernelspec:
11 | display_name: Python 3 (ipykernel)
12 | language: python
13 | name: python3
14 | ---
15 |
16 | ```python
17 | %reload_ext autoreload
18 | %autoreload 2
19 | ```
20 |
21 | ```python
22 | import spacy
23 | from spacy import displacy
24 | from spacy.tokens import Doc
25 | ```
26 |
27 | # TNM mentions
28 |
29 | ```python
30 | nlp = spacy.blank("fr")
31 | dates = nlp.add_pipe("eds.tnm")
32 | ```
33 |
34 | ```python
35 | text = "patient a un pTNM : pT0N2M1"
36 | ```
37 |
38 | ```python
39 | doc = nlp(text)
40 | ```
41 |
42 | ```python
43 | tnms = doc.spans['tnm']
44 | ```
45 |
46 | ```python
47 | def display_tnm(doc: Doc):
48 | doc.ents = doc.spans['tnm']
49 | return displacy.render(doc, style='ent')
50 | ```
51 |
52 | ```python
53 | display_tnm(doc)
54 | ```
55 |
56 | ```python
57 | for tnm in tnms:
58 | print(f"{str(tnm):<25}{repr(tnm._.value)}")
59 | ```
60 |
61 | ```python
62 |
63 | ```
64 |
--------------------------------------------------------------------------------
/notebooks/tokenizer/context.py:
--------------------------------------------------------------------------------
import os
import sys

# Make the repository root (two levels up from this file) importable from the
# notebooks.
# Fix: the original passed the literal string "__file__" to os.path.dirname,
# which yields "" and resolves the path relative to the current working
# directory instead of this file's location.
REPO_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
sys.path.insert(0, REPO_PATH)
6 |
--------------------------------------------------------------------------------
/notebooks/utilities/context.py:
--------------------------------------------------------------------------------
import os
import sys

# Make the repository root (two levels up from this file) importable from the
# notebooks.
# Fix: the original passed the literal string "__file__" to os.path.dirname,
# which yields "" and resolves the path relative to the current working
# directory instead of this file's location.
REPO_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
sys.path.insert(0, REPO_PATH)
6 |
--------------------------------------------------------------------------------
/scripts/cim10.py:
--------------------------------------------------------------------------------
1 | """
2 | Process CIM10 patterns.
3 |
4 | !!! warning "Watch out for the encoding"
5 |
6 | We had to convert the CIM-10 file from windows-1252 to utf-8.
7 |
8 | Source: https://www.atih.sante.fr/plateformes-de-transmission-et-logiciels/logiciels-espace-de-telechargement/id_lot/456
9 | """ # noqa
10 |
11 | from pathlib import Path
12 |
13 | import pandas as pd
14 | import typer
15 |
16 |
def run(
    raw: Path = typer.Argument(..., help="Path to the raw file"),
    output: Path = typer.Option(
        "edsnlp/resources/cim10.csv.gz", help="Path to the output CSV table."
    ),
) -> None:
    """
    Convenience script to automatically process the CIM10 terminology
    into a processable file.

    Parameters
    ----------
    raw : Path
        Path to the pipe-separated raw CIM-10 export (expected in UTF-8,
        see the module docstring about encoding).
    output : Path
        Path to the output CSV table (compressed according to its extension).
    """

    # The raw export is pipe-separated and has no header row.
    df = pd.read_csv(raw, sep="|", header=None)

    typer.echo(f"Processing {len(df)} French ICD codes...")

    df.columns = ["code", "type", "ssr", "psy", "short", "long"]
    # The raw export pads these text fields with whitespace; normalize them.
    for column in ["code", "short", "long"]:
        df[column] = df[column].str.strip()

    typer.echo(f"Saving to {output}")

    # Ensure the target directory exists (consistent with conjugate_verbs.py).
    output.parent.mkdir(exist_ok=True, parents=True)
    df.to_csv(output, index=False)

    typer.echo("Done !")
41 |
42 |
if __name__ == "__main__":
    # Expose `run` as a Typer CLI when executed directly.
    typer.run(run)
45 |
--------------------------------------------------------------------------------
/scripts/conjugate_verbs.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from pathlib import Path
3 |
4 | import context # noqa
5 | import typer
6 |
7 | from edsnlp.conjugator import conjugate
8 | from edsnlp.pipelines.qualifiers.hypothesis.patterns import verbs_eds, verbs_hyp
9 | from edsnlp.pipelines.qualifiers.negation.patterns import verbs as neg_verbs
10 | from edsnlp.pipelines.qualifiers.reported_speech.patterns import verbs as rspeech_verbs
11 |
12 | warnings.filterwarnings("ignore")
13 |
14 |
def conjugate_verbs(
    output_path: Path = typer.Argument(
        "edsnlp/resources/verbs.csv.gz", help="Path to the output CSV table."
    )
) -> None:
    """
    Convenience script to automatically conjugate a set of verbs,
    using mlconjug3 library.
    """

    # Deduplicate the verbs shared between the qualifier pipelines.
    all_verbs = set(neg_verbs + rspeech_verbs + verbs_eds + verbs_hyp)
    typer.echo(f"Conjugating {len(all_verbs)} verbs...")

    conjugated = conjugate(list(all_verbs))

    typer.echo(f"Saving to {output_path}")
    output_path.parent.mkdir(exist_ok=True, parents=True)
    conjugated.to_csv(output_path, index=False)

    typer.echo("Done !")
37 |
38 |
if __name__ == "__main__":
    # Expose `conjugate_verbs` as a Typer CLI when executed directly.
    typer.run(conjugate_verbs)
41 |
--------------------------------------------------------------------------------
/scripts/context.py:
--------------------------------------------------------------------------------
import sys
from pathlib import Path

# Make the repository root (the parent of this scripts/ folder) importable.
_repo_root = Path(__file__).parent.parent
sys.path.insert(0, str(_repo_root))
5 |
--------------------------------------------------------------------------------
/tests/connectors/test_labeltool.py:
--------------------------------------------------------------------------------
1 | from edsnlp.connectors.labeltool import docs2labeltool
2 |
# Short example sentences; judging by the extensions used below, they cover a
# plain, a negated, a hypothetical and a reported-speech statement.
texts = [
    "Le patient est malade",
    "Le patient n'est pas malade",
    "Le patient est peut-être malade",
    "Le patient dit qu'il est malade",
]
9 |
10 |
def test_docs2labeltool(nlp):
    """docs2labeltool should build a non-empty dataframe of annotations."""
    modifiers = ["negated", "hypothesis", "reported_speech"]

    docs = list(nlp.pipe(texts))

    # With explicit qualifier extensions...
    with_extensions = docs2labeltool(docs, extensions=modifiers)
    assert len(with_extensions)

    # ...and with the default ones.
    default_frame = docs2labeltool(docs)
    assert len(default_frame)
22 |
--------------------------------------------------------------------------------
/tests/data/test_conll.py:
--------------------------------------------------------------------------------
1 | from itertools import islice
2 | from pathlib import Path
3 |
4 | import pytest
5 | from typing_extensions import Literal
6 |
7 | import edsnlp
8 |
9 |
@pytest.mark.parametrize("num_cpu_workers", [0, 2])
@pytest.mark.parametrize("shuffle", ["dataset"])
def test_read_shuffle_loop(
    num_cpu_workers: int,
    shuffle: Literal["dataset", "fragment"],
):
    """Read a CoNLL-U sample with shuffling and looping enabled."""
    input_file = (
        Path(__file__).parent.parent.resolve() / "training" / "rhapsodie_sample.conllu"
    )
    notes = edsnlp.data.read_conll(
        input_file,
        shuffle=shuffle,
        seed=42,
        loop=True,
    ).set_processing(num_cpu_workers=num_cpu_workers)
    # loop=True makes the stream infinite, so only take a fixed prefix.
    notes = list(islice(notes, 6))
    assert len(notes) == 6
    # 32 ce ce PRON _ Gender=Masc|Number=Sing|Person=3|PronType=Dem 30 obl:arg _ _ # noqa: E501
    # Spot-check token #32 ("ce") of the first doc against the raw line above.
    word_attrs = {
        "text": "ce",
        "lemma_": "ce",
        "pos_": "PRON",
        "dep_": "obl:arg",
        "morph": "Gender=Masc|Number=Sing|Person=3|PronType=Dem",
        "head": "profité",
    }
    word = notes[0][31]
    for attr, val in word_attrs.items():
        assert str(getattr(word, attr)) == val
39 |
--------------------------------------------------------------------------------
/tests/data/test_spark.py:
--------------------------------------------------------------------------------
1 | import edsnlp
2 |
3 |
def test_read_write(blank_nlp, text, df_notes_pyspark):
    """Round-trip OMOP documents through Spark: read, annotate, write back."""
    # line below is just to mix params to avoid running too many tests
    shuffle = "dataset" if blank_nlp.lang == "eds" else False

    reader = edsnlp.data.from_spark(
        df_notes_pyspark,
        converter="omop",
        nlp=blank_nlp,
        shuffle=shuffle,
    ).set_processing(backend="simple")
    doc = list(reader)[0]
    assert doc.text == text

    # Annotate "douleurs" mentions and qualify their negation.
    blank_nlp.add_pipe("eds.matcher", config={"terms": {"douleur": ["douleurs"]}})
    blank_nlp.add_pipe("eds.negation")
    docs = blank_nlp.pipe(reader)

    writer = edsnlp.data.to_spark(
        docs,
        converter="omop",
        span_attributes=["negation"],
        span_getter=["ents"],
    )
    res = writer.toPandas().to_dict(orient="records")
    # NOTE(review): assumes the fixture holds 20 notes with exactly one match
    # each — confirm against the df_notes_pyspark fixture definition.
    assert len(res) == 20
    assert sum(len(r["entities"]) for r in res) == 20
30 |
--------------------------------------------------------------------------------
/tests/helpers.py:
--------------------------------------------------------------------------------
1 | import spacy
2 |
3 | import edsnlp
4 |
5 |
def make_nlp(lang):
    """Build the shared test pipeline for the given language ("eds" or "fr")."""
    if lang == "eds":
        model = spacy.blank("eds")
    else:
        model = edsnlp.blank("fr")

    # Pre-processing pipes (order preserved).
    for pipe_name in ("eds.normalizer", "eds.sentences", "eds.sections"):
        model.add_pipe(pipe_name)

    # Term-based matcher on the normalized text.
    model.add_pipe(
        "eds.matcher",
        config=dict(
            terms=dict(patient="patient"),
            attr="NORM",
            ignore_excluded=True,
        ),
    )
    # Regex-based matcher, under a distinct component name.
    model.add_pipe(
        "eds.matcher",
        name="matcher2",
        config=dict(regex=dict(anomalie=r"anomalie")),
    )

    # Qualifiers, then extraction pipes (order preserved).
    for pipe_name in (
        "eds.hypothesis",
        "eds.negation",
        "eds.family",
        "eds.history",
        "eds.reported_speech",
        "eds.dates",
        "eds.quantities",
    ):
        model.add_pipe(pipe_name)

    return model
43 |
--------------------------------------------------------------------------------
/tests/pipelines/core/test_terminology.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from edsnlp.core import PipelineProtocol
4 | from edsnlp.utils.examples import parse_example
5 |
# Example snippet in the `parse_example` markup, parsed by the test below.
example = "1g de doliprane"
7 |
8 |
@pytest.mark.parametrize("term_matcher", ["exact", "simstring"])
def test_terminology(blank_nlp: PipelineProtocol, term_matcher: str):
    """The terminology pipe should tag drug mentions with their concept id."""
    config = dict(
        label="drugs",
        terms=dict(paracetamol=["doliprane", "tylenol", "paracetamol"]),
        attr="NORM",
        term_matcher=term_matcher,
    )
    blank_nlp.add_pipe("eds.terminology", config=config)

    text, entities = parse_example(example)
    doc = blank_nlp(text)

    assert len(doc.ents) == len(entities)

    for ent, entity in zip(doc.ents, entities):
        assert ent.text == text[entity.start_char : entity.end_char]
        assert ent.kb_id_ == entity.modifiers[0].value
30 |
--------------------------------------------------------------------------------
/tests/pipelines/misc/test_consultation_date_town.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/tests/pipelines/misc/test_consultation_date_town.py
--------------------------------------------------------------------------------
/tests/pipelines/misc/test_reason.py:
--------------------------------------------------------------------------------
1 | import spacy
2 | from pytest import mark
3 |
4 | text = """COMPTE RENDU D'HOSPITALISATION du 11/07/2018 au 12/07/2018
5 | MOTIF D'HOSPITALISATION
6 | Monsieur Dupont Jean Michel, de sexe masculin, âgée de 39 ans, née le 23/11/1978,
7 | a été hospitalisé du 11/08/2019 au 17/08/2019 pour une quinte de toux.
8 |
9 | ANTÉCÉDENTS
10 | Antécédents médicaux :
11 | Premier épisode: il a été hospitalisé pour asthme en mai 2018."""
12 |
13 |
@mark.parametrize("use_sections", [True, False])
def test_reason(lang, use_sections):
    """eds.reason should collect hospitalization reasons and their entities."""
    nlp = spacy.blank(lang)
    # Named-entity extraction
    nlp.add_pipe(
        "eds.matcher",
        config=dict(
            terms=dict(
                respiratoire=[
                    "asthmatique",
                    "asthme",
                    "toux",
                ]
            )
        ),
    )
    nlp.add_pipe("eds.normalizer")
    # eds.reason is added once before eds.sections exists, removed, then
    # re-added afterwards — NOTE(review): presumably to exercise both
    # construction paths of the component; confirm intent.
    nlp.add_pipe("eds.reason", config=dict(use_sections=use_sections))
    nlp.remove_pipe("eds.reason")
    nlp.add_pipe("eds.sections")
    nlp.add_pipe("eds.reason", config=dict(use_sections=use_sections))

    doc = nlp(text)
    reason = doc.spans["reasons"][0]
    entities = reason._.ents_reason

    assert entities[0].label_ == "respiratoire"
    assert reason._.is_reason
    # The second entity (under ANTÉCÉDENTS) counts as a reason only when
    # sections are NOT used to restrict the search.
    assert doc.ents[1]._.is_reason is not use_sections
43 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/disorders/AIDS.py:
--------------------------------------------------------------------------------
# Expected outputs for the eds.aids test texts (lists aligned by index).
results_aids = dict(
    has_match=[True, False, True, True],
    detailled_status=[None] * 4,
    assign=None,
    texts=[
        "Patient atteint du VIH au stade SIDA.",
        "Patient atteint du VIH.",
        "Il y a un VIH avec coinfection pneumocystose",
        "Présence d'un VIH stade C",
    ],
)
22 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/disorders/CKD.py:
--------------------------------------------------------------------------------
# Expected outputs for the eds.ckd test texts (lists aligned by index).
results_ckd = dict(
    has_match=[
        True, False, True, True, False, True,
        False, True, True, True, False,
    ],
    detailled_status=None,
    assign=8 * [None] + [{"stage": "IV"}, {"dfg": 30}, None],
    texts=[
        "Patient atteint d'une glomérulopathie.",
        "Patient atteint d'une tubulopathie aigüe.",
        "Patient transplanté rénal",
        "Présence d'une insuffisance rénale aigüe sur chronique",
        "Le patient a été dialysé",  # ponctuelle
        "Le patient est dialysé chaque lundi",  # chronique
        "Présence d'une IRC",  # severity non mentionned
        "Présence d'une IRC sévère",
        "Présence d'une IRC de classe IV",
        "Présence d'une IRC avec DFG à 30",  # severe
        "Présence d'une maladie rénale avec DFG à 110",  # no renal failure
    ],
)
31 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/disorders/COPD.py:
--------------------------------------------------------------------------------
# Expected outputs for the eds.copd test texts (lists aligned by index).
results_copd = dict(
    has_match=[True, True, True, False, False, True],
    detailled_status=None,
    assign=None,
    texts=[
        "Une fibrose interstitielle diffuse idiopathique",
        "Patient atteint de pneumoconiose",
        "Présence d'une HTAP.",
        "On voit une hypertension pulmonaire minime",
        "La patiente a été mis sous oxygénorequérance",  # Ponctual: not extracted
        "La patiente est sous oxygénorequérance au long cours",
    ],
)
21 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/disorders/alcohol.py:
--------------------------------------------------------------------------------
# Expected outputs for the eds.alcohol test texts (lists aligned by index).
results_alcohol = dict(
    has_match=[True, True, False, False] + [True] * 5,
    detailled_status=[None] * 4 + ["ABSTINENCE", None, None, "ABSTINENCE", None],
    negation=[None] * 6 + [True, None, True],
    assign=None,
    texts=[
        "Patient alcoolique.",
        "OH chronique.",
        "Prise d'alcool occasionnelle",
        "Application d'un pansement alcoolisé",
        "Alcoolisme sevré",
        "Alcoolisme non sevré",
        "Alcool: 0",
        "Le patient est en cours de sevrage éthylotabagique",
        "Patient alcoolique: non.",
    ],
)
48 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/disorders/cerebrovascular_accident.py:
--------------------------------------------------------------------------------
# Expected outputs for the eds.cerebrovascular_accident test texts
# (lists aligned by index).
results_cerebrovascular_accident = dict(
    has_match=[False, True, True, False, True, True, True],
    detailled_status=None,
    assign=None,
    texts=[
        "Patient hospitalisé à AVC.",
        "Hospitalisation pour un AVC.",
        "Saignement intracranien",
        "Thrombose périphérique",
        "Thrombose sylvienne",
        "Infarctus cérébral",
        "Soigné via un thrombolyse",
    ],
)
23 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/disorders/congestive_heart_failure.py:
--------------------------------------------------------------------------------
# Expected outputs for the eds.congestive_heart_failure test texts
# (lists aligned by index).
results_congestive_heart_failure = dict(
    has_match=[True, True, False, True, False],
    detailled_status=None,
    assign=None,
    texts=[
        "Présence d'un oedème pulmonaire",
        "Le patient est équipé d'un pace-maker",
        "Un cardiopathie non décompensée",  # no decompensation
        "Insuffisance cardiaque",
        "Insuffisance cardiaque minime",  # minimal severity
    ],
)
19 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/disorders/connective_tissue_disease.py:
--------------------------------------------------------------------------------
# Expected outputs for the eds.connective_tissue_disease test texts
# (lists aligned by index).
results_connective_tissue_disease = dict(
    has_match=[True, True, False, True, True],
    detailled_status=None,
    assign=None,
    texts=[
        "Présence d'une sclérodermie.",
        "Patient atteint d'un lupus.",
        "Présence d'anticoagulants lupiques,",
        "Il y a une MICI.",
        "Syndrome de Raynaud",
    ],
)
19 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/disorders/dementia.py:
--------------------------------------------------------------------------------
# Expected outputs for the eds.dementia test texts (lists aligned by index).
results_dementia = dict(
    has_match=[True, True, False, True],
    detailled_status=None,
    assign=None,
    texts=[
        "D'importants déficits cognitifs",
        "Patient atteint de démence",
        "On retrouve des anti-SLA",  # antibody
        "Une maladie de Charcot",
    ],
)
17 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/disorders/diabetes.py:
--------------------------------------------------------------------------------
# Expected outputs for the eds.diabetes test texts (lists aligned by index).
results_diabetes = dict(
    has_match=[True, True, True, False, True, True, True],
    detailled_status=(
        ["WITHOUT_COMPLICATION"] * 3 + [None] + ["WITH_COMPLICATION"] * 3
    ),
    assign=None,
    texts=[
        "Présence d'un DT2",
        "Présence d'un DNID",
        "Patient diabétique",
        "Un diabète insipide",
        "Atteinte neurologique d'origine diabétique",
        "Une rétinopathie diabétique",
        "Il y a un mal perforant plantaire",
    ],
)
31 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/disorders/hemiplegia.py:
--------------------------------------------------------------------------------
# Expected outputs for the eds.hemiplegia test texts (lists aligned by index).
results_hemiplegia = dict(
    has_match=[True] * 3,
    detailled_status=None,
    assign=None,
    texts=[
        "Patient hémiplégique",
        "Paralysie des membres inférieurs",
        "Patient en LIS",  # locked-in syndrome
    ],
)
15 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/disorders/leukemia.py:
--------------------------------------------------------------------------------
# Expected outputs for the eds.leukemia test texts (lists aligned by index).
results_leukemia = dict(
    has_match=[True, False, True, True],
    detailled_status=None,
    assign=None,
    texts=[
        "Sydrome myéloprolifératif",
        "Sydrome myéloprolifératif bénin",
        "Patient atteint d'une LAM",
        "Une maladie de Vaquez",
    ],
)
17 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/disorders/liver_disease.py:
--------------------------------------------------------------------------------
# Expected outputs for the eds.liver_disease test texts (lists aligned by index).
results_liver_disease = dict(
    has_match=[True] * 4,
    detailled_status=["MILD"] * 2 + ["MODERATE_TO_SEVERE"] * 2,
    assign=None,
    texts=[
        "Il y a une fibrose hépatique",
        "Une hépatite B chronique",
        "Le patient consulte pour une cirrhose",
        "Greffe hépatique.",
    ],
)
17 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/disorders/lymphoma.py:
--------------------------------------------------------------------------------
# Expected outputs for the eds.lymphoma test texts (lists aligned by index).
results_lymphoma = dict(
    has_match=[True, True, True, False],
    detailled_status=None,
    assign=None,
    texts=[
        "Un lymphome de Hodgkin.",
        "Atteint d'un Waldenstörm",
        "Un LAGC",
        "anti LAGC: 10^4/mL",  # Dosage
    ],
)
17 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/disorders/myocardial_infarction.py:
--------------------------------------------------------------------------------
# Expected outputs for the eds.myocardial_infarction test texts
# (lists aligned by index).
results_myocardial_infarction = dict(
    has_match=[True, False, True, False, True],
    detailled_status=None,
    assign=None,
    texts=[
        "Une cardiopathie ischémique",
        "Une cardiopathie non-ischémique",
        "Présence d'un stent sur la marginale",
        "Présence d'un stent périphérique",
        "infarctus du myocarde",
    ],
)
19 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/disorders/peptic_ulcer_disease.py:
--------------------------------------------------------------------------------
# Expected outputs for the eds.peptic_ulcer_disease test texts
# (lists aligned by index).
results_peptic_ulcer_disease = dict(
    has_match=[True, True, False, True],
    detailled_status=None,
    assign=None,
    texts=[
        "Beaucoup d'ulcères gastriques",
        "Présence d'UGD",
        "La patient à des ulcères",
        "Au niveau gastrique: " + 5 * "blabla " + "quelques ulcères",
    ],
)
17 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/disorders/peripheral_vascular_disease.py:
--------------------------------------------------------------------------------
# Expected outputs for the eds.peripheral_vascular_disease test texts
# (lists aligned by index).
results_peripheral_vascular_disease = dict(
    has_match=[
        True, True, False, True, False, False, True,
        False, True, True, False, True, False,
    ],
    detailled_status=None,
    assign=None,
    texts=[
        "Un AOMI",
        "Présence d'un infarctus rénal",
        "Une angiopathie cérébrale",
        "Une angiopathie",
        "Une thrombose cérébrale",
        "Une thrombose des veines superficielles",
        "Une thrombose",
        "Effectuer un bilan pre-trombose",
        "Une ischémie des MI est remarquée.",
        "Plusieurs cas d'EP",
        "Effectuer des cures d'EP",  # EP = échange plasmatique here: no match
        "Le patient est hypertendu",
        "Une hypertension portale",
    ],
)
35 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/disorders/solid_tumor.py:
--------------------------------------------------------------------------------
# Expected outputs for the eds.solid_tumor test texts (lists aligned by index).
results_solid_tumor = dict(
    # Fix: the original has_match held 9 entries for 10 texts/statuses; the
    # final "TNM: pTx N1 M1" text (status METASTASIS) was missing its entry.
    has_match=[True, True, False, True, True, True, True, True, True, True],
    detailled_status=[
        "LOCALIZED",
        "LOCALIZED",
        None,
        "METASTASIS",
        "METASTASIS",
        "LOCALIZED",
        "METASTASIS",
        "METASTASIS",
        "METASTASIS",
        "METASTASIS",
    ],
    assign=None,
    texts=[
        "Présence d'un carcinome intra-hépatique.",
        "Patient avec un K sein.",
        "Il y a une tumeur bénigne",
        "Tumeur métastasée",
        "Cancer du poumon au stade 4",
        "Cancer du poumon au stade 2",
        "Présence de nombreuses lésions secondaires",
        "Patient avec fracture abcddd secondaire. Cancer de",
        "Patient avec lesions non ciblées",
        "TNM: pTx N1 M1",
    ],
)

# Extra component options exercised by the test: CT-scan metastasis patterns
# and TNM-score based detection.
solid_tumor_config = dict(use_patterns_metastasis_ct_scan=True, use_tnm=True)
31 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/disorders/tobacco.py:
--------------------------------------------------------------------------------
# Expected outputs for the eds.tobacco test texts (lists aligned by index).
results_tobacco = dict(
    has_match=[True, True, False] + [True] * 6,
    detailled_status=(
        [None] * 3 + ["ABSTINENCE"] + [None] * 2 + ["ABSTINENCE"] + [None] * 2
    ),
    negation=[None] * 4 + [True, True, None, True, True],
    assign=[{"PA": 15}] + 8 * [None],
    texts=[
        "Tabagisme évalué à 15 PA",
        "Patient tabagique",
        "Tabagisme festif",
        "On a un tabagisme ancien",
        "Tabac: 0",
        "Tabagisme passif",
        "Tabac: sevré depuis 5 ans",
        "Le patient ne fume aucun truc.",
        "Le patient fume 0 PA.",
    ],
)
48 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/test_cim10.py:
--------------------------------------------------------------------------------
1 | from edsnlp.core import PipelineProtocol
2 | from edsnlp.utils.examples import parse_example
3 |
# Example documents in the `parse_example` markup, parsed by the test below.
examples = [
    "Patient admis pour fièvres typhoïde et paratyphoïde",
    "Patient admis pour C2.21",
]
8 |
9 |
def test_cim10(blank_nlp: PipelineProtocol):
    """eds.cim10 should tag mentions and attach the ICD-10 code as kb_id."""
    blank_nlp.add_pipe("eds.cim10")

    for example in examples:
        text, entities = parse_example(example)
        doc = blank_nlp(text)

        assert len(doc.ents) == len(entities)

        for ent, entity in zip(doc.ents, entities):
            expected_text = text[entity.start_char : entity.end_char]
            assert ent.text == expected_text
            assert ent.kb_id_ == entity.modifiers[0].value
21 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/test_covid.py:
--------------------------------------------------------------------------------
def test_covid(blank_nlp):
    """eds.covid should extract the full COVID mention span."""
    blank_nlp.add_pipe("eds.covid")

    examples = [
        ("Patient admis pour coronavirus", "coronavirus"),
        ("Patient admis pour pneumopathie à coronavirus", "pneumopathie à coronavirus"),
    ]

    for example, expected in examples:
        doc = blank_nlp(example)
        assert doc.ents[0].text == expected
14 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/test_drugs.py:
--------------------------------------------------------------------------------
def test_drugs(blank_nlp):
    """eds.drugs should map each drug mention to its ATC code."""
    blank_nlp.add_pipe("eds.normalizer")
    blank_nlp.add_pipe("eds.drugs")

    text = "Traitement habituel: Kardégic, cardensiel (bisoprolol), glucophage, lasilix"
    doc = blank_nlp(text)

    drugs_expected = [
        ("Kardégic", "B01AC06"),
        ("cardensiel", "C07AB07"),
        ("bisoprolol", "C07AB07"),
        ("glucophage", "A10BA02"),
        ("lasilix", "C03CA01"),
    ]
    drugs_detected = [(ent.text, ent.kb_id_) for ent in doc.ents]
    assert drugs_detected == drugs_expected
16 |
--------------------------------------------------------------------------------
/tests/pipelines/ner/test_tnm.py:
--------------------------------------------------------------------------------
1 | from edsnlp.utils.examples import parse_example
2 |
# TNM strings in the `parse_example` markup, covering prefix variants (a/p/c),
# irregular spacing, UICC version suffixes and trailing-whitespace edge cases.
examples = [
    "TNM: aTxN1M0",
    "TNM: p Tx N1M 0",
    "TNM: p Tx N1M 0 (UICC 20)",
    "TNM: aTxN1M0 (UICC 68)",
    "TNM: aTxN1 R2",
    "TNM: pT2c N0 R0 (TNM 2010)",
    "TNM: aTx / N1 / M0",
    "TNM: pT2 N1mi",
    "TNM: pT1(m)N1 M0",
    "TNM: pT1bN0(sn)",
    "TNM: pT1 pN1 M0\n \n ",
    "TNM: aTxN1M0 ",
    "TNM: cT3N0M0 \n \n",
    "TNM: PT",
    "TNM: p T \n",
    "TNM: a T \n",
    "TNM: pT \n \n0",
]
22 |
23 |
def test_scores(blank_nlp):
    """eds.tnm should extract TNM mentions and normalize their value."""
    blank_nlp.add_pipe("eds.tnm")

    for example in examples:
        text, entities = parse_example(example=example)
        doc = blank_nlp(text)

        assert len(doc.ents) == len(entities)

        for entity, ent in zip(entities, doc.ents):
            assert ent.text == text[entity.start_char : entity.end_char]
            # The normalized value is carried by the example's first modifier.
            assert ent._.value.norm() == entity.modifiers[0].value
38 |
--------------------------------------------------------------------------------
/tests/pipelines/qualifiers/conftest.py:
--------------------------------------------------------------------------------
1 | from pytest import fixture
2 |
3 |
# Override the session-wide `blank_nlp` fixture: run every qualifier test
# twice, once with and once without the normalizer pipe.
@fixture(params=[True, False])
def blank_nlp(blank_nlp, request, lang):
    if request.param:
        blank_nlp.add_pipe("normalizer")
    return blank_nlp
8 | return blank_nlp
9 |
--------------------------------------------------------------------------------
/tests/pipelines/test_pipelines.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import edsnlp
4 |
5 |
def test_pipelines(doc):
    """Smoke-test the qualifier outputs on the shared `doc` fixture."""
    assert len(doc.ents) == 3
    patient, _, anomalie = doc.ents

    # The fixture is expected to affirm "patient" and negate "anomalie".
    assert not patient._.negation
    assert anomalie._.negation

    # The first token should not be flagged as medical history.
    assert not doc[0]._.history
14 |
15 |
def test_import_all():
    """Resolve every public pipe exposed by `edsnlp.pipes`.

    Accessing an attribute of ``edsnlp.pipes`` triggers the lazy import of
    the corresponding component. Failures caused by a missing torch
    installation are tolerated; any other ImportError/AttributeError is a
    genuine packaging bug and is re-raised.

    Bug fix: the original version executed ``pass`` when "torch" appeared in
    the error message but had no ``else`` branch, so *every* error was
    silently swallowed and the test could never fail.
    """
    import edsnlp.pipes

    for name in dir(edsnlp.pipes):
        if not name.startswith("_") and "endlines" not in name:
            try:
                getattr(edsnlp.pipes, name)
            except (ImportError, AttributeError) as e:
                # Only tolerate errors stemming from a missing torch install.
                if "torch" not in str(e):
                    raise
26 |
27 |
def test_non_existing_pipe():
    """Accessing an unknown pipe name raises a descriptive AttributeError."""
    with pytest.raises(AttributeError) as excinfo:
        getattr(edsnlp.pipes, "non_existing_pipe")

    expected_message = "module edsnlp.pipes has no attribute non_existing_pipe"
    assert str(excinfo.value) == expected_message
33 |
--------------------------------------------------------------------------------
/tests/readme.md:
--------------------------------------------------------------------------------
1 | # Testing the algorithm
2 |
3 | Various tests for the components of the spaCy pipeline.
4 |
5 | We decided to design tests entity-wise, meaning that we only check the validity
6 | of the computed modality on a set of entities. This design choice is motivated by
the fact that:
8 |
9 | 1. That's what we actually care about. We want our pipeline to detect negation,
10 | family context, patient history and hypothesis relative to a given entity.
11 |
12 | 2. Deciding on the span of an annotation (negated, hypothesis, etc) is tricky.
Consider the example: `"Le patient n'est pas malade."`. Should the negated span
correspond to `["est", "malade"]`, `["malade"]`, or `["n'", "est", "pas", "malade", "."]`?
15 |
3. Depending on the design of the algorithm, the computed span might be off even though
the algorithm correctly assigns the polarity to a given entity (for instance because it
considered that the punctuation was negated as well).
19 | By relaxing the need to infer the correct span, we avoid giving an unfair disadvantage
20 | to an otherwise great algorithm.
21 |
--------------------------------------------------------------------------------
/tests/resources/brat_data/subfolder/doc-1.ann:
--------------------------------------------------------------------------------
1 | R1 lieu Arg1:T8 Arg2:T9
2 | T1 sosy 30 38 douleurs
3 | T2 localisation 39 57 dans le bras droit
4 | T3 anatomie 47 57 bras droit
5 | T4 pathologie 75 83;85 98 problème de locomotion
6 | A1 assertion T4 absent
7 | A9 bool flag 0 T4
8 | T5 pathologie 114 117 AVC
9 | A2 etat T5 passé
10 | A3 assertion T5 non-associé
11 | T6 pathologie 159 164 rhume
12 | A4 etat T6 présent
13 | A5 assertion T6 hypothétique
14 | T7 pathologie 291 296 rhume
15 | A6 etat T7 présent
16 | A7 assertion T7 hypothétique
17 | T8 sosy 306 314 Douleurs
18 | T9 localisation 315 333 dans le bras droit
19 | T10 anatomie 323 333 bras droit
20 | T11 sosy 378 386 anomalie
21 | #1 AnnotatorNotes T7 Repetition
22 | R2 lieu Arg1:T1 Arg2:T2
23 | A8 assertion T11 absent
24 | E1 MyArg1:T3 MyArg2:T1
25 | E2 MyArg1:T1 MyArg2:E1
26 | T12 test label 0 378 386 anomalie
27 | #1 AnnotatorNotes T1 C0030193
28 |
--------------------------------------------------------------------------------
/tests/resources/brat_data/subfolder/doc-1.txt:
--------------------------------------------------------------------------------
1 | Le patient est admis pour des douleurs dans le bras droit, mais n'a pas de problème
2 | de locomotion.
3 | Historique d'AVC dans la famille. pourrait être un cas de rhume.
4 | NBNbWbWbNbWbNBNbNbWbWbNBNbWbNbNbWbNBNbWbNbNBWbWbNbNbNBWbNbWbNbWBNbNbWbNbNBNbWbWbNbWBNbNbWbNBNbWbWbNb
5 | Pourrait être un cas de rhume.
6 | Motif :
7 | Douleurs dans le bras droit.
8 | ANTÉCÉDENTS
9 | Le patient est déjà venu
10 | Pas d'anomalie détectée.
11 |
--------------------------------------------------------------------------------
/tests/resources/brat_data/subfolder/doc-2.txt:
--------------------------------------------------------------------------------
1 | Small text
2 |
--------------------------------------------------------------------------------
/tests/resources/brat_data/subfolder/doc-3.txt:
--------------------------------------------------------------------------------
1 | Another small text
2 |
--------------------------------------------------------------------------------
/tests/resources/docs.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/tests/resources/docs.parquet
--------------------------------------------------------------------------------
/tests/test_entrypoints.py:
--------------------------------------------------------------------------------
1 | import catalogue
2 | import pytest
3 |
# `entry_points` is in the stdlib from Python 3.8; fall back to the
# `importlib_metadata` backport on older interpreters.
try:
    from importlib.metadata import entry_points
except ImportError:
    from importlib_metadata import entry_points

# Probe for torch; `import torch.nn` also binds the top-level `torch` name.
try:
    import torch.nn
except ImportError:
    torch = None

# Skip the whole module when torch is unavailable — presumably because
# loading the registries imports torch-dependent components (confirm).
if torch is None:
    pytest.skip("torch not installed", allow_module_level=True)
16 |
17 |
def test_entrypoints():
    """Load every spacy_*/edsnlp_* entry-point registry through catalogue."""
    eps = entry_points()
    # Newer importlib.metadata exposes `.groups`; older versions behave like
    # a mapping, so fall back to `.keys()`.
    groups = eps.groups if hasattr(eps, "groups") else eps.keys()
    for namespace in groups:
        if namespace.startswith(("spacy_", "edsnlp_")):
            registry = catalogue.Registry(namespace.split("_"), entry_points=True)
            registry.get_all()
25 |
--------------------------------------------------------------------------------
/tests/test_span_args.py:
--------------------------------------------------------------------------------
1 | from confit import validate_arguments
2 |
3 | from edsnlp.pipes.base import (
4 | SpanGetterArg,
5 | SpanSetterArg,
6 | validate_span_getter,
7 | validate_span_setter,
8 | )
9 |
10 |
def test_span_getter():
    """`validate_span_getter` normalizes every accepted input shape."""
    cases = [
        ("ents", {"ents": True}),
        (["ents"], {"ents": True}),
        (["ents", "group"], {"ents": True, "group": True}),
        ({"grp": True}, {"grp": True}),
        ({"grp": ["a", "b", "c"]}, {"grp": ["a", "b", "c"]}),
    ]
    for value, expected in cases:
        assert validate_span_getter(value) == expected
17 |
18 |
def test_span_setter():
    """`validate_span_setter` normalizes every accepted input shape."""
    cases = [
        ("ents", {"ents": True}),
        (["ents"], {"ents": True}),
        (["ents", "group"], {"ents": True, "group": True}),
        ({"grp": True}, {"grp": True}),
        ({"grp": ["a", "b", "c"]}, {"grp": ["a", "b", "c"]}),
    ]
    for value, expected in cases:
        assert validate_span_setter(value) == expected
25 |
26 |
def test_validate_args():
    """SpanGetterArg/SpanSetterArg annotations coerce plain strings."""

    @validate_arguments
    def my_func(span_getter: SpanGetterArg, span_setter: SpanSetterArg):
        return span_getter, span_setter

    result = my_func("ents", "ents")
    assert result == ({"ents": True}, {"ents": True})
33 |
--------------------------------------------------------------------------------
/tests/training/dataset.jsonl:
--------------------------------------------------------------------------------
1 | {"note_id": "1", "note_text": "Pas de cancer chez le patient ou sa famille.\nOn trouve un nodule superieur centimétrique droit évocateur de fibroanédome.", "entities": [{"start": 7, "end": 13, "label": "sosy", "negation": true}, {"start": 58, "end": 64, "label": "sosy", "negation": false}, {"start": 75, "end": 88, "label": "measure", "unit": "cm"}, {"start": 108, "end": 120, "label": "sosy", "negation": false}]}
2 | {"note_id": "2", "note_text": "La patiente a un gros rhume, sans fièvre ou douleur thoracique. Elle fait 30 kg.", "entities": [{"start": 22, "end": 27, "label": "sosy", "negation": false}, {"start": 34, "end": 40, "label": "sosy", "negation": true}, {"start": 44, "end": 62, "label": "sosy", "negation": true}, {"start": 74, "end": 79, "label": "measure", "unit": "kg"}]}
3 |
--------------------------------------------------------------------------------
/tests/training/dataset/annotation.conf:
--------------------------------------------------------------------------------
1 | [entities]
2 |
3 | sosy
4 | measure
5 |
6 | [attributes]
7 |
8 | negation Arg:sosy
9 | unit Arg:measure, Value:cm|kg
10 |
11 | [relations]
12 |
13 | [events]
14 |
--------------------------------------------------------------------------------
/tests/training/dataset/sample-1.ann:
--------------------------------------------------------------------------------
1 | T1 sosy 7 13 cancer
2 | A1 negation T1
3 | T2 sosy 58 64 nodule
4 | T3 measure 75 88 centimétrique
5 | T4 sosy 108 120 fibroanédome
6 | A2 unit T3 cm
7 |
--------------------------------------------------------------------------------
/tests/training/dataset/sample-1.txt:
--------------------------------------------------------------------------------
1 | Pas de cancer chez le patient ou sa famille.
2 | On trouve un nodule superieur centimétrique droit évocateur de fibroanédome.
3 |
--------------------------------------------------------------------------------
/tests/training/dataset/sample-2.ann:
--------------------------------------------------------------------------------
1 | T1 sosy 22 27 rhume
2 | T2 sosy 34 40 fièvre
3 | T3 sosy 44 62 douleur thoracique
4 | A1 negation T2
5 | A2 negation T3
6 | T4 measure 74 79 30 kg
7 | A3 unit T4 kg
8 |
--------------------------------------------------------------------------------
/tests/training/dataset/sample-2.txt:
--------------------------------------------------------------------------------
1 | La patiente a un gros rhume, sans fièvre ou douleur thoracique. Elle fait 30 kg.
2 |
--------------------------------------------------------------------------------
/tests/training/dep_parser_config.yml:
--------------------------------------------------------------------------------
1 | # 🤖 PIPELINE DEFINITION
2 | nlp:
3 | "@core": pipeline
4 |
5 | lang: fr
6 |
7 | components:
8 | parser:
9 | '@factory': eds.biaffine_dep_parser
10 | hidden_size: 64
11 | decoding_mode: greedy
12 | dropout_p: 0.
13 | use_attrs: ['pos_']
14 |
15 | embedding:
16 | '@factory': eds.transformer
17 | model: hf-internal-testing/tiny-bert
18 | window: 512
19 | stride: 256
20 |
21 | # 📈 SCORERS
22 | scorer:
23 | speed: false
24 | dep:
25 | '@metrics': "eds.dep_parsing"
26 |
27 | # 🎛️ OPTIMIZER
28 | optimizer:
29 | optim: adamw
30 | module: ${ nlp }
31 | total_steps: ${ train.max_steps }
32 | groups:
33 | ".*":
34 | lr: 1e-3
35 |
36 | # 📚 DATA
37 | train_data:
38 | data:
39 | "@readers": conll
40 | path: ./rhapsodie_sample.conllu
41 | shuffle: dataset
42 | batch_size: 1 docs
43 | pipe_names: [ "parser" ]
44 |
45 | val_data:
46 | "@readers": conll
47 | path: ./rhapsodie_sample.conllu
48 |
49 | # 🚀 TRAIN SCRIPT OPTIONS
50 | train:
51 | nlp: ${ nlp }
52 | train_data: ${ train_data }
53 | val_data: ${ val_data }
54 | max_steps: 20
55 | validation_interval: 10
56 | max_grad_norm: 5.0
57 | scorer: ${ scorer }
58 | num_workers: 0
59 | optimizer: ${ optimizer }
60 | grad_dev_policy: "clip_mean"
61 | log_weight_grads: true
62 |
--------------------------------------------------------------------------------
/tests/tuning/config.cfg:
--------------------------------------------------------------------------------
1 | # My usefull comment
2 | [train]
3 | param1 = 1
4 |
--------------------------------------------------------------------------------
/tests/tuning/test_checkpoints/single_phase_gpu_hour/study_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/tests/tuning/test_checkpoints/single_phase_gpu_hour/study_.pkl
--------------------------------------------------------------------------------
/tests/tuning/test_checkpoints/single_phase_n_trials/study_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/tests/tuning/test_checkpoints/single_phase_n_trials/study_.pkl
--------------------------------------------------------------------------------
/tests/tuning/test_checkpoints/two_phase_gpu_hour/results_summary.txt:
--------------------------------------------------------------------------------
1 | Study Summary
2 | ==================
3 | Best trial: 2
4 |
5 | Value: 0.7674011016524788
6 |
7 | Params:
8 | start_value: 0.00017235427021406453
9 | warmup_rate: 0.1
10 |
11 | Importances:
12 | start_value: 0.7
13 | warmup_rate: 0.3
14 |
--------------------------------------------------------------------------------
/tests/tuning/test_checkpoints/two_phase_gpu_hour/study_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/tests/tuning/test_checkpoints/two_phase_gpu_hour/study_.pkl
--------------------------------------------------------------------------------
/tests/tuning/test_checkpoints/two_phase_n_trials/results_summary.txt:
--------------------------------------------------------------------------------
1 | Study Summary
2 | ==================
3 | Best trial: 2
4 |
5 | Value: 0.7674011016524788
6 |
7 | Params:
8 | start_value: 0.00017235427021406453
9 | warmup_rate: 0.1
10 |
11 | Importances:
12 | start_value: 0.7
13 | warmup_rate: 0.3
14 |
--------------------------------------------------------------------------------
/tests/tuning/test_checkpoints/two_phase_n_trials/study_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aphp/edsnlp/8e9ed84f56e6af741023e8b3a9de38ba93912953/tests/tuning/test_checkpoints/two_phase_n_trials/study_.pkl
--------------------------------------------------------------------------------
/tests/utils/test_bindings.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from confit import validate_arguments
3 | from confit.errors import ConfitValidationError
4 |
5 | from edsnlp.utils.bindings import BINDING_GETTERS, BINDING_SETTERS, AttributesArg
6 |
7 |
def test_qualifier_validation():
    """AttributesArg accepts strings, lists, dicts and callables."""

    @validate_arguments
    def fn(arg: AttributesArg):
        return arg

    # Strings and lists of strings normalize to {attr: True} mappings.
    assert fn("_.negated") == {"_.negated": True}
    assert fn(["_.negated", "_.event"]) == {"_.negated": True, "_.event": True}

    # Scalar dict values are wrapped into single-element lists.
    expected = {"_.negated": True, "_.event": ["DATE"]}
    assert fn({"_.negated": True, "_.event": "DATE"}) == expected

    # Callables are passed through as-is.
    def callback(x):
        return x

    assert fn(callback) is callback

    # Anything else is rejected with a validation error.
    with pytest.raises(ConfitValidationError):
        fn(1)

    with pytest.raises(ConfitValidationError):
        fn({"_.negated": 1})
29 |
30 |
def test_bindings():
    """Binding getters/setters navigate attribute-and-index paths."""

    class Node:
        def __init__(self, value):
            self.value = value

    root = Node([Node(1), Node(2)])

    # A plain path string reads the nested value.
    assert BINDING_GETTERS["value[0].value"](root) == 1
    # A (path, expected) tuple produces an equality check.
    assert BINDING_GETTERS[("value[0].value", 1)](root) is True
    # A (path, value) setter assigns in place.
    BINDING_SETTERS[("value[1].value", 3)](root)
    assert root.value[1].value == 3
41 |
--------------------------------------------------------------------------------
/tests/utils/test_filter.py:
--------------------------------------------------------------------------------
1 | from spacy.tokens import Doc, Span
2 |
3 | from edsnlp.utils.filter import filter_spans
4 |
5 |
def test_filter_spans(doc: Doc):
    """Overlapping candidates collapse to a single surviving span."""
    candidates = [
        doc[0:3],
        doc[0:4],
        doc[1:2],
        doc[0:2],
        doc[0:3],
    ]

    kept = filter_spans(candidates)

    # Only one span survives, and it is the 4-token one.
    assert len(kept) == 1
    assert len(kept[0]) == 4
19 |
20 |
def test_filter_spans_strict_nesting(doc: Doc):
    """A span strictly nested inside a larger one is filtered out."""
    outer = doc[0:5]
    inner = doc[1:4]

    kept = filter_spans([outer, inner])

    # The enclosing 5-token span wins.
    assert len(kept) == 1
    assert len(kept[0]) == 5
31 |
32 |
def test_label_to_remove(doc: Doc):
    """Spans labelled `label_to_remove` are dropped from the result."""
    spans = [
        Span(doc, 0, 5, label="test"),
        Span(doc, 6, 10, label="test"),
        Span(doc, 6, 10, label="remove"),
    ]
    assert len(filter_spans(spans, label_to_remove="remove")) == 2

    # When the "remove" span is listed first, only one span remains: the
    # overlapping "test" span on [6, 10) is eliminated along with it.
    spans = [
        Span(doc, 6, 10, label="remove"),
        Span(doc, 0, 5, label="test"),
        Span(doc, 6, 10, label="test"),
    ]
    assert len(filter_spans(spans, label_to_remove="remove")) == 1
54 |
--------------------------------------------------------------------------------
/tests/utils/test_typing.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from confit import validate_arguments
3 | from confit.errors import ConfitValidationError
4 |
5 | from edsnlp.utils.typing import AsList
6 |
7 |
def test_as_list():
    """AsList[int] coerces scalars to singleton lists and validates items."""

    @validate_arguments
    def func(a: AsList[int]):
        return a

    # A single coercible string becomes a one-element int list.
    assert func("1") == [1]

    # A non-numeric item raises, with the error pointing at element 0.
    with pytest.raises(ConfitValidationError) as excinfo:
        func("a")

    expected_fragment = (
        "1 validation error for test_typing.test_as_list..func()\n-> a.0\n"
    )
    assert expected_fragment in str(excinfo.value)
21 |
--------------------------------------------------------------------------------