├── .gitignore ├── KB ├── classificationRules3.csv ├── critical_findings.tsv ├── critical_findings.yml ├── critical_modifiers.tsv ├── critical_modifiers.yml ├── criticalfinder_generalized_modifiers.tsv ├── criticalfinder_generalized_modifiers.yml ├── lexical_kb_04292013.tsv ├── lexical_kb_04292013.yml ├── lexical_kb_05042016.tsv ├── lexical_kb_05042016.yml ├── lexical_kb_nlm.tsv ├── lexical_kb_nlm.yml ├── pah_utah.tsv ├── pah_utah.txt ├── pah_utah.yml ├── pe_kb.tsv ├── pe_kb.yml ├── pneumonia_modifiers.yml ├── pneumonia_targets.yml ├── quality_artifacts.tsv ├── quality_artifacts.yml ├── schema2.csv ├── test.yml ├── utah_crit.tsv └── utah_crit.yml ├── README.rst ├── docs ├── Makefile └── source │ ├── bibliography.md │ ├── conf.py │ └── index.md ├── notebooks ├── BasicSentenceMarkup.ipynb ├── BasicSentenceMarkupPart2.ipynb ├── MultiSentenceDocuments.ipynb ├── README.md ├── cherrypy_pyConText.py ├── functional │ ├── 2 │ │ └── Reading_ConTextItems.ipynb │ └── 3 │ │ └── Reading_ConTextItems.ipynb ├── html │ ├── BasicSentenceMarkup.html │ └── BasicSentenceMarkupPart2.html └── pyConText_REST_demo.ipynb ├── pyConTextNLP ├── ConTextMarkup.py ├── __init__.py ├── display │ ├── __init__.py │ ├── _bokeh.py │ ├── _mpld3.py │ └── html.py ├── helpers.py ├── io │ ├── __init__.py │ └── xml.py ├── itemData.py ├── pyConText.py ├── tagObject.py ├── tests │ ├── __init__.py │ ├── test_base.py │ ├── test_contextitem.py │ ├── test_contextmarkup.py │ ├── test_env.py │ ├── test_helpers.py │ └── test_itemData.py ├── utils.py └── version.py ├── requirements-py2.txt ├── setup.cfg ├── setup.py └── tests ├── __init__.py └── pyConTextNLP ├── __init__.py ├── display └── __init__.py └── tests2.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | 
eggs/ 15 | lib/ 16 | lib64/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | *.egg-info/ 21 | .installed.cfg 22 | *.egg 23 | 24 | # Installer logs 25 | pip-log.txt 26 | pip-delete-this-directory.txt 27 | 28 | # Unit test / coverage reports 29 | htmlcov/ 30 | .tox/ 31 | .coverage 32 | .cache 33 | nosetests.xml 34 | coverage.xml 35 | 36 | # Translations 37 | *.mo 38 | *.pot 39 | 40 | # Django stuff: 41 | *.log 42 | 43 | # Sphinx documentation 44 | docs/_build/ 45 | 46 | # VIM 47 | *.swp 48 | 49 | *.pdf 50 | *.png 51 | *.text 52 | *.html 53 | *.backup 54 | 55 | *.ipynb_checkpoints 56 | -------------------------------------------------------------------------------- /KB/classificationRules3.csv: -------------------------------------------------------------------------------- 1 | # Lines that start with the # symbol are comments and are ignored,,,,,,,,,,,,, 2 | # processReport current has three types of rules: @CLASSIFICATION_RULE, @CATEGORY_RULE, and @SEVERITY_RULE,,,,,,,,,,, 3 | # classification rules would be for things like disease_state, certainty_state, temporality state,,,,,,,,,,, 4 | # For each classification_rule set," there is a rule label (e.g. ""DISEASE_STATE"". 
This must match",,,,,,,,,,,, 5 | # the terms used in the schema file,,,,,,,,,,,,, 6 | # Each rule set requires a DEFAULT which is the schema value to be returned if no rule conditions are satisifed,,,,,,,,,,,,, 7 | # Each rule set has zero or more rules consisting of a schema value to be returned if the rule evaluates to true,,,,,,,,,,,,, 8 | # A rule evalutes to true if the target is modified by one or more of the ConText CATEGORIES listed following,,,,,,,,,,,,, 9 | @CLASSIFICATION_RULE,DISEASE_STATE,RULE,0,DEFINITE_NEGATED_EXISTENCE,PROBABLE_NEGATED_EXISTENCE,FUTURE,INDICATION,PSEUDONEG,,,,, 10 | @CLASSIFICATION_RULE,DISEASE_STATE,RULE,2,AMBIVALENT_EXISTENCE,,,,,,,,, 11 | @CLASSIFICATION_RULE,DISEASE_STATE,RULE,1,PROBABLE_EXISTENCE,DEFINITE_EXISTENCE,,,,,,,, 12 | @CLASSIFICATION_RULE,DISEASE_STATE,DEFAULT,1,,,,,,,,,, 13 | @CLASSIFICATION_RULE,CERTAINTY_STATE,RULE,0,PROBABLE_NEGATED_EXISTENCE,AMBIVALENT_EXISTENCE,PROBABLE_EXISTENCE,,,,,,, 14 | @CLASSIFICATION_RULE,CERTAINTY_STATE,DEFAULT,1,,,,,,,,,, 15 | @CLASSIFICATION_RULE,ACUTE_STATE,RULE,0,HISTORICAL,,,,,,,,, 16 | @CLASSIFICATION_RULE,ACUTE_STATE,DEFAULT,1,,,,,,,,,, 17 | #CATEGORY_RULE rules specify what Findings (e.g. 
DVT) can have the category modified by the following ANATOMIC modifies,,,,,,,,,,,,, 18 | @CATEGORY_RULE,DVT,LOWER_DEEP_VEIN,UPPER_DEEP_VEIN,HEPATIC_VEIN,PORTAL_SYSTEM_VEIN,PULMONARY_VEIN,RENAL_VEIN,SINUS_VEIN,LOWER_SUPERFICIAL_VEIN,UPPER_SUPERFICIAL_VEIN,VARICOCELE,ARTERIAL,NON_VASCULAR 19 | @CATEGORY_RULE,INFARCT,BRAIN_ANATOMY,HEART_ANATOMY,OTHER_CRITICAL_ANATOMY,,,,,,,,, 20 | @CATEGORY_RULE,ANEURYSM,AORTIC_ANATOMY,,,,,,,,,,, 21 | #SEVERITY_RUlE specifiy which targets to try to obtain severity measures for,,,,,,,,,,,,, 22 | @SEVERITY_RULE,AORTIC_ANATOMY_ANEURYSM,SEVERITY,,,,,,,,,,, 23 | -------------------------------------------------------------------------------- /KB/critical_findings.tsv: -------------------------------------------------------------------------------- 1 | Lex Type Regex Direction embolism PULMONARY_EMBOLISM \b(emboli|embolism|embolus)\b pe PULMONARY_EMBOLISM \bpe\b pulmonary embolism PULMONARY_EMBOLISM pulmonary\s(artery )?(embol[a-z]+) aneurysm (target) ANEURYSM aneurysm[a-z]* dilation ANEURYSM aneurysmal dilatation ANEURYSM (aneurysmal )?dilatation aortic dissection AORTIC_DISSECTION (aortic|aorta)\s(artery\s)? 
dissection appendicitis APPENDICITIS bowel obstruction BOWEL_OBSTRUCTION midline shift BRAIN_HERNIATION ((subfalcian|subfalcine|tonsillar)\sherniation)|((midline|mid-line)\sshift) carotid dissection CAROTID_DISSECTION carotid?\s*?\w*\s*dissection cerebral hemorrhage CEREBRAL_HEMORRHAGE (cereblal|intracranial|brain)\s(hemorrhage|hematoma|bleed) cervical fracture CERVICAL_FRACTURE (cervical spine|c[1-7]|hangman|jefferson|dens|odontoid)\sfracture cholecystitis CHOLECYSTITIS cord compression CORD_COMPRESSION depressed skull fracture DEPRESSED_SKULL_FRACTURE diverticulitis DIVERTICULITIS DVT DVT ((non.?)?occlusive)?\s?(thromb(us|i|osis|osed)\b|DVT|clot\b) 2/28/13 ectasia ECTASIA (ectasia| ectatic) ectopic pregnancy ECTOPIC_PREGNANCY epiglottitis EPIGLOTTITIS fetal demise FETAL_DEMISE free air FREE_AIR (pneumoperitoneum|((intraperitoneal|free)\s(gas|air))) infarct INFARCT \b(stroke|infarct|infarction)\b ischemic bowel ISCHEMIC_BOWEL lacunar infarct LACUNAR_INFARCT mediastinal emphysema MEDIASTINAL_EMPHYSEMA omental infarct OMENTAL_INFARCT bone infarct OSTEONECROSIS (bone infarct|osteonecrosis) pneumonia PNEUMONIA pneumoni* aspiration PNEUMONIA aspirat* consolidation PNEUMONIA consolidat* pneumothorax PNEUMOTHORAX portal venous air PORTAL_VENOUS_AIR portal\b(venous\b)?(gas|air) renal infarct RENAL_INFARCT retroperitoneal hemorrhage RETROPERITONEAL_HEMORRHAGE (retro|intra)?peritoneal\s(hemorrhage|hematoma|bleed) retropharyngeal abscess RETROPHARYNGEAL_ABSCESS ruptured aneurysm RUPTURED_ANEURYSM (ruptured aneurysm|aortic rupture) spinal cord compression SPINAL_CORD_COMPRESSION splenic infarct SPLENIC_INFARCT tension pneumothorax TENSION_PNEUMOTHORAX torsion TORSION volvulus VOLVULUS -------------------------------------------------------------------------------- /KB/critical_findings.yml: -------------------------------------------------------------------------------- 1 | Comments: '' 2 | Direction: '' 3 | Lex: embolism 4 | Regex: \b(emboli|embolism|embolus)\b 5 | Type: 
PULMONARY_EMBOLISM 6 | --- 7 | Comments: '' 8 | Direction: '' 9 | Lex: pe 10 | Regex: \bpe\b 11 | Type: PULMONARY_EMBOLISM 12 | --- 13 | Comments: '' 14 | Direction: '' 15 | Lex: pulmonary embolism 16 | Regex: pulmonary\s(artery )?(embol[a-z]+) 17 | Type: PULMONARY_EMBOLISM 18 | --- 19 | Comments: '' 20 | Direction: '' 21 | Lex: aneurysm (target) 22 | Regex: aneurysm[a-z]* 23 | Type: ANEURYSM 24 | --- 25 | Comments: '' 26 | Direction: '' 27 | Lex: dilation 28 | Regex: '' 29 | Type: ANEURYSM 30 | --- 31 | Comments: '' 32 | Direction: '' 33 | Lex: aneurysmal dilatation 34 | Regex: (aneurysmal )?dilatation 35 | Type: ANEURYSM 36 | --- 37 | Comments: '' 38 | Direction: '' 39 | Lex: aortic dissection 40 | Regex: (aortic|aorta)\s(artery\s)? dissection 41 | Type: AORTIC_DISSECTION 42 | --- 43 | Comments: '' 44 | Direction: '' 45 | Lex: appendicitis 46 | Regex: '' 47 | Type: APPENDICITIS 48 | --- 49 | Comments: '' 50 | Direction: '' 51 | Lex: bowel obstruction 52 | Regex: '' 53 | Type: BOWEL_OBSTRUCTION 54 | --- 55 | Comments: '' 56 | Direction: '' 57 | Lex: midline shift 58 | Regex: ((subfalcian|subfalcine|tonsillar)\sherniation)|((midline|mid-line)\sshift) 59 | Type: BRAIN_HERNIATION 60 | --- 61 | Comments: '' 62 | Direction: '' 63 | Lex: carotid dissection 64 | Regex: carotid?\s*?\w*\s*dissection 65 | Type: CAROTID_DISSECTION 66 | --- 67 | Comments: '' 68 | Direction: '' 69 | Lex: cerebral hemorrhage 70 | Regex: (cereblal|intracranial|brain)\s(hemorrhage|hematoma|bleed) 71 | Type: CEREBRAL_HEMORRHAGE 72 | --- 73 | Comments: '' 74 | Direction: '' 75 | Lex: cervical fracture 76 | Regex: (cervical spine|c[1-7]|hangman|jefferson|dens|odontoid)\sfracture 77 | Type: CERVICAL_FRACTURE 78 | --- 79 | Comments: '' 80 | Direction: '' 81 | Lex: cholecystitis 82 | Regex: '' 83 | Type: CHOLECYSTITIS 84 | --- 85 | Comments: '' 86 | Direction: '' 87 | Lex: cord compression 88 | Regex: '' 89 | Type: CORD_COMPRESSION 90 | --- 91 | Comments: '' 92 | Direction: '' 93 | Lex: depressed skull 
fracture 94 | Regex: '' 95 | Type: DEPRESSED_SKULL_FRACTURE 96 | --- 97 | Comments: '' 98 | Direction: '' 99 | Lex: diverticulitis 100 | Regex: '' 101 | Type: DIVERTICULITIS 102 | --- 103 | Comments: '' 104 | Direction: 2/28/13 105 | Lex: DVT 106 | Regex: ((non.?)?occlusive)?\s?(thromb(us|i|osis|osed)\b|DVT|clot\b) 107 | Type: DVT 108 | --- 109 | Comments: '' 110 | Direction: '' 111 | Lex: ectasia 112 | Regex: (ectasia| ectatic) 113 | Type: ECTASIA 114 | --- 115 | Comments: '' 116 | Direction: '' 117 | Lex: ectopic pregnancy 118 | Regex: '' 119 | Type: ECTOPIC_PREGNANCY 120 | --- 121 | Comments: '' 122 | Direction: '' 123 | Lex: epiglottitis 124 | Regex: '' 125 | Type: EPIGLOTTITIS 126 | --- 127 | Comments: '' 128 | Direction: '' 129 | Lex: fetal demise 130 | Regex: '' 131 | Type: FETAL_DEMISE 132 | --- 133 | Comments: '' 134 | Direction: '' 135 | Lex: free air 136 | Regex: (pneumoperitoneum|((intraperitoneal|free)\s(gas|air))) 137 | Type: FREE_AIR 138 | --- 139 | Comments: '' 140 | Direction: '' 141 | Lex: infarct 142 | Regex: \b(stroke|infarct|infarction)\b 143 | Type: INFARCT 144 | --- 145 | Comments: '' 146 | Direction: '' 147 | Lex: ischemic bowel 148 | Regex: '' 149 | Type: ISCHEMIC_BOWEL 150 | --- 151 | Comments: '' 152 | Direction: '' 153 | Lex: lacunar infarct 154 | Regex: '' 155 | Type: LACUNAR_INFARCT 156 | --- 157 | Comments: '' 158 | Direction: '' 159 | Lex: mediastinal emphysema 160 | Regex: '' 161 | Type: MEDIASTINAL_EMPHYSEMA 162 | --- 163 | Comments: '' 164 | Direction: '' 165 | Lex: omental infarct 166 | Regex: '' 167 | Type: OMENTAL_INFARCT 168 | --- 169 | Comments: '' 170 | Direction: '' 171 | Lex: bone infarct 172 | Regex: (bone infarct|osteonecrosis) 173 | Type: OSTEONECROSIS 174 | --- 175 | Comments: '' 176 | Direction: '' 177 | Lex: pneumonia 178 | Regex: pneumoni* 179 | Type: PNEUMONIA 180 | --- 181 | Comments: '' 182 | Direction: '' 183 | Lex: aspiration 184 | Regex: aspirat* 185 | Type: PNEUMONIA 186 | --- 187 | Comments: '' 188 | 
Direction: '' 189 | Lex: consolidation 190 | Regex: consolidat* 191 | Type: PNEUMONIA 192 | --- 193 | Comments: '' 194 | Direction: '' 195 | Lex: pneumothorax 196 | Regex: '' 197 | Type: PNEUMOTHORAX 198 | --- 199 | Comments: '' 200 | Direction: '' 201 | Lex: portal venous air 202 | Regex: portal\b(venous\b)?(gas|air) 203 | Type: PORTAL_VENOUS_AIR 204 | --- 205 | Comments: '' 206 | Direction: '' 207 | Lex: renal infarct 208 | Regex: '' 209 | Type: RENAL_INFARCT 210 | --- 211 | Comments: '' 212 | Direction: '' 213 | Lex: retroperitoneal hemorrhage 214 | Regex: (retro|intra)?peritoneal\s(hemorrhage|hematoma|bleed) 215 | Type: RETROPERITONEAL_HEMORRHAGE 216 | --- 217 | Comments: '' 218 | Direction: '' 219 | Lex: retropharyngeal abscess 220 | Regex: '' 221 | Type: RETROPHARYNGEAL_ABSCESS 222 | --- 223 | Comments: '' 224 | Direction: '' 225 | Lex: ruptured aneurysm 226 | Regex: (ruptured aneurysm|aortic rupture) 227 | Type: RUPTURED_ANEURYSM 228 | --- 229 | Comments: '' 230 | Direction: '' 231 | Lex: spinal cord compression 232 | Regex: '' 233 | Type: SPINAL_CORD_COMPRESSION 234 | --- 235 | Comments: '' 236 | Direction: '' 237 | Lex: splenic infarct 238 | Regex: '' 239 | Type: SPLENIC_INFARCT 240 | --- 241 | Comments: '' 242 | Direction: '' 243 | Lex: tension pneumothorax 244 | Regex: '' 245 | Type: TENSION_PNEUMOTHORAX 246 | --- 247 | Comments: '' 248 | Direction: '' 249 | Lex: torsion 250 | Regex: '' 251 | Type: TORSION 252 | --- 253 | Comments: '' 254 | Direction: '' 255 | Lex: volvulus 256 | Regex: '' 257 | Type: VOLVULUS 258 | -------------------------------------------------------------------------------- /KB/critical_modifiers.tsv: -------------------------------------------------------------------------------- 1 | Lex Type Regex Direction 2 | bolus timing QUALITY_FEATURE \bbolus[ -]{0,1}timing bidirectional # fixes pedoc #129 dq 3 | limited exam QUALITY_FEATURE (suboptimal|degraded|limited) (exam[a-z]*|study|scan|evaluation|bolus|timing) bidirectional 4 | 
nondiagnostic exam QUALITY_FEATURE nondiagnostic (exam[a-z]*|study|scan|evaluation) bidirectional #fix for pedoc #231 5 | artifact ARTIFACT artifact(ual)? 6 | bulk motion ARTIFACT 7 | motion ARTIFACT 8 | patient motion ARTIFACT 9 | respiratory motion ARTIFACT 10 | declot EXCLUSION 11 | detorsion EXCLUSION 12 | embolization EXCLUSION 13 | epiploic appendicitis EXCLUSION 14 | in the setting of EXCLUSION 15 | pe examination EXCLUSION (pulmonary )(artery )?(embol[a-z]+)(exam[a-z]*|study|protocol) 16 | septic embolism EXCLUSION septic\s(emboli|embolus|embolism) 17 | thrombectomy EXCLUSION 18 | thrombin EXCLUSION 19 | upstroke EXCLUSION 20 | -------------------------------------------------------------------------------- /KB/critical_modifiers.yml: -------------------------------------------------------------------------------- 1 | Comments: ' # fixes pedoc #129 dq' 2 | Direction: bidirectional 3 | Lex: bolus timing 4 | Regex: \bbolus[ -]{0,1}timing 5 | Type: QUALITY_FEATURE 6 | --- 7 | Comments: '' 8 | Direction: bidirectional 9 | Lex: limited exam 10 | Regex: (suboptimal|degraded|limited) (exam[a-z]*|study|scan|evaluation|bolus|timing) 11 | Type: QUALITY_FEATURE 12 | --- 13 | Comments: ' #fix for pedoc #231' 14 | Direction: bidirectional 15 | Lex: nondiagnostic exam 16 | Regex: nondiagnostic (exam[a-z]*|study|scan|evaluation) 17 | Type: QUALITY_FEATURE 18 | --- 19 | Comments: '' 20 | Direction: '' 21 | Lex: artifact 22 | Regex: artifact(ual)? 
23 | Type: ARTIFACT 24 | --- 25 | Comments: '' 26 | Direction: '' 27 | Lex: bulk motion 28 | Regex: '' 29 | Type: ARTIFACT 30 | --- 31 | Comments: '' 32 | Direction: '' 33 | Lex: motion 34 | Regex: '' 35 | Type: ARTIFACT 36 | --- 37 | Comments: '' 38 | Direction: '' 39 | Lex: patient motion 40 | Regex: '' 41 | Type: ARTIFACT 42 | --- 43 | Comments: '' 44 | Direction: '' 45 | Lex: respiratory motion 46 | Regex: '' 47 | Type: ARTIFACT 48 | --- 49 | Comments: '' 50 | Direction: '' 51 | Lex: declot 52 | Regex: '' 53 | Type: EXCLUSION 54 | --- 55 | Comments: '' 56 | Direction: '' 57 | Lex: detorsion 58 | Regex: '' 59 | Type: EXCLUSION 60 | --- 61 | Comments: '' 62 | Direction: '' 63 | Lex: embolization 64 | Regex: '' 65 | Type: EXCLUSION 66 | --- 67 | Comments: '' 68 | Direction: '' 69 | Lex: epiploic appendicitis 70 | Regex: '' 71 | Type: EXCLUSION 72 | --- 73 | Comments: '' 74 | Direction: '' 75 | Lex: in the setting of 76 | Regex: '' 77 | Type: EXCLUSION 78 | --- 79 | Comments: '' 80 | Direction: '' 81 | Lex: pe examination 82 | Regex: (pulmonary )(artery )?(embol[a-z]+)(exam[a-z]*|study|protocol) 83 | Type: EXCLUSION 84 | --- 85 | Comments: '' 86 | Direction: '' 87 | Lex: septic embolism 88 | Regex: septic\s(emboli|embolus|embolism) 89 | Type: EXCLUSION 90 | --- 91 | Comments: '' 92 | Direction: '' 93 | Lex: thrombectomy 94 | Regex: '' 95 | Type: EXCLUSION 96 | --- 97 | Comments: '' 98 | Direction: '' 99 | Lex: thrombin 100 | Regex: '' 101 | Type: EXCLUSION 102 | --- 103 | Comments: '' 104 | Direction: '' 105 | Lex: upstroke 106 | Regex: '' 107 | Type: EXCLUSION 108 | -------------------------------------------------------------------------------- /KB/criticalfinder_generalized_modifiers.tsv: -------------------------------------------------------------------------------- 1 | Lex Type Regex Direction 2 | abdominal aorta AORTIC_ANATOMY abdominal aort(a|ic) bidirectional 3 | aorta AORTIC_ANATOMY aort(a|ic) bidirectional 4 | ascending aorta AORTIC_ANATOMY ascending 
aort(a|ic) bidirectional 5 | thoracic aorta AORTIC_ANATOMY thoracic aort(a|ic) bidirectional 6 | A1 BRAIN_ANATOMY bidirectional 7 | intracranial BRAIN_ANATOMY bidirectional 8 | cerebral BRAIN_ANATOMY bidirectional 9 | A2 BRAIN_ANATOMY bidirectional 10 | ACA BRAIN_ANATOMY bidirectional 11 | anterior limb BRAIN_ANATOMY (anterior limb|crus anterius capsulae internae|capsula interna|pars anterior|crus anterior|capsulae internae) bidirectional 12 | posterior limb BRAIN_ANATOMY (posterior limb|crus posterius capsulae internae|capsula interna|pars posterior|crus posterior|capsulae internae) bidirectional 13 | genu limb BRAIN_ANATOMY (genu limb|internal capsule genu|genu capsulae internae) bidirectional 14 | brain BRAIN_ANATOMY bidirectional 15 | caudate BRAIN_ANATOMY (caudate|caudate nucleus|nucleus caudatus) bidirectional 16 | cerebellar BRAIN_ANATOMY bidirectional 17 | cerebellum BRAIN_ANATOMY (cerebellum|epencephalon-1|kleinhirn) bidirectional 18 | corona radiata BRAIN_ANATOMY bidirectional 19 | cerebral cortex BRAIN_ANATOMY (cerebral cortex|cortex of cerebrum|cortex cerebri|pallium|cortex cerebralis) bidirectional 20 | encephalo BRAIN_ANATOMY bidirectional 21 | brain lobe BRAIN_ANATOMY (frontal|parietal|occipital|temporal|limbic)( lobe(s))? 
bidirectional 22 | ganglia BRAIN_ANATOMY bidirectional 23 | gray matter BRAIN_ANATOMY (gray|grey) matter bidirectional 24 | white matter BRAIN_ANATOMY white matter bidirectional 25 | gyrus BRAIN_ANATOMY bidirectional 26 | hemisphere BRAIN_ANATOMY bidirectional 27 | insular BRAIN_ANATOMY bidirectional 28 | internal capsule BRAIN_ANATOMY (internal capsule|internal capsule radiations|capsula interna) bidirectional 29 | lentiform BRAIN_ANATOMY bidirectional 30 | M1 BRAIN_ANATOMY bidirectional 31 | M2 BRAIN_ANATOMY bidirectional 32 | MCA BRAIN_ANATOMY bidirectional 33 | nuclei BRAIN_ANATOMY nucle(i|us) bidirectional 34 | P1 BRAIN_ANATOMY bidirectional 35 | P2 BRAIN_ANATOMY bidirectional 36 | PCA BRAIN_ANATOMY bidirectional 37 | pons BRAIN_ANATOMY bidirectional 38 | putamen BRAIN_ANATOMY bidirectional 39 | semiovale BRAIN_ANATOMY (semiovale center|semiovale|semi-ovale|medullary center|white matter of cerebrum|centrum semiovale|substantia centralis medullaris cerebri|corpus medullare cerebri|centrum ovale) bidirectional 40 | sulcus BRAIN_ANATOMY bidirectional 41 | territorial BRAIN_ANATOMY bidirectional 42 | territory BRAIN_ANATOMY bidirectional 43 | measuring X.X cm SEVERITY (measuring|diameter of|measured) (?P[0-9]+(\.[0-9]*)?)( )?(?P(cm|mm)) backward 44 | measuring XxY cm SEVERITY (measuring|diameter of|measured) (?P[0-9]+(\.[0-9]*)?( )?(x|by)( )?[0-9]+(\.[0-9]*)?)( )?(?P(cm|mm)) backward 45 | X.Y cm SEVERITY (?P[0-9]+(\.[0-9]*)?)( )?(?P(cm|mm)) forward 46 | XxY cm SEVERITY (?P[0-9]+(\.[0-9]*)?( )?(x|by)( )?[0-9]+(\.[0-9]*)?)( )?(?P(cm|mm)) forward 47 | anterior HEART_ANATOMY bidirectional 48 | septal HEART_ANATOMY bidirectional 49 | septum HEART_ANATOMY (cardiac septum|septum) bidirectional 50 | inferior HEART_ANATOMY bidirectional 51 | heart apex HEART_ANATOMY (cardiac apex|apex of (the )?heart) bidirectional 52 | inferolateral HEART_ANATOMY bidirectional 53 | lateral HEART_ANATOMY bidirectional 54 | anteroseptal HEART_ANATOMY bidirectional 55 | transmural 
HEART_ANATOMY bidirectional 56 | wall HEART_ANATOMY bidirectional 57 | left ventricular wall HEART_ANATOMY (\blv\b|left ventricular)( wall)? bidirectional 58 | right ventricular wall HEART_ANATOMY (\brv\b|right ventricular)( wall)? bidirectional 59 | myocardial HEART_ANATOMY (myocardial|myocardium) bidirectional 60 | cardiac HEART_ANATOMY (cardiac|heart) forward 61 | omental OTHER_CRITICAL_ANATOMY (omentum|omental) bidirectional 62 | spleen OTHER_CRITICAL_ANATOMY (spleen|splenic) bidirectional 63 | kidney OTHER_CRITICAL_ANATOMY (kidney|\brenal\b) bidirectional 64 | testis OTHER_CRITICAL_ANATOMY (testis|testicular|testes) bidirectional 65 | ovary OTHER_CRITICAL_ANATOMY (ovary|ovaries|ovarian) bidirectional 66 | azygos vein AZYGOS_VEIN bidirectional 67 | azygos arch AZYGOS_VEIN bidirectional 68 | inferior vena cava LOWER_DEEP_VEIN bidirectional 69 | common iliac vein LOWER_DEEP_VEIN bidirectional 70 | internal iliac vein LOWER_DEEP_VEIN bidirectional 71 | external iliac vein LOWER_DEEP_VEIN bidirectional 72 | common femoral vein LOWER_DEEP_VEIN bidirectional 73 | femoral vein LOWER_DEEP_VEIN bidirectional 74 | popliteal vein LOWER_DEEP_VEIN bidirectional 75 | anterior tibial vein LOWER_DEEP_VEIN bidirectional 76 | peroneal vein LOWER_DEEP_VEIN bidirectional 77 | posterior tibial vein LOWER_DEEP_VEIN bidirectional 78 | superior vena cava UPPER_DEEP_VEIN bidirectional 79 | brachiocephalic vein UPPER_DEEP_VEIN bidirectional 80 | subclavian vein UPPER_DEEP_VEIN bidirectional 81 | axillary vein UPPER_DEEP_VEIN bidirectional 82 | brachial vein UPPER_DEEP_VEIN bidirectional 83 | external jugular vein UPPER_DEEP_VEIN bidirectional 84 | anterior jugular vein UPPER_DEEP_VEIN bidirectional 85 | jugular venous arch UPPER_DEEP_VEIN bidirectional 86 | internal jugular vein UPPER_DEEP_VEIN bidirectional 87 | hepatic vein HEPATIC_VEIN bidirectional 88 | portal vein PORTAL_SYSTEM_VEIN bidirectional 89 | splenic vein PORTAL_SYSTEM_VEIN bidirectional 90 | inferior mesenteric vein 
PORTAL_SYSTEM_VEIN bidirectional 91 | superior mesenteric vein PORTAL_SYSTEM_VEIN bidirectional 92 | pulmonary vein PULMONARY_VEIN bidirectional 93 | renal vein RENAL_VEIN bidirectional 94 | sigmoid sinus SINUS_VEIN bidirectional 95 | inferior petrosal sinus SINUS_VEIN bidirectional 96 | cavernous sinus SINUS_VEIN bidirectional 97 | greater saphenous vein LOWER_SUPERFICIAL_VEIN bidirectional 98 | lesser saphenous vein LOWER_SUPERFICIAL_VEIN bidirectional 99 | basilic vein UPPER_SUPERFICIAL_VEIN bidirectional 100 | cephalic vein UPPER_SUPERFICIAL_VEIN bidirectional 101 | varicocele VARICOCELE bidirectional 102 | varicous vein UPPER_SUPERFICIAL_VEIN bidirectional 103 | -------------------------------------------------------------------------------- /KB/criticalfinder_generalized_modifiers.yml: -------------------------------------------------------------------------------- 1 | Comments: '' 2 | Direction: bidirectional 3 | Lex: abdominal aorta 4 | Regex: abdominal aort(a|ic) 5 | Type: AORTIC_ANATOMY 6 | --- 7 | Comments: '' 8 | Direction: bidirectional 9 | Lex: aorta 10 | Regex: aort(a|ic) 11 | Type: AORTIC_ANATOMY 12 | --- 13 | Comments: '' 14 | Direction: bidirectional 15 | Lex: ascending aorta 16 | Regex: ascending aort(a|ic) 17 | Type: AORTIC_ANATOMY 18 | --- 19 | Comments: '' 20 | Direction: bidirectional 21 | Lex: thoracic aorta 22 | Regex: thoracic aort(a|ic) 23 | Type: AORTIC_ANATOMY 24 | --- 25 | Comments: '' 26 | Direction: bidirectional 27 | Lex: A1 28 | Regex: '' 29 | Type: BRAIN_ANATOMY 30 | --- 31 | Comments: '' 32 | Direction: bidirectional 33 | Lex: intracranial 34 | Regex: '' 35 | Type: BRAIN_ANATOMY 36 | --- 37 | Comments: '' 38 | Direction: bidirectional 39 | Lex: cerebral 40 | Regex: '' 41 | Type: BRAIN_ANATOMY 42 | --- 43 | Comments: '' 44 | Direction: bidirectional 45 | Lex: A2 46 | Regex: '' 47 | Type: BRAIN_ANATOMY 48 | --- 49 | Comments: '' 50 | Direction: bidirectional 51 | Lex: ACA 52 | Regex: '' 53 | Type: BRAIN_ANATOMY 54 | --- 55 | 
Comments: '' 56 | Direction: bidirectional 57 | Lex: anterior limb 58 | Regex: (anterior limb|crus anterius capsulae internae|capsula interna|pars anterior|crus 59 | anterior|capsulae internae) 60 | Type: BRAIN_ANATOMY 61 | --- 62 | Comments: '' 63 | Direction: bidirectional 64 | Lex: posterior limb 65 | Regex: (posterior limb|crus posterius capsulae internae|capsula interna|pars posterior|crus 66 | posterior|capsulae internae) 67 | Type: BRAIN_ANATOMY 68 | --- 69 | Comments: '' 70 | Direction: bidirectional 71 | Lex: genu limb 72 | Regex: (genu limb|internal capsule genu|genu capsulae internae) 73 | Type: BRAIN_ANATOMY 74 | --- 75 | Comments: '' 76 | Direction: bidirectional 77 | Lex: brain 78 | Regex: '' 79 | Type: BRAIN_ANATOMY 80 | --- 81 | Comments: '' 82 | Direction: bidirectional 83 | Lex: caudate 84 | Regex: (caudate|caudate nucleus|nucleus caudatus) 85 | Type: BRAIN_ANATOMY 86 | --- 87 | Comments: '' 88 | Direction: bidirectional 89 | Lex: cerebellar 90 | Regex: '' 91 | Type: BRAIN_ANATOMY 92 | --- 93 | Comments: '' 94 | Direction: bidirectional 95 | Lex: cerebellum 96 | Regex: (cerebellum|epencephalon-1|kleinhirn) 97 | Type: BRAIN_ANATOMY 98 | --- 99 | Comments: '' 100 | Direction: bidirectional 101 | Lex: corona radiata 102 | Regex: '' 103 | Type: BRAIN_ANATOMY 104 | --- 105 | Comments: '' 106 | Direction: bidirectional 107 | Lex: cerebral cortex 108 | Regex: (cerebral cortex|cortex of cerebrum|cortex cerebri|pallium|cortex cerebralis) 109 | Type: BRAIN_ANATOMY 110 | --- 111 | Comments: '' 112 | Direction: bidirectional 113 | Lex: encephalo 114 | Regex: '' 115 | Type: BRAIN_ANATOMY 116 | --- 117 | Comments: '' 118 | Direction: bidirectional 119 | Lex: brain lobe 120 | Regex: (frontal|parietal|occipital|temporal|limbic)( lobe(s))? 
121 | Type: BRAIN_ANATOMY 122 | --- 123 | Comments: '' 124 | Direction: bidirectional 125 | Lex: ganglia 126 | Regex: '' 127 | Type: BRAIN_ANATOMY 128 | --- 129 | Comments: '' 130 | Direction: bidirectional 131 | Lex: gray matter 132 | Regex: (gray|grey) matter 133 | Type: BRAIN_ANATOMY 134 | --- 135 | Comments: '' 136 | Direction: bidirectional 137 | Lex: white matter 138 | Regex: white matter 139 | Type: BRAIN_ANATOMY 140 | --- 141 | Comments: '' 142 | Direction: bidirectional 143 | Lex: gyrus 144 | Regex: '' 145 | Type: BRAIN_ANATOMY 146 | --- 147 | Comments: '' 148 | Direction: bidirectional 149 | Lex: hemisphere 150 | Regex: '' 151 | Type: BRAIN_ANATOMY 152 | --- 153 | Comments: '' 154 | Direction: bidirectional 155 | Lex: insular 156 | Regex: '' 157 | Type: BRAIN_ANATOMY 158 | --- 159 | Comments: '' 160 | Direction: bidirectional 161 | Lex: internal capsule 162 | Regex: (internal capsule|internal capsule radiations|capsula interna) 163 | Type: BRAIN_ANATOMY 164 | --- 165 | Comments: '' 166 | Direction: bidirectional 167 | Lex: lentiform 168 | Regex: '' 169 | Type: BRAIN_ANATOMY 170 | --- 171 | Comments: '' 172 | Direction: bidirectional 173 | Lex: M1 174 | Regex: '' 175 | Type: BRAIN_ANATOMY 176 | --- 177 | Comments: '' 178 | Direction: bidirectional 179 | Lex: M2 180 | Regex: '' 181 | Type: BRAIN_ANATOMY 182 | --- 183 | Comments: '' 184 | Direction: bidirectional 185 | Lex: MCA 186 | Regex: '' 187 | Type: BRAIN_ANATOMY 188 | --- 189 | Comments: '' 190 | Direction: bidirectional 191 | Lex: nuclei 192 | Regex: nucle(i|us) 193 | Type: BRAIN_ANATOMY 194 | --- 195 | Comments: '' 196 | Direction: bidirectional 197 | Lex: P1 198 | Regex: '' 199 | Type: BRAIN_ANATOMY 200 | --- 201 | Comments: '' 202 | Direction: bidirectional 203 | Lex: P2 204 | Regex: '' 205 | Type: BRAIN_ANATOMY 206 | --- 207 | Comments: '' 208 | Direction: bidirectional 209 | Lex: PCA 210 | Regex: '' 211 | Type: BRAIN_ANATOMY 212 | --- 213 | Comments: '' 214 | Direction: bidirectional 215 | Lex: 
pons 216 | Regex: '' 217 | Type: BRAIN_ANATOMY 218 | --- 219 | Comments: '' 220 | Direction: bidirectional 221 | Lex: putamen 222 | Regex: '' 223 | Type: BRAIN_ANATOMY 224 | --- 225 | Comments: '' 226 | Direction: bidirectional 227 | Lex: semiovale 228 | Regex: (semiovale center|semiovale|semi-ovale|medullary center|white matter of cerebrum|centrum 229 | semiovale|substantia centralis medullaris cerebri|corpus medullare cerebri|centrum 230 | ovale) 231 | Type: BRAIN_ANATOMY 232 | --- 233 | Comments: '' 234 | Direction: bidirectional 235 | Lex: sulcus 236 | Regex: '' 237 | Type: BRAIN_ANATOMY 238 | --- 239 | Comments: '' 240 | Direction: bidirectional 241 | Lex: territorial 242 | Regex: '' 243 | Type: BRAIN_ANATOMY 244 | --- 245 | Comments: '' 246 | Direction: bidirectional 247 | Lex: territory 248 | Regex: '' 249 | Type: BRAIN_ANATOMY 250 | --- 251 | Comments: '' 252 | Direction: backward 253 | Lex: measuring X.X cm 254 | Regex: (measuring|diameter of|measured) (?P[0-9]+(\.[0-9]*)?)( )?(?P(cm|mm)) 255 | Type: SEVERITY 256 | --- 257 | Comments: '' 258 | Direction: backward 259 | Lex: measuring XxY cm 260 | Regex: (measuring|diameter of|measured) (?P[0-9]+(\.[0-9]*)?( )?(x|by)( )?[0-9]+(\.[0-9]*)?)( 261 | )?(?P(cm|mm)) 262 | Type: SEVERITY 263 | --- 264 | Comments: '' 265 | Direction: forward 266 | Lex: X.Y cm 267 | Regex: (?P[0-9]+(\.[0-9]*)?)( )?(?P(cm|mm)) 268 | Type: SEVERITY 269 | --- 270 | Comments: '' 271 | Direction: forward 272 | Lex: XxY cm 273 | Regex: (?P[0-9]+(\.[0-9]*)?( )?(x|by)( )?[0-9]+(\.[0-9]*)?)( )?(?P(cm|mm)) 274 | Type: SEVERITY 275 | --- 276 | Comments: '' 277 | Direction: bidirectional 278 | Lex: anterior 279 | Regex: '' 280 | Type: HEART_ANATOMY 281 | --- 282 | Comments: '' 283 | Direction: bidirectional 284 | Lex: septal 285 | Regex: '' 286 | Type: HEART_ANATOMY 287 | --- 288 | Comments: '' 289 | Direction: bidirectional 290 | Lex: septum 291 | Regex: (cardiac septum|septum) 292 | Type: HEART_ANATOMY 293 | --- 294 | Comments: '' 295 | 
Direction: bidirectional 296 | Lex: inferior 297 | Regex: '' 298 | Type: HEART_ANATOMY 299 | --- 300 | Comments: '' 301 | Direction: bidirectional 302 | Lex: heart apex 303 | Regex: (cardiac apex|apex of (the )?heart) 304 | Type: HEART_ANATOMY 305 | --- 306 | Comments: '' 307 | Direction: bidirectional 308 | Lex: inferolateral 309 | Regex: '' 310 | Type: HEART_ANATOMY 311 | --- 312 | Comments: '' 313 | Direction: bidirectional 314 | Lex: 'lateral ' 315 | Regex: '' 316 | Type: HEART_ANATOMY 317 | --- 318 | Comments: '' 319 | Direction: bidirectional 320 | Lex: 'anteroseptal ' 321 | Regex: '' 322 | Type: HEART_ANATOMY 323 | --- 324 | Comments: '' 325 | Direction: bidirectional 326 | Lex: transmural 327 | Regex: '' 328 | Type: HEART_ANATOMY 329 | --- 330 | Comments: '' 331 | Direction: bidirectional 332 | Lex: wall 333 | Regex: '' 334 | Type: HEART_ANATOMY 335 | --- 336 | Comments: '' 337 | Direction: bidirectional 338 | Lex: left ventricular wall 339 | Regex: (\blv\b|left ventricular)( wall)? 340 | Type: HEART_ANATOMY 341 | --- 342 | Comments: '' 343 | Direction: bidirectional 344 | Lex: right ventricular wall 345 | Regex: (\brv\b|right ventricular)( wall)? 
346 | Type: HEART_ANATOMY 347 | --- 348 | Comments: '' 349 | Direction: bidirectional 350 | Lex: myocardial 351 | Regex: (myocardial|myocardium) 352 | Type: HEART_ANATOMY 353 | --- 354 | Comments: '' 355 | Direction: forward 356 | Lex: cardiac 357 | Regex: (cardiac|heart) 358 | Type: HEART_ANATOMY 359 | --- 360 | Comments: '' 361 | Direction: bidirectional 362 | Lex: omental 363 | Regex: (omentum|omental) 364 | Type: OTHER_CRITICAL_ANATOMY 365 | --- 366 | Comments: '' 367 | Direction: bidirectional 368 | Lex: spleen 369 | Regex: (spleen|splenic) 370 | Type: OTHER_CRITICAL_ANATOMY 371 | --- 372 | Comments: '' 373 | Direction: bidirectional 374 | Lex: kidney 375 | Regex: (kidney|\brenal\b) 376 | Type: OTHER_CRITICAL_ANATOMY 377 | --- 378 | Comments: '' 379 | Direction: bidirectional 380 | Lex: testis 381 | Regex: (testis|testicular|testes) 382 | Type: OTHER_CRITICAL_ANATOMY 383 | --- 384 | Comments: '' 385 | Direction: bidirectional 386 | Lex: ovary 387 | Regex: (ovary|ovaries|ovarian) 388 | Type: OTHER_CRITICAL_ANATOMY 389 | --- 390 | Comments: '' 391 | Direction: bidirectional 392 | Lex: azygos vein 393 | Regex: '' 394 | Type: AZYGOS_VEIN 395 | --- 396 | Comments: '' 397 | Direction: bidirectional 398 | Lex: azygos arch 399 | Regex: '' 400 | Type: AZYGOS_VEIN 401 | --- 402 | Comments: '' 403 | Direction: bidirectional 404 | Lex: inferior vena cava 405 | Regex: '' 406 | Type: LOWER_DEEP_VEIN 407 | --- 408 | Comments: '' 409 | Direction: bidirectional 410 | Lex: common iliac vein 411 | Regex: '' 412 | Type: LOWER_DEEP_VEIN 413 | --- 414 | Comments: '' 415 | Direction: bidirectional 416 | Lex: internal iliac vein 417 | Regex: '' 418 | Type: LOWER_DEEP_VEIN 419 | --- 420 | Comments: '' 421 | Direction: bidirectional 422 | Lex: external iliac vein 423 | Regex: '' 424 | Type: LOWER_DEEP_VEIN 425 | --- 426 | Comments: '' 427 | Direction: bidirectional 428 | Lex: common femoral vein 429 | Regex: '' 430 | Type: LOWER_DEEP_VEIN 431 | --- 432 | Comments: '' 433 | Direction: 
bidirectional 434 | Lex: femoral vein 435 | Regex: '' 436 | Type: LOWER_DEEP_VEIN 437 | --- 438 | Comments: '' 439 | Direction: bidirectional 440 | Lex: popliteal vein 441 | Regex: '' 442 | Type: LOWER_DEEP_VEIN 443 | --- 444 | Comments: '' 445 | Direction: bidirectional 446 | Lex: anterior tibial vein 447 | Regex: '' 448 | Type: LOWER_DEEP_VEIN 449 | --- 450 | Comments: '' 451 | Direction: bidirectional 452 | Lex: peroneal vein 453 | Regex: '' 454 | Type: LOWER_DEEP_VEIN 455 | --- 456 | Comments: '' 457 | Direction: bidirectional 458 | Lex: posterior tibial vein 459 | Regex: '' 460 | Type: LOWER_DEEP_VEIN 461 | --- 462 | Comments: '' 463 | Direction: bidirectional 464 | Lex: superior vena cava 465 | Regex: '' 466 | Type: UPPER_DEEP_VEIN 467 | --- 468 | Comments: '' 469 | Direction: bidirectional 470 | Lex: brachiocephalic vein 471 | Regex: '' 472 | Type: UPPER_DEEP_VEIN 473 | --- 474 | Comments: '' 475 | Direction: bidirectional 476 | Lex: subclavian vein 477 | Regex: '' 478 | Type: UPPER_DEEP_VEIN 479 | --- 480 | Comments: '' 481 | Direction: bidirectional 482 | Lex: axillary vein 483 | Regex: '' 484 | Type: UPPER_DEEP_VEIN 485 | --- 486 | Comments: '' 487 | Direction: bidirectional 488 | Lex: brachial vein 489 | Regex: '' 490 | Type: UPPER_DEEP_VEIN 491 | --- 492 | Comments: '' 493 | Direction: bidirectional 494 | Lex: external jugular vein 495 | Regex: '' 496 | Type: UPPER_DEEP_VEIN 497 | --- 498 | Comments: '' 499 | Direction: bidirectional 500 | Lex: anterior jugular vein 501 | Regex: '' 502 | Type: UPPER_DEEP_VEIN 503 | --- 504 | Comments: '' 505 | Direction: bidirectional 506 | Lex: jugular venous arch 507 | Regex: '' 508 | Type: UPPER_DEEP_VEIN 509 | --- 510 | Comments: '' 511 | Direction: bidirectional 512 | Lex: internal jugular vein 513 | Regex: '' 514 | Type: UPPER_DEEP_VEIN 515 | --- 516 | Comments: '' 517 | Direction: bidirectional 518 | Lex: hepatic vein 519 | Regex: '' 520 | Type: HEPATIC_VEIN 521 | --- 522 | Comments: '' 523 | Direction: 
bidirectional 524 | Lex: portal vein 525 | Regex: '' 526 | Type: PORTAL_SYSTEM_VEIN 527 | --- 528 | Comments: '' 529 | Direction: bidirectional 530 | Lex: splenic vein 531 | Regex: '' 532 | Type: PORTAL_SYSTEM_VEIN 533 | --- 534 | Comments: '' 535 | Direction: bidirectional 536 | Lex: inferior mesenteric vein 537 | Regex: '' 538 | Type: PORTAL_SYSTEM_VEIN 539 | --- 540 | Comments: '' 541 | Direction: bidirectional 542 | Lex: superior mesenteric vein 543 | Regex: '' 544 | Type: PORTAL_SYSTEM_VEIN 545 | --- 546 | Comments: '' 547 | Direction: bidirectional 548 | Lex: pulmonary vein 549 | Regex: '' 550 | Type: PULMONARY_VEIN 551 | --- 552 | Comments: '' 553 | Direction: bidirectional 554 | Lex: renal vein 555 | Regex: '' 556 | Type: RENAL_VEIN 557 | --- 558 | Comments: '' 559 | Direction: bidirectional 560 | Lex: sigmoid sinus 561 | Regex: '' 562 | Type: SINUS_VEIN 563 | --- 564 | Comments: '' 565 | Direction: bidirectional 566 | Lex: inferior petrosal sinus 567 | Regex: '' 568 | Type: SINUS_VEIN 569 | --- 570 | Comments: '' 571 | Direction: bidirectional 572 | Lex: cavernous sinus 573 | Regex: '' 574 | Type: SINUS_VEIN 575 | --- 576 | Comments: '' 577 | Direction: bidirectional 578 | Lex: greater saphenous vein 579 | Regex: '' 580 | Type: LOWER_SUPERFICIAL_VEIN 581 | --- 582 | Comments: '' 583 | Direction: bidirectional 584 | Lex: lesser saphenous vein 585 | Regex: '' 586 | Type: LOWER_SUPERFICIAL_VEIN 587 | --- 588 | Comments: '' 589 | Direction: bidirectional 590 | Lex: basilic vein 591 | Regex: '' 592 | Type: UPPER_SUPERFICIAL_VEIN 593 | --- 594 | Comments: '' 595 | Direction: bidirectional 596 | Lex: cephalic vein 597 | Regex: '' 598 | Type: UPPER_SUPERFICIAL_VEIN 599 | --- 600 | Comments: '' 601 | Direction: bidirectional 602 | Lex: varicocele 603 | Regex: '' 604 | Type: VARICOCELE 605 | --- 606 | Comments: '' 607 | Direction: bidirectional 608 | Lex: varicous vein 609 | Regex: '' 610 | Type: UPPER_SUPERFICIAL_VEIN 611 | 
-------------------------------------------------------------------------------- /KB/lexical_kb_nlm.tsv: -------------------------------------------------------------------------------- 1 | Lex Type Regex Direction Unnamed: 4 Unnamed: 5 Unnamed: 6 Codes 2 | although CONJ terminate 3 | apart from CONJ terminate 4 | as a cause for CONJ terminate 5 | as a cause of CONJ terminate 6 | as a etiology for CONJ terminate 7 | as a etiology of CONJ terminate 8 | as a reason for CONJ terminate 9 | as a reason of CONJ terminate 10 | as a secondary cause for CONJ terminate 11 | as a secondary cause of CONJ terminate 12 | as a secondary etiology for CONJ terminate 13 | as a secondary etiology of CONJ terminate 14 | as a secondary origin for CONJ terminate 15 | as a secondary origin of CONJ terminate 16 | as a secondary reason for CONJ terminate 17 | as a secondary reason of CONJ terminate 18 | as a secondary source for CONJ terminate 19 | as a secondary source of CONJ terminate 20 | as a source for CONJ terminate 21 | as a source of CONJ terminate 22 | as an cause for CONJ terminate 23 | as an cause of CONJ terminate 24 | as an etiology for CONJ terminate 25 | as an etiology of CONJ terminate 26 | as an origin for CONJ terminate 27 | as an origin of CONJ terminate 28 | as an reason for CONJ terminate 29 | as an reason of CONJ terminate 30 | as an secondary cause for CONJ terminate 31 | as an secondary cause of CONJ terminate 32 | as an secondary etiology for CONJ terminate 33 | as an secondary etiology of CONJ terminate 34 | as an secondary origin for CONJ terminate 35 | as an secondary origin of CONJ terminate 36 | as an secondary reason for CONJ terminate 37 | as an secondary reason of CONJ terminate 38 | as an secondary source for CONJ terminate 39 | as an secondary source of CONJ terminate 40 | as an source for CONJ terminate 41 | as an source of CONJ terminate 42 | as the cause for CONJ terminate 43 | as the cause of CONJ terminate 44 | as the etiology for CONJ terminate 45 
| as the etiology of CONJ terminate 46 | as the origin for CONJ terminate 47 | as the origin of CONJ terminate 48 | as the reason for CONJ terminate 49 | as the reason of CONJ terminate 50 | as the secondary cause for CONJ terminate 51 | as the secondary cause of CONJ terminate 52 | as the secondary etiology for CONJ terminate 53 | as the secondary etiology of CONJ terminate 54 | as the secondary origin for CONJ terminate 55 | as the secondary origin of CONJ terminate 56 | as the secondary reason for CONJ terminate 57 | as the secondary reason of CONJ terminate 58 | as the secondary source for CONJ terminate 59 | as the secondary source of CONJ terminate 60 | as the source for CONJ terminate 61 | as the source of CONJ terminate 62 | as there are CONJ terminate 2/14/2013 63 | aside from CONJ terminate 64 | but CONJ terminate 65 | cause for CONJ terminate 66 | cause of CONJ terminate 67 | causes for CONJ terminate 68 | causes of CONJ terminate 69 | etiology for CONJ terminate 70 | etiology of CONJ terminate 71 | except CONJ terminate 72 | however CONJ terminate 73 | involving CONJ terminate 74 | nevertheless CONJ terminate 75 | origin for CONJ terminate 76 | origin of CONJ terminate 77 | origins for CONJ terminate 78 | origins of CONJ terminate 79 | other possibilities of CONJ terminate 80 | reason for CONJ terminate 81 | reason of CONJ terminate 82 | reasons for CONJ terminate 83 | reasons of CONJ terminate 84 | secondary to CONJ terminate 85 | source for CONJ terminate 86 | source of CONJ terminate 87 | sources for CONJ terminate 88 | sources of CONJ terminate 89 | still CONJ terminate 90 | though CONJ terminate 91 | trigger event for CONJ terminate 92 | which CONJ terminate 93 | yet CONJ terminate 94 | -------------------------------------------------------------------------------- /KB/lexical_kb_nlm.yml: -------------------------------------------------------------------------------- 1 | Comments: '' 2 | Direction: terminate 3 | Lex: although 4 | Regex: '' 5 | 
Type: CONJ 6 | --- 7 | Comments: '' 8 | Direction: terminate 9 | Lex: apart from 10 | Regex: '' 11 | Type: CONJ 12 | --- 13 | Comments: '' 14 | Direction: terminate 15 | Lex: as a cause for 16 | Regex: '' 17 | Type: CONJ 18 | --- 19 | Comments: '' 20 | Direction: terminate 21 | Lex: as a cause of 22 | Regex: '' 23 | Type: CONJ 24 | --- 25 | Comments: '' 26 | Direction: terminate 27 | Lex: as a etiology for 28 | Regex: '' 29 | Type: CONJ 30 | --- 31 | Comments: '' 32 | Direction: terminate 33 | Lex: as a etiology of 34 | Regex: '' 35 | Type: CONJ 36 | --- 37 | Comments: '' 38 | Direction: terminate 39 | Lex: as a reason for 40 | Regex: '' 41 | Type: CONJ 42 | --- 43 | Comments: '' 44 | Direction: terminate 45 | Lex: as a reason of 46 | Regex: '' 47 | Type: CONJ 48 | --- 49 | Comments: '' 50 | Direction: terminate 51 | Lex: as a secondary cause for 52 | Regex: '' 53 | Type: CONJ 54 | --- 55 | Comments: '' 56 | Direction: terminate 57 | Lex: as a secondary cause of 58 | Regex: '' 59 | Type: CONJ 60 | --- 61 | Comments: '' 62 | Direction: terminate 63 | Lex: as a secondary etiology for 64 | Regex: '' 65 | Type: CONJ 66 | --- 67 | Comments: '' 68 | Direction: terminate 69 | Lex: as a secondary etiology of 70 | Regex: '' 71 | Type: CONJ 72 | --- 73 | Comments: '' 74 | Direction: terminate 75 | Lex: as a secondary origin for 76 | Regex: '' 77 | Type: CONJ 78 | --- 79 | Comments: '' 80 | Direction: terminate 81 | Lex: as a secondary origin of 82 | Regex: '' 83 | Type: CONJ 84 | --- 85 | Comments: '' 86 | Direction: terminate 87 | Lex: as a secondary reason for 88 | Regex: '' 89 | Type: CONJ 90 | --- 91 | Comments: '' 92 | Direction: terminate 93 | Lex: as a secondary reason of 94 | Regex: '' 95 | Type: CONJ 96 | --- 97 | Comments: '' 98 | Direction: terminate 99 | Lex: as a secondary source for 100 | Regex: '' 101 | Type: CONJ 102 | --- 103 | Comments: '' 104 | Direction: terminate 105 | Lex: as a secondary source of 106 | Regex: '' 107 | Type: CONJ 108 | --- 109 | 
Comments: '' 110 | Direction: terminate 111 | Lex: as a source for 112 | Regex: '' 113 | Type: CONJ 114 | --- 115 | Comments: '' 116 | Direction: terminate 117 | Lex: as a source of 118 | Regex: '' 119 | Type: CONJ 120 | --- 121 | Comments: '' 122 | Direction: terminate 123 | Lex: as an cause for 124 | Regex: '' 125 | Type: CONJ 126 | --- 127 | Comments: '' 128 | Direction: terminate 129 | Lex: as an cause of 130 | Regex: '' 131 | Type: CONJ 132 | --- 133 | Comments: '' 134 | Direction: terminate 135 | Lex: as an etiology for 136 | Regex: '' 137 | Type: CONJ 138 | --- 139 | Comments: '' 140 | Direction: terminate 141 | Lex: as an etiology of 142 | Regex: '' 143 | Type: CONJ 144 | --- 145 | Comments: '' 146 | Direction: terminate 147 | Lex: as an origin for 148 | Regex: '' 149 | Type: CONJ 150 | --- 151 | Comments: '' 152 | Direction: terminate 153 | Lex: as an origin of 154 | Regex: '' 155 | Type: CONJ 156 | --- 157 | Comments: '' 158 | Direction: terminate 159 | Lex: as an reason for 160 | Regex: '' 161 | Type: CONJ 162 | --- 163 | Comments: '' 164 | Direction: terminate 165 | Lex: as an reason of 166 | Regex: '' 167 | Type: CONJ 168 | --- 169 | Comments: '' 170 | Direction: terminate 171 | Lex: as an secondary cause for 172 | Regex: '' 173 | Type: CONJ 174 | --- 175 | Comments: '' 176 | Direction: terminate 177 | Lex: as an secondary cause of 178 | Regex: '' 179 | Type: CONJ 180 | --- 181 | Comments: '' 182 | Direction: terminate 183 | Lex: as an secondary etiology for 184 | Regex: '' 185 | Type: CONJ 186 | --- 187 | Comments: '' 188 | Direction: terminate 189 | Lex: as an secondary etiology of 190 | Regex: '' 191 | Type: CONJ 192 | --- 193 | Comments: '' 194 | Direction: terminate 195 | Lex: as an secondary origin for 196 | Regex: '' 197 | Type: CONJ 198 | --- 199 | Comments: '' 200 | Direction: terminate 201 | Lex: as an secondary origin of 202 | Regex: '' 203 | Type: CONJ 204 | --- 205 | Comments: '' 206 | Direction: terminate 207 | Lex: as an secondary reason 
for 208 | Regex: '' 209 | Type: CONJ 210 | --- 211 | Comments: '' 212 | Direction: terminate 213 | Lex: as an secondary reason of 214 | Regex: '' 215 | Type: CONJ 216 | --- 217 | Comments: '' 218 | Direction: terminate 219 | Lex: as an secondary source for 220 | Regex: '' 221 | Type: CONJ 222 | --- 223 | Comments: '' 224 | Direction: terminate 225 | Lex: as an secondary source of 226 | Regex: '' 227 | Type: CONJ 228 | --- 229 | Comments: '' 230 | Direction: terminate 231 | Lex: as an source for 232 | Regex: '' 233 | Type: CONJ 234 | --- 235 | Comments: '' 236 | Direction: terminate 237 | Lex: as an source of 238 | Regex: '' 239 | Type: CONJ 240 | --- 241 | Comments: '' 242 | Direction: terminate 243 | Lex: as the cause for 244 | Regex: '' 245 | Type: CONJ 246 | --- 247 | Comments: '' 248 | Direction: terminate 249 | Lex: as the cause of 250 | Regex: '' 251 | Type: CONJ 252 | --- 253 | Comments: '' 254 | Direction: terminate 255 | Lex: as the etiology for 256 | Regex: '' 257 | Type: CONJ 258 | --- 259 | Comments: '' 260 | Direction: terminate 261 | Lex: as the etiology of 262 | Regex: '' 263 | Type: CONJ 264 | --- 265 | Comments: '' 266 | Direction: terminate 267 | Lex: as the origin for 268 | Regex: '' 269 | Type: CONJ 270 | --- 271 | Comments: '' 272 | Direction: terminate 273 | Lex: as the origin of 274 | Regex: '' 275 | Type: CONJ 276 | --- 277 | Comments: '' 278 | Direction: terminate 279 | Lex: as the reason for 280 | Regex: '' 281 | Type: CONJ 282 | --- 283 | Comments: '' 284 | Direction: terminate 285 | Lex: as the reason of 286 | Regex: '' 287 | Type: CONJ 288 | --- 289 | Comments: '' 290 | Direction: terminate 291 | Lex: as the secondary cause for 292 | Regex: '' 293 | Type: CONJ 294 | --- 295 | Comments: '' 296 | Direction: terminate 297 | Lex: as the secondary cause of 298 | Regex: '' 299 | Type: CONJ 300 | --- 301 | Comments: '' 302 | Direction: terminate 303 | Lex: as the secondary etiology for 304 | Regex: '' 305 | Type: CONJ 306 | --- 307 | Comments: 
'' 308 | Direction: terminate 309 | Lex: as the secondary etiology of 310 | Regex: '' 311 | Type: CONJ 312 | --- 313 | Comments: '' 314 | Direction: terminate 315 | Lex: as the secondary origin for 316 | Regex: '' 317 | Type: CONJ 318 | --- 319 | Comments: '' 320 | Direction: terminate 321 | Lex: as the secondary origin of 322 | Regex: '' 323 | Type: CONJ 324 | --- 325 | Comments: '' 326 | Direction: terminate 327 | Lex: as the secondary reason for 328 | Regex: '' 329 | Type: CONJ 330 | --- 331 | Comments: '' 332 | Direction: terminate 333 | Lex: as the secondary reason of 334 | Regex: '' 335 | Type: CONJ 336 | --- 337 | Comments: '' 338 | Direction: terminate 339 | Lex: as the secondary source for 340 | Regex: '' 341 | Type: CONJ 342 | --- 343 | Comments: '' 344 | Direction: terminate 345 | Lex: as the secondary source of 346 | Regex: '' 347 | Type: CONJ 348 | --- 349 | Comments: '' 350 | Direction: terminate 351 | Lex: as the source for 352 | Regex: '' 353 | Type: CONJ 354 | --- 355 | Comments: '' 356 | Direction: terminate 357 | Lex: as the source of 358 | Regex: '' 359 | Type: CONJ 360 | --- 361 | Comments: 2/14/2013 362 | Direction: terminate 363 | Lex: 'as there are ' 364 | Regex: '' 365 | Type: CONJ 366 | --- 367 | Comments: '' 368 | Direction: terminate 369 | Lex: aside from 370 | Regex: '' 371 | Type: CONJ 372 | --- 373 | Comments: '' 374 | Direction: terminate 375 | Lex: but 376 | Regex: '' 377 | Type: CONJ 378 | --- 379 | Comments: '' 380 | Direction: terminate 381 | Lex: cause for 382 | Regex: '' 383 | Type: CONJ 384 | --- 385 | Comments: '' 386 | Direction: terminate 387 | Lex: cause of 388 | Regex: '' 389 | Type: CONJ 390 | --- 391 | Comments: '' 392 | Direction: terminate 393 | Lex: causes for 394 | Regex: '' 395 | Type: CONJ 396 | --- 397 | Comments: '' 398 | Direction: terminate 399 | Lex: causes of 400 | Regex: '' 401 | Type: CONJ 402 | --- 403 | Comments: '' 404 | Direction: terminate 405 | Lex: etiology for 406 | Regex: '' 407 | Type: CONJ 408 | 
--- 409 | Comments: '' 410 | Direction: terminate 411 | Lex: etiology of 412 | Regex: '' 413 | Type: CONJ 414 | --- 415 | Comments: '' 416 | Direction: terminate 417 | Lex: except 418 | Regex: '' 419 | Type: CONJ 420 | --- 421 | Comments: '' 422 | Direction: terminate 423 | Lex: however 424 | Regex: '' 425 | Type: CONJ 426 | --- 427 | Comments: '' 428 | Direction: terminate 429 | Lex: involving 430 | Regex: '' 431 | Type: CONJ 432 | --- 433 | Comments: '' 434 | Direction: terminate 435 | Lex: nevertheless 436 | Regex: '' 437 | Type: CONJ 438 | --- 439 | Comments: '' 440 | Direction: terminate 441 | Lex: origin for 442 | Regex: '' 443 | Type: CONJ 444 | --- 445 | Comments: '' 446 | Direction: terminate 447 | Lex: origin of 448 | Regex: '' 449 | Type: CONJ 450 | --- 451 | Comments: '' 452 | Direction: terminate 453 | Lex: origins for 454 | Regex: '' 455 | Type: CONJ 456 | --- 457 | Comments: '' 458 | Direction: terminate 459 | Lex: origins of 460 | Regex: '' 461 | Type: CONJ 462 | --- 463 | Comments: '' 464 | Direction: terminate 465 | Lex: other possibilities of 466 | Regex: '' 467 | Type: CONJ 468 | --- 469 | Comments: '' 470 | Direction: terminate 471 | Lex: reason for 472 | Regex: '' 473 | Type: CONJ 474 | --- 475 | Comments: '' 476 | Direction: terminate 477 | Lex: reason of 478 | Regex: '' 479 | Type: CONJ 480 | --- 481 | Comments: '' 482 | Direction: terminate 483 | Lex: reasons for 484 | Regex: '' 485 | Type: CONJ 486 | --- 487 | Comments: '' 488 | Direction: terminate 489 | Lex: reasons of 490 | Regex: '' 491 | Type: CONJ 492 | --- 493 | Comments: '' 494 | Direction: terminate 495 | Lex: secondary to 496 | Regex: '' 497 | Type: CONJ 498 | --- 499 | Comments: '' 500 | Direction: terminate 501 | Lex: source for 502 | Regex: '' 503 | Type: CONJ 504 | --- 505 | Comments: '' 506 | Direction: terminate 507 | Lex: source of 508 | Regex: '' 509 | Type: CONJ 510 | --- 511 | Comments: '' 512 | Direction: terminate 513 | Lex: sources for 514 | Regex: '' 515 | Type: 
CONJ 516 | --- 517 | Comments: '' 518 | Direction: terminate 519 | Lex: sources of 520 | Regex: '' 521 | Type: CONJ 522 | --- 523 | Comments: '' 524 | Direction: terminate 525 | Lex: still 526 | Regex: '' 527 | Type: CONJ 528 | --- 529 | Comments: '' 530 | Direction: terminate 531 | Lex: though 532 | Regex: '' 533 | Type: CONJ 534 | --- 535 | Comments: '' 536 | Direction: terminate 537 | Lex: trigger event for 538 | Regex: '' 539 | Type: CONJ 540 | --- 541 | Comments: '' 542 | Direction: terminate 543 | Lex: which 544 | Regex: '' 545 | Type: CONJ 546 | --- 547 | Comments: '' 548 | Direction: terminate 549 | Lex: yet 550 | Regex: '' 551 | Type: CONJ 552 | -------------------------------------------------------------------------------- /KB/pah_utah.tsv: -------------------------------------------------------------------------------- 1 | Lex Type Regex Direction 2 | hypertension PULMONARY_ARTERIAL_HYPERTENSION \bhypertension\b 3 | pah PULMONARY_ARTERIAL_HYPERTENSION \bpah\b 4 | pulmonary hypertension PULMONARY_ARTERIAL_ANATOMY pulmonary\s(arterial )?(hypertension) 5 | main pulmonary artery PULMONARY_ARTERIAL_ANATOMY 6 | pulmonary trunk PULMONARY_ARTERIAL_ANATOMY 7 | pulmonary artery PULMONARY_ARTERIAL_ANATOMY 8 | pulmonary arteries PULMONARY_ARTERIAL_ANATOMY 9 | mosaic PULMONARY_ARTERIAL_HYPERTENSION 10 | right heart CARDIAC_ANATOMY 11 | septum CARDIAC_ANATOMY 12 | enlargement PULMONARY_ARTERIAL_HYPERTENSION (tapering|enlargement) 13 | tapering PULMONARY_ARTERIAL_HYPERTENSION 14 | pruning PULMONARY_ARTERIAL_HYPERTENSION 15 | right heart strain PULMONARY_ARTERIAL_HYPERTENSION right heart (strain|failure) 16 | -------------------------------------------------------------------------------- /KB/pah_utah.txt: -------------------------------------------------------------------------------- 1 | Lex Type Regex Direction hypertension PULMONARY_ARTERIAL_HYPERTENSION \bhypertension\b pah PULMONARY_ARTERIAL_HYPERTENSION \bpah\b pulmonary hypertension PULMONARY_ARTERIAL_ANATOMY 
pulmonary\s(arterial )?(hypertension) main pulmonary artery PULMONARY_ARTERIAL_ANATOMY pulmonary trunk PULMONARY_ARTERIAL_ANATOMY mosaic PULMONARY_ARTERIAL_HYPERTENSION right heart CARDIAC_ANATOMY -------------------------------------------------------------------------------- /KB/pah_utah.yml: -------------------------------------------------------------------------------- 1 | Comments: '' 2 | Direction: '' 3 | Lex: hypertension 4 | Regex: \bhypertension\b 5 | Type: PULMONARY_ARTERIAL_HYPERTENSION 6 | --- 7 | Comments: '' 8 | Direction: '' 9 | Lex: pah 10 | Regex: \bpah\b 11 | Type: PULMONARY_ARTERIAL_HYPERTENSION 12 | --- 13 | Comments: '' 14 | Direction: '' 15 | Lex: pulmonary hypertension 16 | Regex: pulmonary\s(arterial )?(hypertension) 17 | Type: PULMONARY_ARTERIAL_ANATOMY 18 | --- 19 | Comments: '' 20 | Direction: ' ' 21 | Lex: main pulmonary artery 22 | Regex: '' 23 | Type: PULMONARY_ARTERIAL_ANATOMY 24 | --- 25 | Comments: '' 26 | Direction: '' 27 | Lex: pulmonary trunk 28 | Regex: '' 29 | Type: PULMONARY_ARTERIAL_ANATOMY 30 | --- 31 | Comments: '' 32 | Direction: '' 33 | Lex: pulmonary artery 34 | Regex: '' 35 | Type: PULMONARY_ARTERIAL_ANATOMY 36 | --- 37 | Comments: '' 38 | Direction: '' 39 | Lex: pulmonary arteries 40 | Regex: '' 41 | Type: PULMONARY_ARTERIAL_ANATOMY 42 | --- 43 | Comments: '' 44 | Direction: '' 45 | Lex: mosaic 46 | Regex: '' 47 | Type: PULMONARY_ARTERIAL_HYPERTENSION 48 | --- 49 | Comments: '' 50 | Direction: '' 51 | Lex: right heart 52 | Regex: '' 53 | Type: CARDIAC_ANATOMY 54 | --- 55 | Comments: '' 56 | Direction: '' 57 | Lex: septum 58 | Regex: '' 59 | Type: CARDIAC_ANATOMY 60 | --- 61 | Comments: '' 62 | Direction: '' 63 | Lex: enlargement 64 | Regex: (tapering|enlargement) 65 | Type: PULMONARY_ARTERIAL_HYPERTENSION 66 | --- 67 | Comments: '' 68 | Direction: '' 69 | Lex: tapering 70 | Regex: '' 71 | Type: PULMONARY_ARTERIAL_HYPERTENSION 72 | --- 73 | Comments: '' 74 | Direction: '' 75 | Lex: pruning 76 | Regex: '' 77 | Type: 
PULMONARY_ARTERIAL_HYPERTENSION 78 | --- 79 | Comments: '' 80 | Direction: '' 81 | Lex: right heart strain 82 | Regex: right heart (strain|failure) 83 | Type: PULMONARY_ARTERIAL_HYPERTENSION 84 | -------------------------------------------------------------------------------- /KB/pe_kb.tsv: -------------------------------------------------------------------------------- 1 | Lex Type Regex Direction 2 | embolism PULMONARY_EMBOLISM \b(emboli|embolism|embolus)\b 3 | pe PULMONARY_EMBOLISM \bpe\b 4 | pulmonary embolism PULMONARY_EMBOLISM pulmonary\s(artery )?(embol[a-z]+) 5 | bolus timing QUALITY_FEATURE "\bbolus[ -]{0,1}timing" bidirectional # fixes pedoc #129 dq 6 | limited exam QUALITY_FEATURE (suboptimal|degraded|limited) (exam[a-z]*|study|scan|evaluation|bolus|timing) bidirectional 7 | nondiagnostic exam QUALITY_FEATURE nondiagnostic (exam[a-z]*|study|scan|evaluation) bidirectional #fix for pedoc #231 8 | artifact ARTIFACT artifact(ual)? 9 | bulk motion ARTIFACT 10 | motion ARTIFACT 11 | patient motion ARTIFACT 12 | respiratory motion ARTIFACT 13 | thromboembolic disease thromboembolic disease -------------------------------------------------------------------------------- /KB/pe_kb.yml: -------------------------------------------------------------------------------- 1 | Comments: '' 2 | Direction: '' 3 | Lex: embolism 4 | Regex: \b(emboli|embolism|embolus)\b 5 | Type: PULMONARY_EMBOLISM 6 | --- 7 | Comments: '' 8 | Direction: '' 9 | Lex: pe 10 | Regex: \bpe\b 11 | Type: PULMONARY_EMBOLISM 12 | --- 13 | Comments: '' 14 | Direction: '' 15 | Lex: pulmonary embolism 16 | Regex: pulmonary\s(artery )?(embol[a-z]+) 17 | Type: PULMONARY_EMBOLISM 18 | --- 19 | Comments: ' # fixes pedoc #129 dq' 20 | Direction: bidirectional 21 | Lex: bolus timing 22 | Regex: \bbolus[ -]{0,1}timing 23 | Type: QUALITY_FEATURE 24 | --- 25 | Comments: '' 26 | Direction: bidirectional 27 | Lex: limited exam 28 | Regex: (suboptimal|degraded|limited) 
(exam[a-z]*|study|scan|evaluation|bolus|timing) 29 | Type: QUALITY_FEATURE 30 | --- 31 | Comments: ' #fix for pedoc #231' 32 | Direction: bidirectional 33 | Lex: nondiagnostic exam 34 | Regex: nondiagnostic (exam[a-z]*|study|scan|evaluation) 35 | Type: QUALITY_FEATURE 36 | --- 37 | Comments: '' 38 | Direction: '' 39 | Lex: artifact 40 | Regex: artifact(ual)? 41 | Type: ARTIFACT 42 | --- 43 | Comments: '' 44 | Direction: '' 45 | Lex: bulk motion 46 | Regex: '' 47 | Type: ARTIFACT 48 | --- 49 | Comments: '' 50 | Direction: '' 51 | Lex: motion 52 | Regex: '' 53 | Type: ARTIFACT 54 | --- 55 | Comments: '' 56 | Direction: '' 57 | Lex: patient motion 58 | Regex: '' 59 | Type: ARTIFACT 60 | --- 61 | Comments: '' 62 | Direction: '' 63 | Lex: respiratory motion 64 | Regex: '' 65 | Type: ARTIFACT 66 | --- 67 | Comments: '' 68 | Direction: '' 69 | Lex: thromboembolic disease 70 | Regex: '' 71 | Type: thromboembolic disease 72 | -------------------------------------------------------------------------------- /KB/pneumonia_targets.yml: -------------------------------------------------------------------------------- 1 | Comments: '' 2 | Direction: '' 3 | Lex: pneumonia 4 | Regex: \bpneumonia[s]?\b 5 | Type: EVIDENCE_OF_PNEUMONIA 6 | --- 7 | Comments: '' 8 | Direction: '' 9 | Lex: consolidation 10 | Regex: '' 11 | Type: EVIDENCE_OF_PNEUMONIA 12 | --- 13 | Comments: '' 14 | Direction: '' 15 | Lex: infiltrate 16 | Regex: '' 17 | Type: EVIDENCE_OF_PNEUMONIA 18 | -------------------------------------------------------------------------------- /KB/quality_artifacts.tsv: -------------------------------------------------------------------------------- 1 | Lex Type Regex Direction 2 | bolus timing QUALITY_FEATURE \bbolus[ -]{0,1}timing bidirectional # fixes pedoc #129 dq 3 | limited exam QUALITY_FEATURE (suboptimal|degraded|limited) (exam[a-z]*|study|scan|evaluation|bolus|timing) bidirectional 4 | nondiagnostic exam QUALITY_FEATURE nondiagnostic (exam[a-z]*|study|scan|evaluation) 
bidirectional #fix for pedoc #231 5 | artifact ARTIFACT artifact(ual)? 6 | bulk motion ARTIFACT 7 | motion ARTIFACT 8 | patient motion ARTIFACT 9 | respiratory motion ARTIFACT 10 | -------------------------------------------------------------------------------- /KB/quality_artifacts.yml: -------------------------------------------------------------------------------- 1 | Comments: ' # fixes pedoc #129 dq' 2 | Direction: bidirectional 3 | Lex: bolus timing 4 | Regex: \bbolus[ -]{0,1}timing 5 | Type: QUALITY_FEATURE 6 | --- 7 | Comments: '' 8 | Direction: bidirectional 9 | Lex: limited exam 10 | Regex: (suboptimal|degraded|limited) (exam[a-z]*|study|scan|evaluation|bolus|timing) 11 | Type: QUALITY_FEATURE 12 | --- 13 | Comments: ' #fix for pedoc #231' 14 | Direction: bidirectional 15 | Lex: nondiagnostic exam 16 | Regex: nondiagnostic (exam[a-z]*|study|scan|evaluation) 17 | Type: QUALITY_FEATURE 18 | --- 19 | Comments: '' 20 | Direction: '' 21 | Lex: artifact 22 | Regex: artifact(ual)? 23 | Type: ARTIFACT 24 | --- 25 | Comments: '' 26 | Direction: '' 27 | Lex: bulk motion 28 | Regex: '' 29 | Type: ARTIFACT 30 | --- 31 | Comments: '' 32 | Direction: '' 33 | Lex: motion 34 | Regex: '' 35 | Type: ARTIFACT 36 | --- 37 | Comments: '' 38 | Direction: '' 39 | Lex: patient motion 40 | Regex: '' 41 | Type: ARTIFACT 42 | --- 43 | Comments: '' 44 | Direction: '' 45 | Lex: respiratory motion 46 | Regex: '' 47 | Type: ARTIFACT 48 | -------------------------------------------------------------------------------- /KB/schema2.csv: -------------------------------------------------------------------------------- 1 | # Lines that start with the # symbol are comments and are ignored 2 | #The schema consists of a numeric value, followed by a label (e.g. "AMBIVALENT"), followed by a Python express that can evaluate to True or False 3 | #The Python expression uses LABELS from the rules. 
processReports.py will substitute the LABEL with any matched values identified from 4 | #the corresponding rules 5 | 1,AMBIVALENT,DISEASE_STATE == 2 6 | 2,Negative/Certain/Acute,DISEASE_STATE == 0 and CERTAINTY_STATE == 1 7 | 3,Negative/Uncertain/Chronic,DISEASE_STATE == 0 and CERTAINTY_STATE == 0 and ACUTE_STATE == 0 8 | 4,Positive/Uncertain/Chronic,DISEASE_STATE == 1 and CERTAINTY_STATE == 0 and ACUTE_STATE == 0 9 | 5,Positive/Certain/Chronic,DISEASE_STATE == 1 and CERTAINTY_STATE == 1 and ACUTE_STATE == 0 10 | 6,Negative/Uncertain/Acute,DISEASE_STATE == 0 and CERTAINTY_STATE == 0 and ACUTE_STATE == 1 11 | 7,Positive/Uncertain/Acute,DISEASE_STATE == 1 and CERTAINTY_STATE == 0 and ACUTE_STATE == 1 12 | 8,Positive/Certain/Acute,DISEASE_STATE == 1 and CERTAINTY_STATE == 1 and ACUTE_STATE == 1 13 | -------------------------------------------------------------------------------- /KB/utah_crit.tsv: -------------------------------------------------------------------------------- 1 | Lex	Type	Regex	Direction	Codes 2 | pulmonary embolism	PULMONARY_EMBOLISM	pulmonary\s(artery )?(embol[a-z]+)|\bpe\b|pulmonary thromboembolic disease		Chest 3 | aneurysm	ANEURYSM	\baneurysm[a-z]*\b|(aneurysmal )?dilatation		Chest, Neuro, ABD/Pel 4 | aortic dissection	AORTIC_DISSECTION	(aortic|aorta)\s(artery\s)?dissection		Chest, ABD/Pel 5 | appendicitis	APPENDICITIS			ABD/Pel 6 | inflammation	INFLAMMATION	inflammation|inflammatory|infection		Spine, Neuro, ABD/Pel, Extremity 7 | bowel obstruction	BOWEL_OBSTRUCTION			ABD/Pel 8 | midline shift	BRAIN_HERNIATION	((subfalcian|subfalcine|tonsillar)\sherniation)|((midline|mid-line)\sshift)		Neuro 9 | carotid dissection	CAROTID_DISSECTION	carotid?\s*?\w*\s*dissection		Neuro 10 | intracranial hemorrhage	INTRACRANIAL_HEMORRHAGE	(cerebral|intracranial|brain)\s(hemorrhage|hematoma|bleed)		Neuro 11 | fracture	FRACTURE	fracture(s)?		
Spine, Neuro, ABD/Pel, Chest, Extremity 12 | cholecystitis	CHOLECYSTITIS			ABD/Pel 13 | cord compression	CORD_COMPRESSION	cord compression		Extremity, Spine, Neuro 14 | depressed skull fracture	DEPRESSED_SKULL_FRACTURE			Neuro 15 | diverticulitis	DIVERTICULITIS			ABD/Pel 16 | dvt	DVT	((non.?)?occlusive)?\s?(thromb(us|i|osis|osed)\b|DVT|clot\b)	2/28/13	ABD/Pel 17 | ectasia	ECTASIA	(ectasia| ectatic)		ABD/Pel 18 | ectopic pregnancy	ECTOPIC_PREGNANCY			ABD/Pel 19 | epiglottitis	EPIGLOTTITIS			Chest 20 | fetal demise	FETAL_DEMISE			ABD/Pel 21 | free air	FREE_AIR	(pneumoperitoneum|((intraperitoneal|free)\s(gas|air)))		Chest, ABD/Pel 22 | infarct	INFARCT	\b(stroke|infarct|infarction)\b		Neuro, ABD/Pel 23 | ischemic bowel	ISCHEMIC_BOWEL			ABD/Pel 24 | lacunar infarct	LACUNAR_INFARCT			Neuro 25 | mediastinal emphysema	MEDIASTINAL_EMPHYSEMA	(mediastinal emphysema|pneumomediastinum)		Chest 26 | omental infarct	OMENTAL_INFARCT			ABD/Pel 27 | bone infarct	OSTEONECROSIS	(bone infarct|osteonecrosis)		Extremity 28 | pneumothorax	PNEUMOTHORAX	pneumothorax|hydropneumothorax|pneumothoraces		Chest 29 | portal venous air	PORTAL_VENOUS_AIR	portal (venous\s)?(gas|air)		ABD/Pel 30 | renal infarct	RENAL_INFARCT			ABD/Pel 31 | retroperitoneal hemorrhage	RETROPERITONEAL_HEMORRHAGE	(retro|intra)?peritoneal\s(hemorrhage|hematoma|bleed)		ABD/Pel 32 | retropharyngeal abscess	RETROPHARYNGEAL_ABSCESS			Chest 33 | ruptured aneurysm	RUPTURED_ANEURYSM	(ruptured aneurysm|aortic rupture)		Chest, Neuro, ABD/Pel 34 | splenic infarct	SPLENIC_INFARCT			Chest, ABD/Pel 35 | torsion	TORSION			ABD/Pel 36 | volvulus	VOLVULUS			ABD/Pel 37 | pneumonia	PNEUMONIA	(pneumonia|consolidation|aspiration)		Chest 38 | cancer	CANCER	cancer|metastatic(\sdisease|\slesion)?|metastases|carcinoma|sarcoma|malignancy		Spine, Neuro, ABD/Pel, Chest, Extremity 39 | NO CRITICAL FINDING	NULL_FINDING			Spine, Neuro, ABD/Pel, Chest, Extremity 40 | 0:UNREVIEWED	NULL_FINDING			Spine, Neuro, ABD/Pel, Chest, Extremity 41 | thrombosis	THROMBOSIS	(thromb(us|i|osis|osed)\b|clot\b)	
Spine, Neuro, ABD/Pel, Extremity 42 | -------------------------------------------------------------------------------- /KB/utah_crit.yml: -------------------------------------------------------------------------------- 1 | Comments: Chest 2 | Direction: '' 3 | Lex: pulmonary embolism 4 | Regex: pulmonary\s(artery )?(embol[a-z]+)|\bpe\b|pulmonary thromboembolic disease 5 | Type: PULMONARY_EMBOLISM 6 | --- 7 | Comments: Chest, Neuro, ABD/Pel 8 | Direction: '' 9 | Lex: aneurysm 10 | Regex: \baneurysm[a-z]*\b|(aneurysmal )?dilatation 11 | Type: ANEURYSM 12 | --- 13 | Comments: Chest, ABD/Pel 14 | Direction: '' 15 | Lex: aortic dissection 16 | Regex: (aortic|aorta)\s(artery\s)?dissection 17 | Type: AORTIC_DISSECTION 18 | --- 19 | Comments: ABD/Pel 20 | Direction: '' 21 | Lex: appendicitis 22 | Regex: '' 23 | Type: APPENDICITIS 24 | --- 25 | Comments: Spine, Neuro, ABD/Pel, Extremity 26 | Direction: '' 27 | Lex: inflammation 28 | Regex: inflammation|inflammatory|infection 29 | Type: INFLAMMATION 30 | --- 31 | Comments: ABD/Pel 32 | Direction: '' 33 | Lex: bowel obstruction 34 | Regex: '' 35 | Type: BOWEL_OBSTRUCTION 36 | --- 37 | Comments: Neuro 38 | Direction: '' 39 | Lex: midline shift 40 | Regex: ((subfalcian|subfalcine|tonsillar)\sherniation)|((midline|mid-line)\sshift) 41 | Type: BRAIN_HERNIATION 42 | --- 43 | Comments: Neuro 44 | Direction: '' 45 | Lex: carotid dissection 46 | Regex: carotid?\s*?\w*\s*dissection 47 | Type: CAROTID_DISSECTION 48 | --- 49 | Comments: Neuro 50 | Direction: '' 51 | Lex: intracranial hemorrhage 52 | Regex: (cerebral|intracranial|brain)\s(hemorrhage|hematoma|bleed) 53 | Type: INTRACRANIAL_HEMORRHAGE 54 | --- 55 | Comments: Spine, Neuro, ABD/Pel, Chest, Extremity 56 | Direction: '' 57 | Lex: fracture 58 | Regex: fracture(s)? 
59 | Type: FRACTURE 60 | --- 61 | Comments: ABD/Pel 62 | Direction: '' 63 | Lex: cholecystitis 64 | Regex: '' 65 | Type: CHOLECYSTITIS 66 | --- 67 | Comments: Extremity, Spine, Neuro 68 | Direction: '' 69 | Lex: cord compression 70 | Regex: cord compression 71 | Type: CORD_COMPRESSION 72 | --- 73 | Comments: Neuro 74 | Direction: '' 75 | Lex: depressed skull fracture 76 | Regex: '' 77 | Type: DEPRESSED_SKULL_FRACTURE 78 | --- 79 | Comments: ABD/Pel 80 | Direction: '' 81 | Lex: diverticulitis 82 | Regex: '' 83 | Type: DIVERTICULITIS 84 | --- 85 | Comments: ABD/Pel 86 | Direction: 2/28/13 87 | Lex: dvt 88 | Regex: ((non.?)?occlusive)?\s?(thromb(us|i|osis|osed)\b|DVT|clot\b) 89 | Type: DVT 90 | --- 91 | Comments: ABD/Pel 92 | Direction: '' 93 | Lex: ectasia 94 | Regex: (ectasia| ectatic) 95 | Type: ECTASIA 96 | --- 97 | Comments: ABD/Pel 98 | Direction: '' 99 | Lex: ectopic pregnancy 100 | Regex: '' 101 | Type: ECTOPIC_PREGNANCY 102 | --- 103 | Comments: Chest 104 | Direction: '' 105 | Lex: epiglottitis 106 | Regex: '' 107 | Type: EPIGLOTTITIS 108 | --- 109 | Comments: ABD/Pel 110 | Direction: '' 111 | Lex: fetal demise 112 | Regex: '' 113 | Type: FETAL_DEMISE 114 | --- 115 | Comments: Chest, ABD/Pel 116 | Direction: '' 117 | Lex: free air 118 | Regex: (pneumoperitoneum|((intraperitoneal|free)\s(gas|air))) 119 | Type: FREE_AIR 120 | --- 121 | Comments: Neuro, ABD/Pel 122 | Direction: '' 123 | Lex: infarct 124 | Regex: \b(stroke|infarct|infarction)\b 125 | Type: INFARCT 126 | --- 127 | Comments: ABD/Pel 128 | Direction: '' 129 | Lex: ischemic bowel 130 | Regex: '' 131 | Type: ISCHEMIC_BOWEL 132 | --- 133 | Comments: Neuro 134 | Direction: '' 135 | Lex: lacunar infarct 136 | Regex: '' 137 | Type: LACUNAR_INFARCT 138 | --- 139 | Comments: Chest 140 | Direction: '' 141 | Lex: mediastinal emphysema 142 | Regex: (mediastinal emphysema|pneumomediastinum) 143 | Type: MEDIASTINAL_EMPHYSEMA 144 | --- 145 | Comments: ABD/Pel 146 | Direction: '' 147 | Lex: omental infarct 148 | 
Regex: '' 149 | Type: OMENTAL_INFARCT 150 | --- 151 | Comments: Extremity 152 | Direction: '' 153 | Lex: bone infarct 154 | Regex: (bone infarct|osteonecrosis) 155 | Type: OSTEONECROSIS 156 | --- 157 | Comments: Chest 158 | Direction: '' 159 | Lex: pneumothorax 160 | Regex: pneumothorax|hydropneumothorax|pneumothoraces 161 | Type: PNEUMOTHORAX 162 | --- 163 | Comments: ABD/Pel 164 | Direction: '' 165 | Lex: portal venous air 166 | Regex: portal (venous\s)?(gas|air) 167 | Type: PORTAL_VENOUS_AIR 168 | --- 169 | Comments: ABD/Pel 170 | Direction: '' 171 | Lex: renal infarct 172 | Regex: '' 173 | Type: RENAL_INFARCT 174 | --- 175 | Comments: ABD/Pel 176 | Direction: '' 177 | Lex: retroperitoneal hemorrhage 178 | Regex: (retro|intra)?peritoneal\s(hemorrhage|hematoma|bleed) 179 | Type: RETROPERITONEAL_HEMORRHAGE 180 | --- 181 | Comments: Chest 182 | Direction: '' 183 | Lex: retropharyngeal abscess 184 | Regex: '' 185 | Type: RETROPHARYNGEAL_ABSCESS 186 | --- 187 | Comments: Chest, Neuro, ABD/Pel 188 | Direction: '' 189 | Lex: ruptured aneurysm 190 | Regex: (ruptured aneurysm|aortic rupture) 191 | Type: RUPTURED_ANEURYSM 192 | --- 193 | Comments: Chest, ABD/Pel 194 | Direction: '' 195 | Lex: splenic infarct 196 | Regex: '' 197 | Type: SPLENIC_INFARCT 198 | --- 199 | Comments: ABD/Pel 200 | Direction: '' 201 | Lex: torsion 202 | Regex: '' 203 | Type: TORSION 204 | --- 205 | Comments: ABD/Pel 206 | Direction: '' 207 | Lex: volvulus 208 | Regex: '' 209 | Type: VOLVULUS 210 | --- 211 | Comments: Chest 212 | Direction: '' 213 | Lex: pneumonia 214 | Regex: (pneumonia|consolidation|aspiration) 215 | Type: PNEUMONIA 216 | --- 217 | Comments: Spine, Neuro, ABD/Pel, Chest, Extremity 218 | Direction: '' 219 | Lex: cancer 220 | Regex: cancer|metastatic(\sdisease|\slesion)?|metastases|carcinoma|sarcoma|malignancy 221 | Type: CANCER 222 | --- 223 | Comments: Spine, Neuro, ABD/Pel, Chest, Extremity 224 | Direction: '' 225 | Lex: NO CRITICAL FINDING 226 | Regex: '' 227 | Type: 
Regex: (thromb(us|i|osis|osed)\b|clot\b)
The package can be installed by cloning the repository and running `python setup.py install`. 51 | Alternatively, it can be installed via 52 | .. code:: shell
88 | 89 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | 22 | .PHONY: help 23 | help: 24 | @echo "Please use \`make ' where is one of" 25 | @echo " html to make standalone HTML files" 26 | @echo " dirhtml to make HTML files named index.html in directories" 27 | @echo " singlehtml to make a single large HTML file" 28 | @echo " pickle to make pickle files" 29 | @echo " json to make JSON files" 30 | @echo " htmlhelp to make HTML files and a HTML help project" 31 | @echo " qthelp to make HTML files and a qthelp project" 32 | @echo " applehelp to make an Apple Help Book" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " 
latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | @echo " coverage to run coverage check of the documentation (if enabled)" 49 | 50 | .PHONY: clean 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | .PHONY: html 55 | html: 56 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 57 | @echo 58 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 59 | 60 | .PHONY: dirhtml 61 | dirhtml: 62 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 63 | @echo 64 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 65 | 66 | .PHONY: singlehtml 67 | singlehtml: 68 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 69 | @echo 70 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 71 | 72 | .PHONY: pickle 73 | pickle: 74 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 75 | @echo 76 | @echo "Build finished; now you can process the pickle files." 77 | 78 | .PHONY: json 79 | json: 80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 81 | @echo 82 | @echo "Build finished; now you can process the JSON files." 83 | 84 | .PHONY: htmlhelp 85 | htmlhelp: 86 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 87 | @echo 88 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 89 | ".hhp project file in $(BUILDDIR)/htmlhelp." 
90 | 91 | .PHONY: qthelp 92 | qthelp: 93 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 94 | @echo 95 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 96 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 97 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pyConTextNLP.qhcp" 98 | @echo "To view the help file:" 99 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pyConTextNLP.qhc" 100 | 101 | .PHONY: applehelp 102 | applehelp: 103 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 104 | @echo 105 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 106 | @echo "N.B. You won't be able to view it unless you put it in" \ 107 | "~/Library/Documentation/Help or install it in your application" \ 108 | "bundle." 109 | 110 | .PHONY: devhelp 111 | devhelp: 112 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 113 | @echo 114 | @echo "Build finished." 115 | @echo "To view the help file:" 116 | @echo "# mkdir -p $$HOME/.local/share/devhelp/pyConTextNLP" 117 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pyConTextNLP" 118 | @echo "# devhelp" 119 | 120 | .PHONY: epub 121 | epub: 122 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 123 | @echo 124 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 125 | 126 | .PHONY: latex 127 | latex: 128 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 129 | @echo 130 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 131 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 132 | "(use \`make latexpdf' here to do that automatically)." 133 | 134 | .PHONY: latexpdf 135 | latexpdf: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo "Running LaTeX files through pdflatex..." 138 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 139 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 
140 | 141 | .PHONY: latexpdfja 142 | latexpdfja: 143 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 144 | @echo "Running LaTeX files through platex and dvipdfmx..." 145 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 146 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 147 | 148 | .PHONY: text 149 | text: 150 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 151 | @echo 152 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 153 | 154 | .PHONY: man 155 | man: 156 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 157 | @echo 158 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 159 | 160 | .PHONY: texinfo 161 | texinfo: 162 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 163 | @echo 164 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 165 | @echo "Run \`make' in that directory to run these through makeinfo" \ 166 | "(use \`make info' here to do that automatically)." 167 | 168 | .PHONY: info 169 | info: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo "Running Texinfo files through makeinfo..." 172 | make -C $(BUILDDIR)/texinfo info 173 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 174 | 175 | .PHONY: gettext 176 | gettext: 177 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 178 | @echo 179 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 180 | 181 | .PHONY: changes 182 | changes: 183 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 184 | @echo 185 | @echo "The overview file is in $(BUILDDIR)/changes." 186 | 187 | .PHONY: linkcheck 188 | linkcheck: 189 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 190 | @echo 191 | @echo "Link check complete; look for any errors in the above output " \ 192 | "or in $(BUILDDIR)/linkcheck/output.txt." 
193 | 194 | .PHONY: doctest 195 | doctest: 196 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 197 | @echo "Testing of doctests in the sources finished, look at the " \ 198 | "results in $(BUILDDIR)/doctest/output.txt." 199 | 200 | .PHONY: coverage 201 | coverage: 202 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 203 | @echo "Testing of coverage in the sources finished, look at the " \ 204 | "results in $(BUILDDIR)/coverage/python.txt." 205 | 206 | .PHONY: xml 207 | xml: 208 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 209 | @echo 210 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 211 | 212 | .PHONY: pseudoxml 213 | pseudoxml: 214 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 215 | @echo 216 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 217 | -------------------------------------------------------------------------------- /docs/source/bibliography.md: -------------------------------------------------------------------------------- 1 | ## Publications Based on pyConTextNLP 2 | 3 | * Chapman BE, Lee S, Kang HP, Chapman WW, "Document-level classification of CT pulmonary angiography reports based on an extension of the ConText algorithm." [J Biomed Inform. 2011 Oct;44(5):728-37](http://www.sciencedirect.com/science/article/pii/S1532046411000621) 4 | * Wilson RA, et al. "Automated ancillary cancer history classification for mesothelioma patients from free-text clinical reports." J Pathol Inform. 2010 Oct 11;1:24. 5 | * Chapman BE, Lee S, Kang HP, Chapman WW. Using ConText to Identify Candidate Pulmonary Embolism Subjects Based on Dictated Radiology Reports. (Presented at AMIA Clinical Research Informatics Summit 2011) 6 | * Wilson RA, Chapman WW, DeFries SJ, Becich MJ, Chapman BE. Identifying History of Ancillary Cancers in Mesothelioma Patients from Free-Text Clinical Reports. (Presented at AMIA 2010). 
7 | * Chapman WW, Hillert D, Velupillai S, Kvist M, Skeppstedt M, Chapman BE, Conway M, Tharp M, Mowery DL, Deleger L (2013). Extending the NegEx Lexicon for Multiple Languages. Stud Health Technol Inform,192, 677-81 8 | * Velupillai S, Skeppstedt M, Kvist M, Mowery D, Chapman BE, Dalianis H, and Chapman WW. Porting a Rule-based Assertion Classifier for Clinical Text from English to Swedish. The 4th International Louhi Workshop on Health Document Text Mining and Information Analysis (Louhi 2013), edited by Hanna Suominen. 9 | * Chapman WW, Hilert D, Velupillai S, Kvist M, Skeppstedt M, Chapman BE, Conway M, Tharp M, Mowery DL, Deleger L. Extending the NegEx lexicon for multiple languages. (In press Proc Medinfo 2013) 10 | * Velupillai S, Skeppstedt M, Kvist M, Mowery D, Chapman BE, Dalianis H, Chapman WW (July 2014). Cue-based assertion classification for Swedish clinical text--developing a lexicon for pyConTextSwe. Artif Intell Med, 61(3), 137-144. 11 | * Mowery D, Chapman WW, Chapman BE, Conway MA, South BR, Madden E; Keyhani S. Extracting a Stroke Phenotype Risk Factor from Veteran Health Administration Clinical Reports: An Information Content Analysis. Journal of Biomedical Semantics (accepted) 12 | * Wilson RA, Chapman BE. Automated Capture of Pulmonary Embolism Spatial Location in Dictated Reports Using the ConText Algorithm. (Presented at RSNA 2011; poster) 13 | * Gentili A. Chapman BE. Use of pyConText to Classify Reports Containing Critical Results. (Presented at RSNA 2011; oral). 14 | * Gentili A. Chapman BE. Use of pyConText to Assist in Auditing for Chest Biopsy Complications. (Presented RSNA 2012) 15 | * Chapman BE, Wei W, Chapman WW. The Frequency of ConText Lexical Items in Diverse Medical Texts. (Presented IEEE HISB 2012, poster) 16 | * Gentili A, Chapman BE. Use of Natural Language Processing to Classify Radiology Reports Containing Description of the Abdominal Aorta. (Presented at RSNA 2013). 
17 | * Chapman BE, Gentili A, Chen J, Miyakoshi A, Chapman W. Measuring Expressions of Uncertainty in Radiology Texts for Natural Language Processing Applications. (Presented at RSNA 2013). 18 | * Chapman BE, Chen J, Miyakoshi A, Chapman WW, Gentili A. Measuring How Perceived meanings of Uncertainty Cues Differs with and Without Sentence-Level Context in Radiology Reports. (Presented at RSNA 2013). 19 | * Chapman BE, Heilbrun M. Lexical Disparities Between Reports Authored by Residents and Reports Authored by Attending Radiologists Using Natural Language Processing. (Presented at RSNA 2015). 20 | * Taggart M, Chapman WW, Steinberg BA, Ruckel S, Pregenzer-Wenzler A, Du Y, Ferraro F, Bucher BT, Lloyd-Jones DM, Rondina MT, Shah RU. Comparison of 2 Natural Language Processing Methods for Identification of Bleeding Among Critically Ill Patients [JAMA Netw Open. 2018;1(6):e183451](https://jamanetwork.com/journals/jamanetworkopen/fullarticle/2706498). 21 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # pyConTextNLP documentation build configuration file, created by 5 | # sphinx-quickstart on Thu Mar 17 21:09:29 2016. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | 19 | # If extensions (or modules to document with autodoc) are in another directory, 20 | # add these directories to sys.path here. If the directory is relative to the 21 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
22 | #sys.path.insert(0, os.path.abspath('.')) 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 27 | #needs_sphinx = '1.0' 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | 'sphinx.ext.autodoc', 34 | 'sphinx.ext.doctest', 35 | 'sphinx.ext.mathjax', 36 | 'sphinx.ext.viewcode', 37 | ] 38 | 39 | # Add any paths that contain templates here, relative to this directory. 40 | templates_path = ['_templates'] 41 | 42 | # The suffix(es) of source filenames. 43 | # You can specify multiple suffix as a list of string: 44 | # source_suffix = ['.rst', '.md'] 45 | #source_suffix = '.rst' 46 | from recommonmark.parser import CommonMarkParser 47 | 48 | source_parsers = { 49 | '.md': CommonMarkParser, 50 | } 51 | 52 | source_suffix = ['.rst', '.md'] 53 | # The encoding of source files. 54 | #source_encoding = 'utf-8-sig' 55 | 56 | # The master toctree document. 57 | master_doc = 'index' 58 | 59 | # General information about the project. 60 | project = 'pyConTextNLP' 61 | copyright = '2016, Brian E. Chapman, Ph.D.' 62 | author = 'Brian E. Chapman, Ph.D.' 63 | 64 | # The version info for the project you're documenting, acts as replacement for 65 | # |version| and |release|, also used in various other places throughout the 66 | # built documents. 67 | # 68 | # The short X.Y version. 69 | version = '0.6.0.9' 70 | # The full version, including alpha/beta/rc tags. 71 | release = '0.6.0.9' 72 | 73 | # The language for content autogenerated by Sphinx. Refer to documentation 74 | # for a list of supported languages. 75 | # 76 | # This is also used if you do content translation via gettext catalogs. 77 | # Usually you set "language" from the command line for these cases. 
78 | language = None 79 | 80 | # There are two options for replacing |today|: either, you set today to some 81 | # non-false value, then it is used: 82 | #today = '' 83 | # Else, today_fmt is used as the format for a strftime call. 84 | #today_fmt = '%B %d, %Y' 85 | 86 | # List of patterns, relative to source directory, that match files and 87 | # directories to ignore when looking for source files. 88 | exclude_patterns = [] 89 | 90 | # The reST default role (used for this markup: `text`) to use for all 91 | # documents. 92 | #default_role = None 93 | 94 | # If true, '()' will be appended to :func: etc. cross-reference text. 95 | #add_function_parentheses = True 96 | 97 | # If true, the current module name will be prepended to all description 98 | # unit titles (such as .. function::). 99 | #add_module_names = True 100 | 101 | # If true, sectionauthor and moduleauthor directives will be shown in the 102 | # output. They are ignored by default. 103 | #show_authors = False 104 | 105 | # The name of the Pygments (syntax highlighting) style to use. 106 | pygments_style = 'sphinx' 107 | 108 | # A list of ignored prefixes for module index sorting. 109 | #modindex_common_prefix = [] 110 | 111 | # If true, keep warnings as "system message" paragraphs in the built documents. 112 | #keep_warnings = False 113 | 114 | # If true, `todo` and `todoList` produce output, else they produce nothing. 115 | todo_include_todos = False 116 | 117 | 118 | # -- Options for HTML output ---------------------------------------------- 119 | 120 | # The theme to use for HTML and HTML Help pages. See the documentation for 121 | # a list of builtin themes. 122 | html_theme = 'alabaster' 123 | 124 | # Theme options are theme-specific and customize the look and feel of a theme 125 | # further. For a list of options available for each theme, see the 126 | # documentation. 127 | #html_theme_options = {} 128 | 129 | # Add any paths that contain custom themes here, relative to this directory. 
130 | #html_theme_path = [] 131 | 132 | # The name for this set of Sphinx documents. If None, it defaults to 133 | # " v documentation". 134 | #html_title = None 135 | 136 | # A shorter title for the navigation bar. Default is the same as html_title. 137 | #html_short_title = None 138 | 139 | # The name of an image file (relative to this directory) to place at the top 140 | # of the sidebar. 141 | #html_logo = None 142 | 143 | # The name of an image file (within the static path) to use as favicon of the 144 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 145 | # pixels large. 146 | #html_favicon = None 147 | 148 | # Add any paths that contain custom static files (such as style sheets) here, 149 | # relative to this directory. They are copied after the builtin static files, 150 | # so a file named "default.css" will overwrite the builtin "default.css". 151 | html_static_path = ['_static'] 152 | 153 | # Add any extra paths that contain custom files (such as robots.txt or 154 | # .htaccess) here, relative to this directory. These files are copied 155 | # directly to the root of the documentation. 156 | #html_extra_path = [] 157 | 158 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 159 | # using the given strftime format. 160 | #html_last_updated_fmt = '%b %d, %Y' 161 | 162 | # If true, SmartyPants will be used to convert quotes and dashes to 163 | # typographically correct entities. 164 | #html_use_smartypants = True 165 | 166 | # Custom sidebar templates, maps document names to template names. 167 | #html_sidebars = {} 168 | 169 | # Additional templates that should be rendered to pages, maps page names to 170 | # template names. 171 | #html_additional_pages = {} 172 | 173 | # If false, no module index is generated. 174 | #html_domain_indices = True 175 | 176 | # If false, no index is generated. 177 | #html_use_index = True 178 | 179 | # If true, the index is split into individual pages for each letter. 
180 | #html_split_index = False 181 | 182 | # If true, links to the reST sources are added to the pages. 183 | #html_show_sourcelink = True 184 | 185 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 186 | #html_show_sphinx = True 187 | 188 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 189 | #html_show_copyright = True 190 | 191 | # If true, an OpenSearch description file will be output, and all pages will 192 | # contain a tag referring to it. The value of this option must be the 193 | # base URL from which the finished HTML is served. 194 | #html_use_opensearch = '' 195 | 196 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 197 | #html_file_suffix = None 198 | 199 | # Language to be used for generating the HTML full-text search index. 200 | # Sphinx supports the following languages: 201 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' 202 | # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr' 203 | #html_search_language = 'en' 204 | 205 | # A dictionary with options for the search language support, empty by default. 206 | # Now only 'ja' uses this config value 207 | #html_search_options = {'type': 'default'} 208 | 209 | # The name of a javascript file (relative to the configuration directory) that 210 | # implements a search results scorer. If empty, the default will be used. 211 | #html_search_scorer = 'scorer.js' 212 | 213 | # Output file base name for HTML help builder. 214 | htmlhelp_basename = 'pyConTextNLPdoc' 215 | 216 | # -- Options for LaTeX output --------------------------------------------- 217 | 218 | latex_elements = { 219 | # The paper size ('letterpaper' or 'a4paper'). 220 | #'papersize': 'letterpaper', 221 | 222 | # The font size ('10pt', '11pt' or '12pt'). 223 | #'pointsize': '10pt', 224 | 225 | # Additional stuff for the LaTeX preamble. 
226 | #'preamble': '', 227 | 228 | # Latex figure (float) alignment 229 | #'figure_align': 'htbp', 230 | } 231 | 232 | # Grouping the document tree into LaTeX files. List of tuples 233 | # (source start file, target name, title, 234 | # author, documentclass [howto, manual, or own class]). 235 | latex_documents = [ 236 | (master_doc, 'pyConTextNLP.tex', 'pyConTextNLP Documentation', 237 | 'Brian E. Chapman, Ph.D.', 'manual'), 238 | ] 239 | 240 | # The name of an image file (relative to this directory) to place at the top of 241 | # the title page. 242 | #latex_logo = None 243 | 244 | # For "manual" documents, if this is true, then toplevel headings are parts, 245 | # not chapters. 246 | #latex_use_parts = False 247 | 248 | # If true, show page references after internal links. 249 | #latex_show_pagerefs = False 250 | 251 | # If true, show URL addresses after external links. 252 | #latex_show_urls = False 253 | 254 | # Documents to append as an appendix to all manuals. 255 | #latex_appendices = [] 256 | 257 | # If false, no module index is generated. 258 | #latex_domain_indices = True 259 | 260 | 261 | # -- Options for manual page output --------------------------------------- 262 | 263 | # One entry per manual page. List of tuples 264 | # (source start file, name, description, authors, manual section). 265 | man_pages = [ 266 | (master_doc, 'pycontextnlp', 'pyConTextNLP Documentation', 267 | [author], 1) 268 | ] 269 | 270 | # If true, show URL addresses after external links. 271 | #man_show_urls = False 272 | 273 | 274 | # -- Options for Texinfo output ------------------------------------------- 275 | 276 | # Grouping the document tree into Texinfo files. 
List of tuples 277 | # (source start file, target name, title, author, 278 | # dir menu entry, description, category) 279 | texinfo_documents = [ 280 | (master_doc, 'pyConTextNLP', 'pyConTextNLP Documentation', 281 | author, 'pyConTextNLP', 'One line description of project.', 282 | 'Miscellaneous'), 283 | ] 284 | 285 | # Documents to append as an appendix to all manuals. 286 | #texinfo_appendices = [] 287 | 288 | # If false, no module index is generated. 289 | #texinfo_domain_indices = True 290 | 291 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 292 | #texinfo_show_urls = 'footnote' 293 | 294 | # If true, do not generate a @detailmenu in the "Top" node's menu. 295 | #texinfo_no_detailmenu = False 296 | -------------------------------------------------------------------------------- /docs/source/index.md: -------------------------------------------------------------------------------- 1 | # pyConTextNLP 2 | ## Python ConText implementation for NLP 3 | 4 | ## What is pyConTextNLP? 5 | 6 | pyConTextNLP is a partial implementation of the ConText algorithm using Python. The original description of pyConTextNLP was provided in Chapman BE, Lee S, Kang HP, Chapman WW, "Document-level classification of CT pulmonary angiography reports based on an extension of the ConText algorithm." [J Biomed Inform. 2011 Oct;44(5):728-37](http://www.sciencedirect.com/science/article/pii/S1532046411000621) 7 | 8 | Since that publication pyConTextNLP has undergone several important revisions: 9 | 10 | 1. Incorporating NetworkX to describe target/modifier relationships. 11 | 1. Porting from Python 2.x to Python 3.x 12 | * This is a work in progress. pyConTextNLP does not have a clean transition for handling unicode in Python 2.x in my attempts to port to 3.x 13 | 1. Rewriting pyConTextNLP to have a more functional style. 14 | * This has been motivated by both the need to incorporate parallel processing into the algorithm for speed and to reduce unintended side effects. 
15 | * This work currently lies in the subpackage ``functional``. 16 | 17 | 18 | 19 | 20 | ## Dependencies 21 | * [NetworkX](https://pypi.python.org/pypi/networkx/) for relating ConText relationships. 22 | * [TextBlob](https://pypi.python.org/pypi/textblob) for sentence splitting. 23 | * [nose](https://pypi.python.org/pypi/nose/) for unit testing. 24 | 25 | ## Installation 26 | 27 | pyConTextNLP is hosted on [GitHub](https://github.com/chapmanbe/pyConTextNLP) and is indexed in PyPI, so it can be installed with pip: 28 | 29 | ``pip install pyConTextNLP`` 30 | 31 | ## [Bibliography](./bibliography.md) 32 | -------------------------------------------------------------------------------- /notebooks/BasicSentenceMarkup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Demonstration of Basic Sentence Markup with pyConTextNLP" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "pyConTextNLP uses NetworkX directional graphs to represent the markup: nodes in the graph will be the concepts that are identified in the sentence and edges in the graph will be the relationships between those concepts. 
" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import pyConTextNLP.pyConText as pyConText\n", 24 | "import pyConTextNLP.itemData as itemData\n", 25 | "import networkx as nx" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "* ``pyConText`` contains the bulk of the pyConTextNLP functionality, including basic class definitions such as the ``ConTextMarkup`` class that represents the markup of a sentence.\n", 33 | "* ``itemData`` contains a class definition for an itemData and functions for reading itemData definitions which are assumed to be in a tab seperated file that is specified as either a local file or a remote resource. In this example we will read definitions straight from the GitHub repository.\n", 34 | " * An ``itemData`` in its most basic form is a four-tuple consisting of \n", 35 | " 1. A **literal** (e.g. \"pulmonary embolism\", \"no definite evidence of\")\n", 36 | " 1. A **category** (e.g. \"CRITICAL_FINDING\", \"PROBABLE_EXISTENCE\")\n", 37 | " 1. A **regular expression** that defines how to identify the literal concept. If no regular expression is specified, a regular expression will be built directly from the literal by wrapping it with word boundaries (e.g. r\"\"\"\\bpulmonary embolism\\b\"\"\")\n", 38 | " 1. A **rule** that defines how the concept works in the sentence (e.g. a negation term that looks **forward** in the sentence)." 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "### Sentences\n", 46 | "\n", 47 | "These example reports are taken from (with modification) the [MIMIC2 demo data set](https://physionet.org/mimic2/) that is a publically available database of de-identified medical records for deceased individuals. 
" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "reports = [\n", 57 | " \"\"\"IMPRESSION: Evaluation limited by lack of IV contrast; however, no evidence of\n", 58 | " bowel obstruction or mass identified within the abdomen or pelvis. Non-specific interstitial opacities and bronchiectasis seen at the right\n", 59 | " base, suggestive of post-inflammatory changes.\"\"\",\n", 60 | " \"\"\"IMPRESSION: Evidence of early pulmonary vascular congestion and interstitial edema. Probable scarring at the medial aspect of the right lung base, with no\n", 61 | " definite consolidation.\"\"\"\n", 62 | " ,\n", 63 | " \"\"\"IMPRESSION:\n", 64 | " \n", 65 | " 1. 2.0 cm cyst of the right renal lower pole. Otherwise, normal appearance\n", 66 | " of the right kidney with patent vasculature and no sonographic evidence of\n", 67 | " renal artery stenosis.\n", 68 | " 2. Surgically absent left kidney.\"\"\",\n", 69 | " \"\"\"IMPRESSION: No pneumothorax.\"\"\",\n", 70 | " \"\"\"IMPRESSION: No definite pneumothorax\"\"\"\n", 71 | " \"\"\"IMPRESSION: New opacity at the left lower lobe consistent with pneumonia.\"\"\"\n", 72 | "]" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "### Read the ``itemData`` definitions\n", 80 | "\n", 81 | "We're reading directly from GitHub. You could read from a local file using a `file://` URL." 
82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "modifiers = itemData.get_items(\n", 91 | " \"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/lexical_kb_05042016.yml\")\n", 92 | "targets = itemData.get_items(\n", 93 | " \"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/utah_crit.yml\")\n" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "### Example function to analyze each sentence\n", 101 | "\n", 102 | "This the function we'll use for each report. The following section of this document steps through each line." 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "def markup_sentence(s, modifiers, targets, prune_inactive=True):\n", 112 | " \"\"\"\n", 113 | " \"\"\"\n", 114 | " markup = pyConText.ConTextMarkup()\n", 115 | " markup.setRawText(s)\n", 116 | " markup.cleanText()\n", 117 | " markup.markItems(modifiers, mode=\"modifier\")\n", 118 | " markup.markItems(targets, mode=\"target\")\n", 119 | " markup.pruneMarks()\n", 120 | " markup.dropMarks('Exclusion')\n", 121 | " # apply modifiers to any targets within the modifiers scope\n", 122 | " markup.applyModifiers()\n", 123 | " markup.pruneSelfModifyingRelationships()\n", 124 | " if prune_inactive:\n", 125 | " markup.dropInactiveModifiers()\n", 126 | " return markup" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "### We're going to start with our simplest of sentences" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "reports[3]" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "### marking up a sentence\n", 150 | "\n", 151 | "We start by 
creating an instance of the ``ConTextMarkup`` class. This is a subclass of a NetworkX DiGraph. Information will be stored in the nodes and edges. " 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "markup = pyConText.ConTextMarkup()" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "isinstance(markup,nx.DiGraph)" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "#### Set the text to be processed" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "markup.setRawText(reports[3].lower())\n", 186 | "print(markup)\n", 187 | "print(len(markup.getRawText()))\n" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "#### Clean the text\n", 195 | "\n", 196 | "Prior to processing we do some basic cleaning of the text, such as replacing multiple white spaces with a single space. You'll notice this in the spacing between the colon and \"no\" in the ``raw`` and ``clean`` versions of the text." 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "markup.cleanText()\n", 206 | "print(markup)\n", 207 | "print(len(markup.getText()))" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "#### Identify concepts in the sentence\n", 215 | "\n", 216 | "The ``markItems`` method takes a list of itemData and uses the regular expressions to identify any instances of the itemData in the sentence. With the ``mode`` keyword we specify whether these ``itemData`` are targets or modifiers. 
This value will be stored as a data attribute of the node that is created in the graph for any identified concepts." 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "name": "stdout", 226 | "output_type": "stream", 227 | "text": [ 228 | "[( 193409165385789347928701545101362172235 no ['definite_negated_existence'] , {'category': 'modifier'})]\n", 229 | "\n" 230 | ] 231 | } 232 | ], 233 | "source": [ 234 | "markup.markItems(modifiers, mode=\"modifier\")\n", 235 | "print(markup.nodes(data=True))\n", 236 | "print(type(list(markup.nodes())[0]))" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": {}, 242 | "source": [ 243 | "#### What does our initial markup look like?\n", 244 | "\n", 245 | "* We've identified one concept in the sentence: ``no``\n", 246 | "* We've created a ``tagObject`` for this concept which keeps track of the actual phrase identified by the regular expression, what the category of the itemData was (``definite_negated_existence``), this is a list because there can be multiple categories. There is also an absurdly long identifier for the node. Note that our mode ``modifier`` has been stored as a data element of the node. In NetworkX each node (or edge) has a dictionary for data." 
247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "#### Now let's markup the targets" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": {}, 260 | "outputs": [], 261 | "source": [ 262 | "markup.markItems(targets, mode=\"target\")" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "name": "stdout", 272 | "output_type": "stream", 273 | "text": [ 274 | "( 193409165385789347928701545101362172235 no ['definite_negated_existence'] , {'category': 'modifier'})\n", 275 | "( 199069516875446444699146669155103248715 pneumothorax ['pneumothorax'] , {'category': 'target'})\n" 276 | ] 277 | } 278 | ], 279 | "source": [ 280 | "for node in markup.nodes(data=True):\n", 281 | " print(node)" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "#### What does our markup look like now?\n", 289 | "\n", 290 | "We've added another node to the graph. This time the ``target`` ``pneumothorax``." 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "metadata": {}, 296 | "source": [ 297 | "#### Prune Marks\n", 298 | "\n", 299 | "After identifying concepts, we prune concepts that are a subset of another identified concept. This results in no changes here, but the importance will be shown later with a different sentence." 
300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [ 308 | "markup.pruneMarks()\n", 309 | "for node in markup.nodes(data=True):\n", 310 | " print(node)" 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": {}, 316 | "source": [ 317 | "#### Are there any relationships in our markup?\n", 318 | "\n", 319 | "We do not yet have any relationships (edges) between our concepts (target and modifier edges)" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": null, 325 | "metadata": {}, 326 | "outputs": [], 327 | "source": [ 328 | "print(markup.edges())" 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "#### Apply modifiers\n", 336 | "\n", 337 | "We now call the ``applyModifiers`` method of the ConTextMarkup object to identify any relationships between the nodes." 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": null, 343 | "metadata": {}, 344 | "outputs": [], 345 | "source": [ 346 | "markup.applyModifiers()\n", 347 | "for edge in markup.edges():\n", 348 | " print(edge)" 349 | ] 350 | }, 351 | { 352 | "cell_type": "markdown", 353 | "metadata": {}, 354 | "source": [ 355 | "#### We now have a relationship!\n", 356 | "\n", 357 | "We now have a directed edge between our ``no`` node and our ``pneumothorax`` node. This will be interepreted as ``pneumothorax`` being a definitely negated concept in the sentence." 358 | ] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": {}, 363 | "source": [ 364 | "## What's next?\n", 365 | "\n", 366 | "The value of pruning is shown in [this](./BasicSentencemarkupPart2.ipynb) notebook." 
367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": null, 372 | "metadata": {}, 373 | "outputs": [], 374 | "source": [] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "metadata": {}, 380 | "outputs": [], 381 | "source": [] 382 | } 383 | ], 384 | "metadata": { 385 | "kernelspec": { 386 | "display_name": "Python 3", 387 | "language": "python", 388 | "name": "python3" 389 | }, 390 | "language_info": { 391 | "codemirror_mode": { 392 | "name": "ipython", 393 | "version": 3 394 | }, 395 | "file_extension": ".py", 396 | "mimetype": "text/x-python", 397 | "name": "python", 398 | "nbconvert_exporter": "python", 399 | "pygments_lexer": "ipython3", 400 | "version": "3.5.23.6.8" 401 | } 402 | }, 403 | "nbformat": 4, 404 | "nbformat_minor": 1 405 | } 406 | -------------------------------------------------------------------------------- /notebooks/BasicSentenceMarkupPart2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Demonstration of Basic Sentence Markup with pyConTextNLP, Part 2.\n", 8 | "## An ever-so-slightly more complex sentence\n", 9 | "\n", 10 | "### Let's use a slightly more complex sentence that will illustrate pruning." 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import pyConTextNLP.pyConText as pyConText\n", 20 | "import pyConTextNLP.itemData as itemData\n", 21 | "import networkx as nx" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "### Sentences\n", 29 | "\n", 30 | "These example reports are taken from (with modification) the [MIMIC2 demo data set](https://physionet.org/mimic2/) that is a publically available database of de-identified medical records for deceased individuals. 
" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "reports = [\n", 40 | " \"\"\"IMPRESSION: Evaluation limited by lack of IV contrast; however, no evidence of\n", 41 | " bowel obstruction or mass identified within the abdomen or pelvis. Non-specific interstitial opacities and bronchiectasis seen at the right\n", 42 | " base, suggestive of post-inflammatory changes.\"\"\",\n", 43 | " \"\"\"IMPRESSION: Evidence of early pulmonary vascular congestion and interstitial edema. Probable scarring at the medial aspect of the right lung base, with no\n", 44 | " definite consolidation.\"\"\"\n", 45 | " ,\n", 46 | " \"\"\"IMPRESSION:\n", 47 | " \n", 48 | " 1. 2.0 cm cyst of the right renal lower pole. Otherwise, normal appearance\n", 49 | " of the right kidney with patent vasculature and no sonographic evidence of\n", 50 | " renal artery stenosis.\n", 51 | " 2. Surgically absent left kidney.\"\"\",\n", 52 | " \"\"\"IMPRESSION: No pneumothorax.\"\"\",\n", 53 | " \"\"\"IMPRESSION: No definite pneumothorax\"\"\",\n", 54 | " \"\"\"IMPRESSION: New opacity at the left lower lobe consistent with pneumonia.\"\"\"\n", 55 | "]" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "### Read the ``itemData`` definitions" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "modifiers = itemData.get_items(\n", 72 | " \"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/lexical_kb_05042016.yml\")\n", 73 | "targets = itemData.get_items(\n", 74 | " \"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/utah_crit.yml\")\n" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "### We're going to start with our simplest of sentences" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 
null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "reports[4]" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "### marking up a sentence\n", 98 | "\n", 99 | "We start by creating an instance of the ``ConTextMarkup`` class. This is a subclass of a NetworkX DiGraph. Information will be stored in the nodes and edges. " 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "markup = pyConText.ConTextMarkup()" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "markup.setRawText(reports[4].lower())\n", 118 | "print(markup)\n", 119 | "print(len(markup.getRawText()))\n", 120 | "\n", 121 | "markup.cleanText()\n", 122 | "print(markup)\n", 123 | "print(len(markup.getText()))" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "#### Identify concepts in the sentence\n" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "( 256833253737566050220546835725615337803 no ['definite_negated_existence'] , {'category': 'modifier'})\n", 143 | "( 256833892316555915191107839689855045963 no definite ['definite_negated_existence'] , {'category': 'modifier'})\n", 144 | "( 256826997881853923908450449495296807243 definite ['definite_existence'] , {'category': 'modifier'})\n", 145 | "( 256849716557454889207255398223055655243 pneumothorax ['pneumothorax'] , {'category': 'target'})\n" 146 | ] 147 | } 148 | ], 149 | "source": [ 150 | "markup.markItems(modifiers, mode=\"modifier\")\n", 151 | "markup.markItems(targets, mode=\"target\")\n", 152 | "for node in markup.nodes(data=True):\n", 153 | " print(node)\n" 154 | ] 155 | }, 156 | { 157 
| "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "#### What does our initial markup look like?\n", 161 | "\n", 162 | "* We've identified three concepts in the sentence: \n", 163 | " 1. \"no\"\n", 164 | " 1. \"no definite\"\n", 165 | " 1. \"pneumothorax\"\n", 166 | "* Here \"no\" is not a true concept in the sentence; it is a subset of the concept \"no definite\"" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "#### Prune Marks\n", 174 | "\n", 175 | "After identifying concepts, we prune concepts that are a subset of another identified concept." 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "markup.pruneMarks()\n", 185 | "for node in markup.nodes(data=True):\n", 186 | " print(node)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "#### What is the effect of ``pruneMarks``\n", 194 | "\n", 195 | "We've correctly dropped ``no`` as an identified concept." 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "#### Apply modifiers\n", 203 | "\n", 204 | "We now call the ``applyModifiers`` method of the ConTextMarkup object to identify any relationships between the nodes." 
205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [ 213 | "markup.applyModifiers()\n", 214 | "for edge in markup.edges():\n", 215 | " print(edge)" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "### Here is a notebook for [Multisentence Documents](./MultiSentenceDocuments.ipynb)" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [] 231 | } 232 | ], 233 | "metadata": { 234 | "kernelspec": { 235 | "display_name": "Python 3", 236 | "language": "python", 237 | "name": "python3" 238 | }, 239 | "language_info": { 240 | "codemirror_mode": { 241 | "name": "ipython", 242 | "version": 3 243 | }, 244 | "file_extension": ".py", 245 | "mimetype": "text/x-python", 246 | "name": "python", 247 | "nbconvert_exporter": "python", 248 | "pygments_lexer": "ipython3", 249 | "version": "3.7.2" 250 | } 251 | }, 252 | "nbformat": 4, 253 | "nbformat_minor": 1 254 | } 255 | -------------------------------------------------------------------------------- /notebooks/MultiSentenceDocuments.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Processing Multisentence Documents\n", 8 | "\n", 9 | "This notebook uses [TextBlob](https://pypi.org/project/textblob/) to do sentence splitting. If you do not have TextBlob installed, you can install it by uncommenting the line in the cell below and executing that cell. 
" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "#!pip install textblob" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import pyConTextNLP.pyConText as pyConText\n", 28 | "import pyConTextNLP.itemData as itemData\n", 29 | "from textblob import TextBlob\n", 30 | "import networkx as nx\n", 31 | "import pyConTextNLP.display.html as html\n", 32 | "from IPython.display import display, HTML" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "reports = [\n", 42 | " \"\"\"IMPRESSION: Evaluation limited by lack of IV contrast; however, no evidence of\n", 43 | " bowel obstruction or mass identified within the abdomen or pelvis. Non-specific interstitial opacities and bronchiectasis seen at the right\n", 44 | " base, suggestive of post-inflammatory changes.\"\"\",\n", 45 | " \"\"\"IMPRESSION: Evidence of early pulmonary vascular congestion and interstitial edema. Probable scarring at the medial aspect of the right lung base, with no\n", 46 | " definite consolidation.\"\"\"\n", 47 | " ,\n", 48 | " \"\"\"IMPRESSION:\n", 49 | " \n", 50 | " 1. 2.0 cm cyst of the right renal lower pole. Otherwise, normal appearance\n", 51 | " of the right kidney with patent vasculature and no sonographic evidence of\n", 52 | " renal artery stenosis.\n", 53 | " 2. 
Surgically absent left kidney.\"\"\",\n", 54 | " \"\"\"IMPRESSION: No pneumothorax.\"\"\",\n", 55 | " \"\"\"IMPRESSION: No definite pneumothorax\"\"\"\n", 56 | " \"\"\"IMPRESSION: New opacity at the left lower lobe consistent with pneumonia.\"\"\"\n", 57 | "]" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "modifiers = itemData.get_items(\n", 67 | " \"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/lexical_kb_05042016.yml\")\n", 68 | "targets = itemData.get_items(\n", 69 | " \"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/utah_crit.yml\")\n" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "## Define ``markup_sentence``\n", 77 | "\n", 78 | "We are putting the functionality we went through in the previous two notebooks ([BasicSentenceMarkup](./BasicSentenceMarkup.ipynb) and [BasicSentenceMarkupPart2](BasicSentenceMarkupPart2.ipynb)) into a function ``markup_sentence``. We add one step to the function: ``dropInactiveModifiers`` will delete any modifier node that does not get attached to a target node." 
79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "def markup_sentence(s, modifiers, targets, prune_inactive=True):\n", 88 | " \"\"\"\n", 89 | " \"\"\"\n", 90 | " markup = pyConText.ConTextMarkup()\n", 91 | " markup.setRawText(s)\n", 92 | " markup.cleanText()\n", 93 | " markup.markItems(modifiers, mode=\"modifier\")\n", 94 | " markup.markItems(targets, mode=\"target\")\n", 95 | " markup.pruneMarks()\n", 96 | " markup.dropMarks('Exclusion')\n", 97 | " # apply modifiers to any targets within the modifiers scope\n", 98 | " markup.applyModifiers()\n", 99 | " markup.pruneSelfModifyingRelationships()\n", 100 | " if prune_inactive:\n", 101 | " markup.dropInactiveModifiers()\n", 102 | " return markup" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "report = reports[0]\n", 112 | "print(report)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "#### Create a ``ConTextDocument``\n", 120 | "\n", 121 | "``ConTextDocument`` is a class for organizing the markup of multiple sentences. It has a private attribute that is NetworkX DiGraph that represents the document structure. In this exmaple we only use the ``ConTextDocument`` class to collect multiple sentence markups." 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "context = pyConText.ConTextDocument()" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "#### Split the document into sentences and process each sentence\n", 138 | "\n", 139 | "pyConTextNLP comes with a simple sentence splitter in ``helper.py``. I have not been maintaining this and have recently been using TextBlob to split sentences. 
A known problem with either sentence splitting solution is enumerated lists that don't use periods." 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "blob = TextBlob(report.lower())\n", 149 | "count = 0\n", 150 | "rslts = []\n", 151 | "for s in blob.sentences:\n", 152 | " m = markup_sentence(s.raw, modifiers=modifiers, targets=targets)\n", 153 | " rslts.append(m)\n", 154 | "\n", 155 | "for r in rslts:\n", 156 | " context.addMarkup(r)" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "### Displaying pyConTextNLP Markups\n", 164 | "\n", 165 | "The ``display`` subpackage contains some functionality for visualizing the markups. Here I use HTML to color-code identified concepts." 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "clrs = {\\\n", 175 | " \"bowel_obstruction\": \"blue\",\n", 176 | " \"inflammation\": \"blue\",\n", 177 | " \"definite_negated_existence\": \"red\",\n", 178 | " \"probable_negated_existence\": \"indianred\",\n", 179 | " \"ambivalent_existence\": \"orange\",\n", 180 | " \"probable_existence\": \"forestgreen\",\n", 181 | " \"definite_existence\": \"green\",\n", 182 | " \"historical\": \"goldenrod\",\n", 183 | " \"indication\": \"pink\",\n", 184 | " \"acute\": \"golden\"\n", 185 | "}" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "display(HTML(html.mark_document_with_html(context,colors = clrs, default_color=\"black\")))" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "### There is also a rich XML description of the ``ConTextDocument``" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 
208 | "outputs": [], 209 | "source": [ 210 | "print(context.getXML())" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [] 226 | } 227 | ], 228 | "metadata": { 229 | "kernelspec": { 230 | "display_name": "Python 3", 231 | "language": "python", 232 | "name": "python3" 233 | }, 234 | "language_info": { 235 | "codemirror_mode": { 236 | "name": "ipython", 237 | "version": 3 238 | }, 239 | "file_extension": ".py", 240 | "mimetype": "text/x-python", 241 | "name": "python", 242 | "nbconvert_exporter": "python", 243 | "pygments_lexer": "ipython3", 244 | "version": "3.7.2" 245 | } 246 | }, 247 | "nbformat": 4, 248 | "nbformat_minor": 1 249 | } 250 | -------------------------------------------------------------------------------- /notebooks/README.md: -------------------------------------------------------------------------------- 1 | # Read in order 2 | 3 | These notebooks walk through pyConTextNLP concepts in order of complexity. Recommended reading order is: 4 | 5 | 1. [BasicSentenceMarkup](./BasicSentenceMarkup.ipynb) 6 | 2. [BasicSentenceMarkupPart2](./BasicSentenceMarkupPart2.ipynb) 7 | 3. 
"""CherryPy REST front-end exposing pyConTextNLP sentence markup."""
import pyConTextNLP.pyConTextGraph as pyConText
import pyConTextNLP.itemData as itemData
from textblob import TextBlob
import networkx as nx
import pyConTextNLP.display.html as html
import json
import cherrypy


class pyConTextNLP_REST(object):
    """Small REST service: POST/GET a report and get back HTML with the
    pyConText targets/modifiers highlighted."""

    # Default modifier/target knowledge bases. NOTE: fetched over the
    # network once, when the class body is executed (i.e. at import time).
    mod = itemData.instantiateFromCSVtoitemData(
        "https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/lexical_kb_05042016.tsv")
    tar = itemData.instantiateFromCSVtoitemData(
        "https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/utah_crit.tsv")

    # Static category->color mapping. Currently unused by markup_report,
    # which builds its palette dynamically via get_colors_dict.
    clrs = {
        "bowel_obstruction": "blue",
        "inflammation": "blue",
        "definite_negated_existence": "red",
        "probable_negated_existence": "indianred",
        "ambivalent_existence": "orange",
        "probable_existence": "forestgreen",
        "definite_existence": "green",
        "historical": "goldenrod",
        "indication": "pink",
        "acute": "gold"  # was "golden", which is not a valid HTML color name
    }

    @cherrypy.expose
    def index(self):
        """Landing page."""
        return "Welcome to pyConTextNLP REST API. To start go to /markup_report."

    @cherrypy.expose
    def markup_report(self, report='''IMPRESSION: Evaluation limited by lack of IV contrast; however, no evidence of
bowel obstruction or mass identified within the abdomen or pelvis.
Non-specific interstitial opacities and bronchiectasis seen at the right
base, suggestive of post-inflammatory changes.
''',
                      modifiers=None,
                      targets=None):
        """
        Mark up `report` and return an HTML rendering.

        modifiers/targets: optional JSON-encoded itemData rows; when omitted
        the class-level knowledge bases (self.mod / self.tar) are used.
        """
        # Debug output is now guarded: the original called len() on and
        # iterated `modifiers` unconditionally, raising TypeError whenever
        # the default (None) was used.
        if modifiers is not None:
            print("type of modifiers", type(modifiers))
            print("len of modifiers", len(modifiers))
            print(modifiers)
            for m in modifiers:
                print(m)

        if modifiers is None:
            _modifiers = self.mod
        else:
            _modifiers = itemData.itemData()
            _modifiers.extend(json.loads(modifiers))
        if targets is None:
            _targets = self.tar
        else:
            _targets = itemData.itemData()
            _targets.extend(json.loads(targets))

        context = self.split_sentences(report, _modifiers, _targets)
        clrs = self.get_colors_dict(_modifiers, _targets)
        return html.mark_document_with_html(context, colors=clrs)

    def markup_sentence(self, s, modifiers, targets, prune_inactive=True):
        """Run the standard pyConText pipeline on a single sentence and
        return the resulting ConTextMarkup."""
        markup = pyConText.ConTextMarkup()
        markup.setRawText(s)
        markup.cleanText()
        markup.markItems(modifiers, mode="modifier")
        markup.markItems(targets, mode="target")
        markup.pruneMarks()
        markup.dropMarks('Exclusion')
        # apply modifiers to any targets within the modifiers scope
        markup.applyModifiers()
        markup.pruneSelfModifyingRelationships()
        if prune_inactive:
            markup.dropInactiveModifiers()
        return markup

    def split_sentences(self, report, modifiers, targets):
        """Sentence-split `report` with TextBlob, mark up each sentence and
        collect the markups into a ConTextDocument."""
        blob = TextBlob(report.lower())
        context = pyConText.ConTextDocument()
        for s in blob.sentences:
            context.addMarkup(self.markup_sentence(s.raw, modifiers, targets))
        return context

    def get_colors_dict(self, modifiers, targets):
        """Assign 'blue' to every target category and a random color to each
        distinct modifier category."""
        import randomcolor
        rcol = randomcolor.RandomColor()
        colors = {t.getCategory()[0]: 'blue' for t in targets}
        for category in set(c.getCategory()[0] for c in modifiers):
            colors[category] = rcol.generate()[0]
        return colors


if __name__ == '__main__':
    cherrypy.config.update({'server.socket_port': 3030})
    cherrypy.quickstart(pyConTextNLP_REST())
rule<>\n", 59 | "literal<>; category<>; re<>; rule<>\n", 60 | "literal<>; category<>; re<>; rule<>\n", 61 | "literal<>; category<>; re<>; rule<>\n", 62 | "literal<>; category<>; re<>; rule<>\n", 63 | "literal<>; category<>; re<>; rule<>\n", 64 | "literal<>; category<>; re<>; rule<>\n", 65 | "literal<>; category<>; re<<\\b(examination|exam|study)\\b>>; rule<>\n", 66 | "literal<>; category<>; re<>; rule<>\n", 67 | "literal<>; category<>; re<>; rule<>\n" 68 | ] 69 | } 70 | ], 71 | "source": [ 72 | "kb = [\"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/lexical_kb_04292013.tsv\", \n", 73 | " \"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/criticalfinder_generalized_modifiers.tsv\"]\n", 74 | "items = []\n", 75 | "for k in kb:\n", 76 | " items.extend(CI.readConTextItems(k)[0])\n", 77 | "for i in items[0:10]:\n", 78 | " print(i)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "### ConTextItems can also be read from local files" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 5, 91 | "metadata": {}, 92 | "outputs": [ 93 | { 94 | "name": "stdout", 95 | "output_type": "stream", 96 | "text": [ 97 | "literal<>; category<>; re<>; rule<>\n", 98 | "literal<>; category<>; re<>; rule<>\n", 99 | "literal<>; category<>; re<>; rule<>\n", 100 | "literal<>; category<>; re<>; rule<>\n", 101 | "literal<>; category<>; re<>; rule<>\n", 102 | "literal<>; category<>; re<>; rule<>\n", 103 | "literal<>; category<>; re<>; rule<>\n", 104 | "literal<>; category<>; re<<\\b(examination|exam|study)\\b>>; rule<>\n", 105 | "literal<>; category<>; re<>; rule<>\n", 106 | "literal<>; category<>; re<>; rule<>\n" 107 | ] 108 | } 109 | ], 110 | "source": [ 111 | "PCDIR = os.path.join(os.path.expanduser(\"~\"),\n", 112 | " \"Documents\",\"NLP\",\"pyConTextNLP\")\n", 113 | "kb_local = [\"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/lexical_kb_04292013.tsv\",\n", 114 | " 
os.path.join(PCDIR,\"KB\",\"quality_artifacts.tsv\")]\n", 115 | "items_local = []\n", 116 | "for k in kb_local:\n", 117 | " items_local.extend(CI.readConTextItems(k)[0])\n", 118 | "for i in items_local[0:10]:\n", 119 | " print(i)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "collapsed": true 127 | }, 128 | "outputs": [], 129 | "source": [] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "collapsed": true 136 | }, 137 | "outputs": [], 138 | "source": [] 139 | } 140 | ], 141 | "metadata": { 142 | "kernelspec": { 143 | "display_name": "Python 3", 144 | "language": "python", 145 | "name": "python3" 146 | }, 147 | "language_info": { 148 | "codemirror_mode": { 149 | "name": "ipython", 150 | "version": 3 151 | }, 152 | "file_extension": ".py", 153 | "mimetype": "text/x-python", 154 | "name": "python", 155 | "nbconvert_exporter": "python", 156 | "pygments_lexer": "ipython3", 157 | "version": "3.5.4" 158 | } 159 | }, 160 | "nbformat": 4, 161 | "nbformat_minor": 1 162 | } 163 | -------------------------------------------------------------------------------- /notebooks/functional/3/Reading_ConTextItems.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### This notebook generates how to generate ConTextItems" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import os\n", 19 | "import pyConTextNLP.functional.conTextItem as CI\n" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "### ConTextItems can be read from the web" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "name": "stdout", 36 | 
"output_type": "stream", 37 | "text": [ 38 | "conTextItem(literal='are ruled out', category=('definite_negated_existence',), re='are ruled out', rule='backward')\n", 39 | "conTextItem(literal='be ruled out', category=('indication',), re='be ruled out', rule='backward')\n", 40 | "conTextItem(literal='being ruled out', category=('indication',), re='being ruled out', rule='backward')\n", 41 | "conTextItem(literal='can be ruled out', category=('definite_negated_existence',), re='can be ruled out', rule='backward')\n", 42 | "conTextItem(literal='cannot be excluded', category=('ambivalent_existence',), re='cannot be excluded', rule='backward')\n", 43 | "conTextItem(literal='cannot totally be excluded', category=('probable_negated_existence',), re='cannot totally be excluded', rule='backward')\n", 44 | "conTextItem(literal='could be ruled out', category=('definite_negated_existence',), re='could be ruled out', rule='backward')\n", 45 | "conTextItem(literal='examination', category=('indication',), re='\\\\b(examination|exam|study)\\\\b', rule='backward')\n", 46 | "conTextItem(literal='free', category=('definite_negated_existence',), re='free', rule='backward')\n", 47 | "conTextItem(literal='has been ruled out', category=('definite_negated_existence',), re='has been ruled out', rule='backward')\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "kb = [\"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/lexical_kb_04292013.tsv\", \n", 53 | " \"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/criticalfinder_generalized_modifiers.tsv\"]\n", 54 | "items = []\n", 55 | "for k in kb:\n", 56 | " items.extend(CI.readConTextItems(k)[0])\n", 57 | "for i in items[0:10]:\n", 58 | " print(i)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "### ConTextItems can also be read from local files" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 3, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 
74 | "name": "stdout", 75 | "output_type": "stream", 76 | "text": [ 77 | "conTextItem(literal='are ruled out', category=('definite_negated_existence',), re='are ruled out', rule='backward')\n", 78 | "conTextItem(literal='be ruled out', category=('indication',), re='be ruled out', rule='backward')\n", 79 | "conTextItem(literal='being ruled out', category=('indication',), re='being ruled out', rule='backward')\n", 80 | "conTextItem(literal='can be ruled out', category=('definite_negated_existence',), re='can be ruled out', rule='backward')\n", 81 | "conTextItem(literal='cannot be excluded', category=('ambivalent_existence',), re='cannot be excluded', rule='backward')\n", 82 | "conTextItem(literal='cannot totally be excluded', category=('probable_negated_existence',), re='cannot totally be excluded', rule='backward')\n", 83 | "conTextItem(literal='could be ruled out', category=('definite_negated_existence',), re='could be ruled out', rule='backward')\n", 84 | "conTextItem(literal='examination', category=('indication',), re='\\\\b(examination|exam|study)\\\\b', rule='backward')\n", 85 | "conTextItem(literal='free', category=('definite_negated_existence',), re='free', rule='backward')\n", 86 | "conTextItem(literal='has been ruled out', category=('definite_negated_existence',), re='has been ruled out', rule='backward')\n" 87 | ] 88 | } 89 | ], 90 | "source": [ 91 | "PCDIR = os.path.join(os.path.expanduser(\"~\"),\n", 92 | " \"Documents\",\"NLP\",\"pyConTextNLP\")\n", 93 | "kb_local = [\"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/lexical_kb_04292013.tsv\",\n", 94 | " os.path.join(PCDIR,\"KB\",\"quality_artifacts.tsv\")]\n", 95 | "items_local = []\n", 96 | "for k in kb_local:\n", 97 | " items_local.extend(CI.readConTextItems(k)[0])\n", 98 | "for i in items_local[0:10]:\n", 99 | " print(i)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": true 107 | }, 108 | "outputs": [], 109 | 
"source": [] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "collapsed": true 116 | }, 117 | "outputs": [], 118 | "source": [] 119 | } 120 | ], 121 | "metadata": { 122 | "kernelspec": { 123 | "display_name": "Python 3", 124 | "language": "python", 125 | "name": "python3" 126 | }, 127 | "language_info": { 128 | "codemirror_mode": { 129 | "name": "ipython", 130 | "version": 3 131 | }, 132 | "file_extension": ".py", 133 | "mimetype": "text/x-python", 134 | "name": "python", 135 | "nbconvert_exporter": "python", 136 | "pygments_lexer": "ipython3", 137 | "version": "3.5.4" 138 | } 139 | }, 140 | "nbformat": 4, 141 | "nbformat_minor": 1 142 | } 143 | -------------------------------------------------------------------------------- /pyConTextNLP/ConTextMarkup.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module defining ConTextMarkup class 3 | """ 4 | import re 5 | import uuid 6 | from . io.xml import xmlScrub 7 | from . tagObject import tagObject 8 | 9 | import networkx as nx 10 | 11 | REG_CLEAN1 = re.compile(r"""\W""", re.UNICODE) 12 | REG_CLEAN2 = re.compile(r"""\s+""", re.UNICODE) 13 | REG_CLEAN3 = re.compile(r"""\d""", re.UNICODE) 14 | 15 | COMPILED_REGEXPRS = {} 16 | 17 | NODE_XML_SKEL = \ 18 | """ 19 | 20 | {0} 21 | 22 | """ 23 | 24 | EDGE_XML_SKEL = \ 25 | """ 26 | 27 | {0} 28 | {1} 29 | {2} 30 | 31 | """ 32 | 33 | CONTEXT_MARKUP_XML_SKEL = \ 34 | """ 35 | 36 | {0} 37 | {1} 38 | 39 | {2} 40 | 41 | 42 | {3} 43 | 44 | 45 | """ 46 | 47 | 48 | def create_tag_id(): 49 | """ 50 | get a unique identifier 51 | """ 52 | return uuid.uuid1().int 53 | 54 | 55 | 56 | class ConTextMarkup(nx.DiGraph): 57 | """ 58 | base class for context document. 
class ConTextMarkup(nx.DiGraph):
    """
    Sentence-level pyConText markup.

    Nodes are tagObjects representing targets and modifiers found in the
    text; directed edges represent modification relationships
    (modifier -> modified node).
    """

    def __init__(self, txt='', unicodeEncoding='utf-8'):
        """txt is the string to parse"""
        # Graph-level attributes live in self.graph. The key must be
        # "__rawTxt" (capital T): setRawText()/getRawText() use that
        # spelling, so the original's "__rawtxt" seed silently discarded
        # any text handed to the constructor.
        super(ConTextMarkup, self).__init__(__txt=None,
                                            __rawTxt=txt,
                                            __scope=None,
                                            __SCOPEUPDATED=False)
        self.__document = nx.DiGraph()
        self.__document.add_node("top", category="document")
        self.__VERBOSE = False
        self.__tagID = 0
        self.__unicodeEncoding = unicodeEncoding

    def getUnicodeEncoding(self):
        """return the unicode encoding used for the class"""
        return self.__unicodeEncoding

    def toggleVerbose(self):
        """toggles the boolean value for verbose mode"""
        self.__VERBOSE = not self.__VERBOSE

    def getVerbose(self):
        """return the verbose setting"""
        return self.__VERBOSE

    def setRawText(self, txt=''):
        """
        sets the current raw text to txt and resets the derived attributes
        (cleaned text, scope) to empty values
        """
        if self.getVerbose():
            print("Setting text to", txt)
        self.graph["__rawTxt"] = txt
        self.graph["__txt"] = None
        self.graph["__scope"] = None
        self.graph["__SCOPEUPDATED"] = False

    def getText(self):
        """return the cleaned text value"""
        return self.graph.get("__txt", '')

    def getScope(self):
        """return the scope of the markup"""
        return self.graph.get("__scope", '')

    def getScopeUpdated(self):
        """return boolean whether the scope has been updated"""
        return self.graph.get("__SCOPEUPDATED")

    def getRawText(self):
        """get the original (uncleaned) text"""
        return self.graph.get("__rawTxt", '')

    def getNumod_byerSentences(self):  # !!! Need to rewrite this to match graph
        """get the number of sentences in the context"""
        return len(self.__document)

    def cleanText(self, stripNonAlphaNumeric=False, stripNumod_byers=False):
        """Apply the regular-expression scrubbers to the raw text and record
        the cleaned text and its scope on the graph."""
        if stripNonAlphaNumeric:
            txt = REG_CLEAN1.sub(" ", self.getRawText())
        else:
            txt = self.getRawText()
        # collapse runs of whitespace to a single space
        txt = REG_CLEAN2.sub(" ", txt)
        if stripNumod_byers:
            txt = REG_CLEAN3.sub("", txt)
        self.graph["__scope"] = (0, len(txt))
        self.graph["__txt"] = txt
        if self.getVerbose():
            print("cleaned text is now", self.getText())

    def getXML(self):
        """return an XML representation of the markup"""
        nodes = list(self.nodes(data=True))
        # Sort on the tagObject itself; sorting the (node, attrs) tuples can
        # fall through to comparing the attribute dicts, which raises a
        # TypeError on Python 3.
        nodes.sort(key=lambda n: n[0])
        node_string = ''
        for n in nodes:
            attribute_string = ''
            for k in sorted(n[1].keys()):
                attribute_string += """<{0}> {1} </{2}>\n""".format(k, n[1][k], k)
            modification_string = ''
            # materialize: predecessors()/successors() are iterators under
            # networkx 2.x (no len(), always truthy)
            # NOTE(review): the element names below were reconstructed; the
            # extraction stripped the literal tags from the original source.
            modified_by = list(self.predecessors(n[0]))
            if modified_by:
                for mod in modified_by:
                    modification_string += """<modified_by>\n"""
                    modification_string += \
                        """<modifier_id> %s </modifier_id>\n""" % mod.getTagID()
                    modification_string += \
                        """<modifier_category> %s </modifier_category>\n""" % mod.getCategory()
                    modification_string += """</modified_by>\n"""
            modifies = list(self.successors(n[0]))
            if modifies:
                for modified in modifies:
                    modification_string += """<modifies>\n"""
                    modification_string += \
                        """<modified_id> {0} </modified_id>\n""".format(modified.getTagID())
                    modification_string += """</modifies>\n"""
            node_string += \
                NODE_XML_SKEL.format(attribute_string + "{0}".format(n[0].getXML()) +
                                     modification_string)
        edges = list(self.edges(data=True))
        edges.sort(key=lambda e: (e[0], e[1]))
        edge_string = ''
        for edge in edges:
            attribute_string = ''
            for key in sorted(edge[2].keys()):
                attribute_string += """<{0}> {1} </{2}>\n""".format(key, edge[2][key], key)
            edge_string += "{0}".format(EDGE_XML_SKEL.format(edge[0].getTagID(),
                                                             edge[1].getTagID(),
                                                             attribute_string))
        return CONTEXT_MARKUP_XML_SKEL.format(xmlScrub(self.getRawText()),
                                              xmlScrub(self.getText()),
                                              node_string,
                                              edge_string)

    def __unicode__(self):
        txt = '_'*42 + "\n"
        txt += 'rawText: {0}\n'.format(self.getRawText())
        txt += 'cleanedText: {0}\n'.format(self.getText())
        nodes = [n for n in self.nodes(data=True) if n[1].get('category', '') == 'target']
        nodes.sort(key=lambda n: n[0])
        for n in nodes:
            txt += "*"*32 + "\n"
            txt += "TARGET: {0}\n".format(n[0].__unicode__())
            modifiers = sorted(self.predecessors(n[0]))
            for mod in modifiers:
                txt += "-"*4 + "MODIFIED BY: {0}\n".format(mod.__unicode__())
                for modifier in self.predecessors(mod):
                    txt += "-"*8 + "MODIFIED BY: %s\n" % modifier.__unicode__()
        txt += "_"*42 + "\n"
        return txt

    def __str__(self):
        return self.__unicode__()

    def __repr__(self):
        return self.__unicode__()

    def getConTextModeNodes(self, mode):
        """return the nodes whose category equals mode, sorted by span"""
        nodes = [n[0] for n in self.nodes(data=True) if n[1]['category'] == mode]
        nodes.sort()
        return nodes

    def updateScopes(self):
        """
        update the scopes of all the marked modifiers in the txt. The scope
        of a modifier is limited by its own span, the span of modifiers in
        the same category marked in the text, and modifiers with rule
        'terminate'.
        """
        if self.getVerbose():
            print("updating scopes")
        # Record the update on the graph attribute that getScopeUpdated()
        # reads; the original assigned to an instance attribute instead, so
        # applyModifiers() recomputed scopes on every call.
        self.graph["__SCOPEUPDATED"] = True
        # make sure each tag has its own self-limited scope
        modifiers = self.getConTextModeNodes("modifier")
        for modifier in modifiers:
            if self.getVerbose():
                print("old scope for {0} is {1}".format(modifier.__str__(), modifier.getScope()))
            modifier.setScope()
            if self.getVerbose():
                print("new scope for {0} is {1}".format(modifier.__str__(), modifier.getScope()))
        # Now limit scope based on the spans of the other modifiers; a
        # 'terminate' rule that truncates a scope is recorded as an edge.
        for i in range(len(modifiers)-1):
            modifier = modifiers[i]
            for j in range(i+1, len(modifiers)):
                modifier2 = modifiers[j]
                if modifier.limitScope(modifier2) and \
                   modifier2.getRule().lower() == 'terminate':
                    self.add_edge(modifier2, modifier)
                if modifier2.limitScope(modifier) and \
                   modifier.getRule().lower() == 'terminate':
                    self.add_edge(modifier, modifier2)

    def markItems(self, items, mode="target"):
        """tags the sentence for a list of items
        items: a list of contextItems"""
        if not items:
            return
        for item in items:
            self.add_nodes_from(self.markItem(item, ConTextMode=mode), category=mode)

    def markItem(self, item, ConTextMode="target", ignoreCase=True):
        """
        markup the current text with the current item.
        If ignoreCase is True (default), the regular expression is compiled
        with IGNORECASE.  Returns the list of tagObjects created.
        """
        if not self.getText():
            self.cleanText()
        # Cache compiled expressions keyed on (literal, re): the original
        # keyed on the literal alone, so two items sharing a literal but
        # carrying different regular expressions collided in the cache.
        cache_key = (item.getLiteral(), item.getRE())
        regex = COMPILED_REGEXPRS.get(cache_key)
        if regex is None:
            if not item.getRE():
                reg_exp = r"\b{}\b".format(item.getLiteral())
                if self.getVerbose():
                    print("generating regular expression", reg_exp)
            else:
                reg_exp = item.getRE()
                if self.getVerbose():
                    print("using provided regular expression", reg_exp)
            if ignoreCase:
                regex = re.compile(reg_exp, re.IGNORECASE | re.UNICODE)
            else:
                regex = re.compile(reg_exp, re.UNICODE)
            COMPILED_REGEXPRS[cache_key] = regex
        terms = []
        for i in regex.finditer(self.getText()):
            tag_0 = tagObject(item,
                              ConTextMode,
                              tagid=create_tag_id(),
                              scope=self.getScope())
            tag_0.setSpan(i.span())
            tag_0.setPhrase(i.group())
            tag_0.setMatchedGroupDictionary(i.groupdict())
            if self.getVerbose():
                print("marked item", tag_0)
            terms.append(tag_0)
        return terms

    def pruneMarks(self):
        """
        prune marked objects by deleting any object that lies within the
        span of another object of the same category
        """
        self.__prune_marks(self.nodes(data=True))

    def dropInactiveModifiers(self):
        """drop modifiers that are not modifying any targets"""
        if self.getNumMarkedTargets() == 0:
            if self.getVerbose():
                print("No targets in this sentence; dropping ALL modifiers.")
            mnodes = self.getConTextModeNodes("modifier")
        else:
            mnodes = [n for n in self.getConTextModeNodes("modifier") if self.degree(n) == 0]
        if self.getVerbose() and mnodes:
            print("dropping the following inactive modifiers")
            for node in mnodes:
                print(node)
        self.remove_nodes_from(mnodes)

    def pruneModifierRelationships(self):
        """Initially modifiers may be applied to multiple targets. Keep only
        the minimum-text-distance relationship for each modifier."""
        modifiers = self.getConTextModeNodes("modifier")
        for modifier in modifiers:
            # materialize: successors()/edges() are views/iterators under
            # networkx 2.x, which have no len()/remove()
            modified = list(self.successors(modifier))
            if len(modified) > 1:
                minm = min([(modifier.dist(mod_by), mod_by) for mod_by in modified])
                edgs = list(self.edges(modifier))
                edgs.remove((modifier, minm[1]))
                if self.getVerbose():
                    print("deleting relationship(s)", edgs)
                self.remove_edges_from(edgs)

    def pruneSelfModifyingRelationships(self):
        """
        Remove modifiers whose span lies within a target they modify (e.g.
        "free" in the phrase "free air" modifying the target "free air").
        """
        modifiers = self.getConTextModeNodes("modifier")
        nodes_to_remove = []
        for modifier in modifiers:
            for mod_by in list(self.successors(modifier)):
                if self.getVerbose():
                    print(mod_by, modifier, mod_by.encompasses(modifier))
                if mod_by.encompasses(modifier):
                    nodes_to_remove.append(modifier)
        if self.getVerbose():
            print("removing the following self modifying nodes", nodes_to_remove)
        self.remove_nodes_from(nodes_to_remove)

    def __prune_marks(self, _marks):
        # delete any node contained in the span of another node of the same
        # category (this can surely be done faster)
        marks = list(_marks)
        if len(marks) < 2:
            return
        marks.sort(key=lambda m: m[0])
        nodes_to_remove = []
        for i in range(len(marks)-1):
            mark1 = marks[i]
            if mark1[0] not in nodes_to_remove:
                for j in range(i+1, len(marks)):
                    mark2 = marks[j]
                    if mark1[0].encompasses(mark2[0]) and \
                       mark1[1]['category'] == mark2[1]['category']:
                        nodes_to_remove.append(mark2[0])
                    elif mark2[0].encompasses(mark1[0]) and \
                         mark2[1]['category'] == mark1[1]['category']:
                        nodes_to_remove.append(mark1[0])
                        break
        if self.getVerbose():
            print("pruning the following nodes")
            for node in nodes_to_remove:
                print(node)
        self.remove_nodes_from(nodes_to_remove)

    def dropMarks(self, category="exclusion"):
        """Drop any nodes with category equal to category"""
        if self.getVerbose():
            print("in dropMarks")
            for n in self.nodes():
                print(n.getCategory(), n.isA(category.lower()))
        dnodes = [n for n in self.nodes() if n.isA(category)]
        if self.getVerbose() and dnodes:
            print("dropping the following markedItems")
            for n in dnodes:
                print(n)
        self.remove_nodes_from(dnodes)

    def applyModifiers(self):
        """
        If the scope has not yet been updated, do this first.

        Loop through the marked targets and for each target add a
        (modifier, target) edge for every modifier whose rule fires.
        """
        if not self.getScopeUpdated():
            self.updateScopes()
        targets = self.getConTextModeNodes("target")
        modifiers = self.getConTextModeNodes("modifier")
        for target in targets:
            for modifier in modifiers:
                if modifier.applyRule(target):
                    if self.getVerbose():
                        print("applying relationship between", modifier, target)
                    self.add_edge(modifier, target)

    def getMarkedTargets(self):
        """
        Return the list of marked targets in the current sentence, sorted by
        span.
        """
        targets = self.getConTextModeNodes("target")
        targets.sort()
        return targets

    def getNumMarkedTargets(self):
        """Return the number of marked targets in the current sentence"""
        return len(self.getConTextModeNodes("target"))

    def getModifiers(self, node):
        """
        return the immediate predecessors of node, sorted by span
        """
        modifiers = list(self.predecessors(node))
        modifiers.sort()
        return modifiers

    def isModifiedByCategory(self, node, queryCategory):
        """
        test whether node is modified by a tagObject with category equal to
        queryCategory; returns True/False
        """
        for predecessor in self.getModifiers(node):
            if predecessor.isA(queryCategory):
                return True
        return False

    def getTokenDistance(self, node1, node2):
        """return the number of tokens (words) between node1 and node2;
        negative when node2 precedes node1"""
        txt = self.getText()
        if node1 < node2:
            start = node1.getSpan()[1]+1
            end = node2.getSpan()[0]
            direction = 1
        else:
            start = node2.getSpan()[1]+1
            end = node1.getSpan()[0]
            direction = -1
        return len(txt[start:end].split()) * direction
"""This module provides pyConText visualization tools using Bokeh"""

from collections import OrderedDict

from bokeh.plotting import ColumnDataSource
import bokeh.plotting as bp
from bokeh.models import HoverTool
import networkx as nx


def graph2DataSource(g):
    """Build a ColumnDataSource with one row per node of the pyConText
    graph g: span, category, phrase, tag id, literal, and scope."""
    # NOTE(review): zip(*tmp) raises TypeError on an empty graph; callers
    # appear to pass non-empty markups -- confirm before hardening.
    tmp = [(n.getSpan(),
            n.getCategory(),
            n.getPhrase(),
            n.getTagID(),
            n.getLiteral(),
            n.getScope()) for n in g.nodes()]
    span, category, text, ids, literals, scopes = zip(*tmp)
    return ColumnDataSource(data=dict(text=text,
                                      literal=literals,
                                      id=ids,
                                      span=span,
                                      scope=scopes,
                                      category=category))


def graphDocumentBokeh(g, width=600, height=300, title=""):
    """
    Render the pyConTextNLP graph g in a Bokeh figure (targets blue,
    modifiers red), with hover tooltips showing node details.
    `title` is accepted for interface compatibility but not currently used.
    """
    colors = {'target': 'blue', 'modifier': 'red'}
    TOOLS = "pan, box_zoom, reset, hover, previewsave"

    # prefer a graphviz layout; fall back to spring layout when graphviz
    # (or the layout API) is unavailable
    try:
        pos = nx.graphviz_layout(g)
    except Exception:
        pos = nx.spring_layout(g)
    try:
        xs = [p[0] for p in pos.values()]
        ys = [p[1] for p in pos.values()]
        delta = 75
        minx, maxx = int(min(xs)-delta), int(max(xs)+delta)
        miny, maxy = int(min(ys)-delta), int(max(ys)+delta)
        # NOTE(review): (maxx-maxy) mixes the x and y extents; probably
        # intended as (maxx-minx) -- preserved pending confirmation.
        radius = 0.1*(maxx-maxy)
        p = bp.figure(plot_width=width, plot_height=height, title="",
                      x_axis_type=None, y_axis_type=None,
                      x_range=[minx, maxx],
                      y_range=[miny, maxy],
                      min_border=0, outline_line_color=None,
                      tools=TOOLS)
        xpos = [pos[n][0] for n in g.nodes()]
        ypos = [pos[n][1] for n in g.nodes()]
        # NOTE(review): g.node is the networkx 1.x attribute-dict API
        # (g.nodes[n] in 2.x) -- preserved for compatibility with the
        # version this package targets.
        tcolors = [colors[g.node[n]['category']] for n in g.nodes()]
        text = [n.getPhrase() for n in g.nodes()]
        source = graph2DataSource(g)
        for e in g.edges():
            p.line([pos[e[0]][0], pos[e[1]][0]],
                   [pos[e[0]][1], pos[e[1]][1]],
                   line_cap="round",
                   line_width=3,
                   line_alpha=0.4)
            # diamond marks the head (modified) end of the edge
            p.diamond([pos[e[1]][0]], [pos[e[1]][1]],
                      alpha=0.4, size=[10])
        p.text(xpos, ypos,
               text=text, text_color=tcolors,
               angle=0, text_font_size="12pt",
               text_align='center', text_baseline='middle')
        # invisible circles carry the hover data source
        p.circle(xpos, ypos,
                 radius=radius, source=source,
                 fill_color=None, fill_alpha=0.1, line_color=None)
        hover = p.select(dict(type=HoverTool))
        hover.tooltips = OrderedDict([
            ("index", "$index"),
            ("id", "@id"),
            ("phrase", "@text"),
            ("literal", "@literal"),
            ("span", "@span"),
            ("scope", "@scope"),
            ("category", "@category"),
        ])
        bp.show(p)
    except Exception as error:  # was a bare except: it also swallowed KeyboardInterrupt
        print(error, ": Cannot render graph with %d nodes and %d edges" %
              (g.number_of_nodes(), g.number_of_edges()))
"""Module containing functions for generating various display options for pyConTextNLP"""
import copy


def __sort_by_span(_nodes):
    # return a copy of _nodes ordered by span so markup can proceed positionally
    n = list(copy.copy(_nodes))
    n.sort(key=lambda x: x.getSpan())
    return n


def __insert_color(txt, s, c):
    """insert an HTML span style into txt. The span will change the color of
    the text located between s[0] and s[1]:
    txt: txt to be modified
    s: span of where to insert tag
    c: color to set the span to

    NOTE(review): the literal <span> tags were reconstructed; the extracted
    source had the HTML stripped out of the string literals."""
    return (txt[:s[0]]
            + '<span style="color: {0};">'.format(c)
            + txt[s[0]:s[1]]
            + '</span>'
            + txt[s[1]:])


def mark_text(txt, nodes, colors={"name": "red", "pet": "blue"}, default_color="black"):
    """Wrap each node's span in txt with a colored HTML span.

    txt: the text to mark up
    nodes: tagObjects sorted by span (ascending)
    colors: dictionary keyed by ConText category with valid HTML color values

    Processes spans from last to first so earlier offsets stay valid.
    Unlike the original (which recursed and pop()ed the caller's list), this
    is iterative and leaves `nodes` untouched.
    """
    if not nodes:
        return txt
    for n in reversed(nodes):
        txt = __insert_color(txt,
                             n.getSpan(),
                             colors.get(n.getCategory()[0], default_color))
    return txt


def mark_document_with_html(doc, colors={"name": "red", "pet": "blue"}, default_color="black"):
    """takes a ConTextDocument object and returns an HTML paragraph with
    marked phrases in the object highlighted with the colors coded in colors

    doc: ConTextDocument
    colors: dictionary keyed by ConText category with values valid HTML colors
    """
    # imported lazily so this module can be loaded outside the package context
    from ..utils import get_document_markups
    # NOTE(review): <p> wrapper reconstructed -- extraction stripped the
    # literal HTML from the original template.
    return """<p> {0} </p>""".format(
        " ".join([mark_text(m.graph['__txt'],
                            __sort_by_span(m.nodes()),
                            colors=colors,
                            default_color=default_color)
                  for m in get_document_markups(doc)]))


def mark_document_with_html_sections(doc, colors={"name": "red", "pet": "blue"}, default_color="black"):
    """takes a ConTextDocument object and returns a series of sections marked
    in HTML header tags followed by HTML paragraphs with marked phrases in
    the object highlighted with the colors coded in colors

    doc: ConTextDocument
    colors: dictionary keyed by ConText category with values valid HTML colors
    """
    # imported lazily so this module can be loaded outside the package context
    from ..utils import get_section_markups
    h = """"""
    for hierarchy in doc.getDocumentSections():
        # the synthetic root section carries no renderable content
        if hierarchy == 'document':
            continue
        for section in hierarchy:
            # NOTE(review): header/paragraph tags reconstructed -- extraction
            # stripped the literal HTML from the original templates.
            h += """<h2> {0} </h2>""".format(section)
            h += """<p> {0} </p>""".format(" ".join([
                mark_text(m.graph['__txt'],
                          __sort_by_span(m.nodes()),
                          colors=colors,
                          default_color=default_color)
                for m in get_section_markups(doc, section)]))
    return h
22 | """ 23 | class sentenceSplitter(object): 24 | """Class for splitting sentences""" 25 | 26 | def __init__(self, useDefaults=True, useCaseVariants=True): 27 | """ 28 | useDefaults: Populate the exceptionTerms with default values 29 | useCaseVariants: add upper and lower case variants of terms also 30 | """ 31 | 32 | self.defaultExceptions = ['.','Dr.','Mr.','Mrs.','Ms.','M.D.', 'D.O.', 33 | 'Ph.D.','D.M.D.','R.N.','B.A.','A.B.', 34 | 'B.S.','M.S.','q.','viz.','e.g.'] 35 | self.exceptionTerms = set(()) 36 | 37 | self.digits = set('0123456789') 38 | if useDefaults: 39 | for term in self.defaultExceptions: 40 | try: 41 | self.exceptionTerms.add(term) 42 | if useCaseVariants: 43 | self.exceptionTerms.add(term.lower()) 44 | self.exceptionTerms.add(term.upper()) 45 | except TypeError: 46 | print("Terms must be of type string. You provided {0} which is a {1}".format(term,type(term))) 47 | 48 | def addExceptionTerms(self, *terms, **kwargs): #addCaseVariants=True): 49 | """add exception terms to list of terms not to terminate sentence at. 50 | If keyword argument addCaseVariants = True is provided, then also add the lower and upper case variants to the list """ 51 | addCaseVariants = kwargs.pop('addCaseVariants', False) 52 | for t in terms: 53 | self.exceptionTerms.add(t) 54 | if addCaseVariants: 55 | self.exceptionTerms.add(t.lower()) 56 | self.exceptionTerms.add(t.upper()) 57 | 58 | def getExceptionTerms(self): 59 | return self.exceptionTerms 60 | def deleteExceptionTerms(self,*terms, **kwargs): #deleteCaseVariants=True): 61 | """delete exception terms from list of terms not to terminate sentence at. 
62 | If keyword argument deleteCaseVariants = True is provided, then also delete the lower and upper case variants from the list""" 63 | deleteCaseVariants = kwargs.pop('deleteCaseVariants', False) 64 | for t in terms: 65 | self.exceptionTerms.discard(t) 66 | if deleteCaseVariants: 67 | self.exceptionTerms.discard(t.lower()) 68 | self.exceptionTerms.discard(t.upper()) 69 | 70 | def splitSentences(self,txt): 71 | """ 72 | Splt txt into sentences a list of sentences is returned 73 | """ 74 | txt = txt.split() 75 | sentences = [] 76 | wordLoc = 0 77 | 78 | while wordLoc < len(txt): 79 | currentWord = txt[wordLoc] 80 | if currentWord[-1] in '.?!': 81 | if currentWord in self.exceptionTerms: 82 | wordLoc += 1 83 | # per discussion with A.G. dropped this exception, since assuming numbers only use decimal points if there 84 | # are actual decimal point digits expressed and thus the period would not be the last character of the word. 85 | #elif( self.digits.intersection(currentWord) and 86 | #not set('()').intersection(currentWord)): # word doesn't include parentheses. Is this necessary? 
87 | #wordLoc += 1 88 | else: 89 | sentences.append(' '.join(txt[:wordLoc+1])) 90 | txt = txt[wordLoc+1:] 91 | wordLoc = 0 92 | else: 93 | wordLoc += 1 94 | 95 | # if any texts remains (due to failure to identify a final sentence termination, 96 | # then take all remaining text and put into a sentence 97 | if txt: 98 | sentences.append(' '.join(txt) ) 99 | 100 | return sentences 101 | -------------------------------------------------------------------------------- /pyConTextNLP/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/74e5b9bc4de092dd8bef8e04b297369a7f3ea6d9/pyConTextNLP/io/__init__.py -------------------------------------------------------------------------------- /pyConTextNLP/io/xml.py: -------------------------------------------------------------------------------- 1 | """ 2 | module for creating XML files 3 | """ 4 | import re 5 | 6 | rlt = re.compile(r"""<""", re.UNICODE) 7 | ramp = re.compile(r"""&""", re.UNICODE) 8 | 9 | 10 | def xmlScrub(tmp): 11 | return rlt.sub(r"<",ramp.sub(r"&",u"{0}".format(tmp))) 12 | 13 | -------------------------------------------------------------------------------- /pyConTextNLP/itemData.py: -------------------------------------------------------------------------------- 1 | #Copyright 2010 Brian E. Chapman 2 | # 3 | #Licensed under the Apache License, Version 2.0 (the "License"); 4 | #you may not use this file except in compliance with the License. 5 | #You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | #Unless required by applicable law or agreed to in writing, software 10 | #distributed under the License is distributed on an "AS IS" BASIS, 11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | #See the License for the specific language governing permissions and 13 | #limitations under the License. 
"""
A module defining the contextItem class and helpers for loading
contextItem knowledge bases from YAML files or URLs.
"""
import urllib.request
import urllib.error
import urllib.parse


def _get_fileobj(_file):
    """Return a readable file object for *_file*.

    _file: a URL or a plain filesystem path; paths without a URL scheme are
    treated as file:// URLs."""
    if not urllib.parse.urlparse(_file).scheme:
        _file = "file://" + _file
    return urllib.request.urlopen(_file, data=None)

# Public alias: the test suite (tests/test_itemData.py) calls
# itemData.get_fileobj, which previously did not exist.
get_fileobj = _get_fileobj


def get_items(_file):
    """Load a YAML knowledge base from *_file* (path or URL) and return a
    list of contextItem objects, one per YAML document.

    Each YAML document must provide the keys Lex, Type, Regex, Direction."""
    # Deferred import keeps the module importable when PyYAML is absent.
    import yaml
    f0 = _get_fileobj(_file)
    try:
        # BUG FIX: yaml.load_all without an explicit Loader is an error in
        # PyYAML >= 6 (and unsafe before that); safe_load_all is the
        # documented replacement for trusted-structure data.
        context_items = [contextItem((d["Lex"],
                                      d["Type"],
                                      r"%s" % d["Regex"],
                                      d["Direction"])) for d in yaml.safe_load_all(f0)]
    finally:
        # ensure the file object is closed even if parsing fails
        f0.close()
    return context_items


class contextItem(object):
    """A lexical knowledge-base entry.

    Wraps a 4-tuple (literal, categories, regex, rule):
      literal: the phrase of interest
      categories: comma-delimited category names (lower-cased, stripped)
      regex: regular expression used for matching; generated from the
             literal when empty
      rule: direction rule (e.g. 'forward', 'backward', 'bidirectional',
            'terminate', or '')"""

    def __init__(self, args):
        self.__literal = args[0]
        cs = args[1].split(",")
        self.__category = []
        for c in cs:
            self.__category.append(c.lower().strip())
        self.__re = r"%s" % args[2]  # read the regex as a raw string
        self.__rule = args[3].lower()

        # generate regex from literal if no regex provided
        if not self.__re:
            self.__re = r"\b{}\b".format(self.__literal)

    def getLiteral(self):
        """return the literal associated with this item"""
        return self.__literal

    def getCategory(self):
        """return a copy of the list of categories associated with this item"""
        return self.__category[:]

    def categoryString(self):
        """return the categories as a string delimited by '_'"""
        return '_'.join(self.__category)

    def isA(self, testCategory):
        """test whether testCategory (a string, or an iterable of strings)
        is one of the categories associated with self"""
        try:
            return testCategory.lower().strip() in self.__category
        except AttributeError:
            # testCategory is not a string; treat it as an iterable
            for tc in testCategory:
                if tc.lower().strip() in self.__category:
                    return True
            return False

    def getRE(self):
        """return the regular expression for this item"""
        return self.__re

    def getRule(self):
        """return the direction rule for this item"""
        return self.__rule

    def __str__(self):
        txt = """literal<<{0}>>; category<<{1}>>; re<<{2}>>; rule<<{3}>>""".format(
            self.__literal, self.__category, self.__re, self.__rule)
        return txt

    def __repr__(self):
        return self.__str__()
41 | build around markedTargets a list of termObjects representing desired terms 42 | found in text and markedModifiers, tagObjects found in the text 43 | """ 44 | rb = re.compile(r"""\b""",re.UNICODE) 45 | def __init__(self,unicodeEncoding='utf-8'): 46 | """txt is the string to parse""" 47 | # __document capture the document level structure 48 | # for each sentence and then put in the archives when the next sentence 49 | # is processed 50 | self.__unicodeEncoding = unicodeEncoding 51 | self.__document = nx.DiGraph() 52 | self.__currentSentenceNum = 0 53 | self.__currentSectionNum = 0 54 | self.__document.add_node("document", category="section", __sectionNumber = self.__currentSectionNum) 55 | self.__currentSectionNum += 1 56 | self.__currentParent = "document" 57 | self.__root = "document" 58 | self.__documentGraph = None 59 | 60 | def insertSection(self,sectionLabel,setToParent=False): 61 | self.__document.add_edge(self.__currentParent,sectionLabel,category="section",__sectionNumber=self.__currentSectionNum) 62 | self.__currentSectionNum += 1 63 | if setToParent: 64 | self.__currentParent = sectionLabel 65 | 66 | def getDocument(self): 67 | return self.__document 68 | def getCurrentSentenceNumber(self): 69 | return self.__currentSentenceNum 70 | def getCurrentSectionNumber(self): 71 | return self.__currentSectionNum 72 | def setParent(self, label=None): 73 | self.__currentParent = label 74 | def getCurrentparent(self): 75 | return self.__currentParent 76 | def addSectionattributes(self,**kwargs): 77 | for key in kwargs.keys(): 78 | self.__document.node[self.__currentParent][key] = kwargs[key] 79 | def getUnicodeEncoding(self): 80 | return self.__unicodeEncoding 81 | 82 | def addMarkup(self, markup): 83 | """ 84 | add the markup as a node in the document attached to the current parent. 
85 | """ 86 | # I'm not sure if I want to be using copy here 87 | self.__document.add_edge(self.__currentParent,markup, 88 | category="markup", 89 | sentenceNumber=self.__currentSentenceNum) 90 | 91 | self.__currentSentenceNum += 1 92 | def retrieveMarkup(self,sentenceNumber): 93 | """ 94 | retrieve the markup corresponding to sentenceNumber 95 | """ 96 | edge = [e for e in self.__document.edges(data=True) if e[2]['category'] == "markup" and e[2]['sentenceNumber'] == sentenceNumber] 97 | if edge: 98 | return edge[0] 99 | 100 | def getSectionNodes(self,sectionLabel = None, category="markup"): 101 | if not sectionLabel: 102 | sectionLabel = self.__currentParent 103 | successors = [(e[2]['__sectionNumber'],e[1]) for e in self.__document.out_edges(sectionLabel, data=True) 104 | if e[2].get("category") == category] 105 | successors.sort() 106 | tmp = list(zip(*successors)) 107 | return tmp[1] 108 | 109 | def getSectionMarkups(self, sectionLabel = None, returnSentenceNumbers=True ): 110 | """return the markup graphs for the section ordered by sentence number""" 111 | if not sectionLabel: 112 | sectionLabel = self.__currentParent 113 | successors = [(e[2]['sentenceNumber'],e[1]) for e in self.__document.out_edges(sectionLabel, data=True) 114 | if e[2].get("category") == "markup"] 115 | successors.sort() 116 | if returnSentenceNumbers: 117 | return successors 118 | else: 119 | tmp = list(zip(*successors)) 120 | return tmp[1] 121 | 122 | def getDocumentSections(self): 123 | edges = [ (e[2]['__sectionNumber'],e[1]) for e in self.__document.edges(data=True) if e[2].get("category") == "section"] 124 | edges.sort() 125 | tmp = list(zip(*edges)) 126 | if len(tmp) > 1: 127 | tmp = [self.__root, tmp[1]] 128 | else: 129 | tmp = [self.__root] 130 | return tmp 131 | 132 | def getSectionText(self,sectionLabel = None ): 133 | """ 134 | """ 135 | markups = self.getSectionMarkups(sectionLabel,returnSentenceNumbers = False) 136 | txt = " ".join([ m.getText() for m in markups]) 137 | 
return txt 138 | 139 | def getDocumentGraph(self): 140 | if not self.__documentGraph: 141 | self.computeDocumentGraph() 142 | return self.__documentGraph 143 | 144 | def getXML(self): 145 | txt = "" 146 | # first generate string for all the sentences from the document in order to compute document level offsets 147 | documentString = "" 148 | sentenceOffsets = {} 149 | sections = self.getDocumentSections() 150 | for s in sections: 151 | markups = self.getSectionMarkups(s) 152 | for m in markups: 153 | sentenceOffsets[m[0]] = len(documentString) 154 | documentString = documentString + m[1].getText()+" " 155 | 156 | txt += xmlScrub(documentString) 157 | # get children sections of root 158 | 159 | 160 | for s in sections: 161 | txt += """
\n {0} \n""".format(s) 162 | markups = self.getSectionMarkups(s) 163 | for m in markups: 164 | txt += "\n %d \n %d \n%s"%( 165 | (m[0],sentenceOffsets[m[0]],m[1].getXML())) 166 | txt += """
\n""" 167 | 168 | return ConTextDocumentXMLSkel.format(txt) 169 | def __unicode__(self): 170 | txt = '_'*42+"\n" 171 | return txt 172 | def __str__(self): 173 | return self.__unicode__() 174 | def __repr__(self): 175 | return self.__unicode__()#.encode('utf-8') 176 | 177 | def computeDocumentGraph(self, verbose=False): 178 | """Create a single document graph from the union of the graphs created 179 | for each sentence in the archive. Note that the algorithm in NetworkX 180 | is different based on whether the Python version is greater than or 181 | equal to 2.6""" 182 | # Note that this as written does not include the currentGraph in the DocumentGraph 183 | # Maybe this should be changed 184 | self.__documentGraph = ConTextMarkup() 185 | if verbose: 186 | print("Document markup has {0d} edges".format(self.__document.number_of_edges())) 187 | markups = [e[1] for e in self.__document.edges(data=True) if e[2].get('category') == 'markup'] 188 | if verbose: 189 | print("Document markup has {0d} conTextMarkup objects".format(len(markups))) 190 | for i in range(len(markups)): 191 | #for m in markups: 192 | m = markups[i] 193 | if verbose: 194 | print("markup {0d} has {1d} total items including {2d} targets".format(i,m.number_of_nodes(),m.getNumMarkedTargets())) 195 | 196 | self.__documentGraph = nx.union(m,self.__documentGraph) 197 | if verbose: 198 | print("documentGraph now has {0d} nodes".format(self.__documentGraph.number_of_nodes())) 199 | 200 | 201 | 202 | 203 | -------------------------------------------------------------------------------- /pyConTextNLP/tagObject.py: -------------------------------------------------------------------------------- 1 | """ 2 | tabObject module 3 | """ 4 | 5 | import uuid 6 | import copy 7 | from .io.xml import xmlScrub 8 | 9 | tagObjectXMLSkel=\ 10 | """ 11 | 12 | {0} 13 | {1} 14 | {2} 15 | {3} 16 | {4:d} 17 | {5:d} 18 | {6:d} 19 | {7:d} 20 | 21 | """ 22 | 23 | 24 | class tagObject(object): 25 | """ 26 | A class that describes terms 
of interest in the text. 27 | tagObject is characterized by the following attributes 28 | 1) The contextItem defining the tag 29 | 3) The location of the tag within the text being parsed 30 | 31 | """ 32 | def __init__(self, item, ConTextCategory, scope=None, tagid=None, **kwargs): 33 | """ 34 | item: contextItem used to generate term 35 | ConTextCategory: category this term is being used for in pyConText 36 | 37 | variants 38 | """ 39 | self.__item = item 40 | self.__category = self.__item.getCategory() 41 | self.__spanStart = 0 42 | self.__spanEnd = 0 43 | self.__foundPhrase = '' 44 | self.__foundDict = {} 45 | self.__ConTextCategory = ConTextCategory 46 | if not tagid: 47 | tagid = uuid.uid1().int 48 | self.__tagID = tagid 49 | if scope == None: 50 | self.__scope = [] 51 | else: 52 | self.__scope = list(scope) 53 | self.__SCOPEUPDATED = False 54 | 55 | 56 | def setScope(self): 57 | """ 58 | applies the objects own rule and span to modify the object's scope 59 | Currently only "forward" and "backward" rules are implemented 60 | """ 61 | 62 | if 'forward' in self.__item.getRule().lower(): 63 | self.__scope[0] = self.getSpan()[1] 64 | elif 'backward' in self.__item.getRule().lower(): 65 | self.__scope[1] = self.getSpan()[0] 66 | 67 | 68 | def getTagID(self): 69 | return self.__tagID 70 | 71 | 72 | def parseRule(self): 73 | """parse the rule for the associated""" 74 | pass 75 | 76 | 77 | def getScope(self): 78 | return self.__scope 79 | 80 | 81 | def getRule(self): 82 | return self.__item.getRule() 83 | 84 | 85 | def limitScope(self, obj): 86 | """If self and obj are of the same category or if obj has a rule of 87 | 'terminate', use the span of obj to 88 | update the scope of self 89 | returns True if a obj modified the scope of self""" 90 | if not self.getRule() or self.getRule()== 'terminate' or \ 91 | (not self.isA(obj.getCategory()) and obj.getRule() != 'terminate'): 92 | return False 93 | originalScope = copy.copy((self.getScope())) 94 | if 'forward' in 
self.getRule().lower() or \ 95 | 'bidirectional' in self.getRule().lower(): 96 | if obj > self: 97 | self.__scope[1] = min(self.__scope[1],obj.getSpan()[0]) 98 | elif 'backward' in self.getRule().lower() or \ 99 | 'bidirectional' in self.getRule().lower(): 100 | if obj < self: 101 | self.__scope[0] = max(self.__scope[0],obj.getSpan()[1]) 102 | if originalScope != self.__scope: 103 | return True 104 | else: 105 | return False 106 | 107 | 108 | def applyRule(self, term): 109 | """applies self's rule to term. If the start of term lines within 110 | the span of self, then term may be modified by self""" 111 | if not self.getRule() or self.getRule() == 'terminate': 112 | return False 113 | if self.__scope[0] <= term.getSpan()[0] <= self.__scope[1]: 114 | return True 115 | 116 | 117 | def getConTextCategory(self): 118 | return self.__ConTextCategory 119 | 120 | 121 | def getXML(self): 122 | return tagObjectXMLSkel.format(self.getTagID(),xmlScrub(self.getPhrase()), 123 | xmlScrub(self.getLiteral()),xmlScrub(self.getCategory()), 124 | self.getSpan()[0],self.getSpan()[1], 125 | self.getScope()[0],self.getScope()[1]) 126 | 127 | 128 | def getBriefDescription(self): 129 | description = u""" {0} """.format(self.getTagID()) 130 | description+= u""" {0} """.format(self.getPhrase()) 131 | description+= u""" {0} """.format(self.getCategory()) 132 | return description 133 | 134 | 135 | def getLiteral(self): 136 | """returns the term defining this object""" 137 | return self.__item.getLiteral() 138 | 139 | 140 | def getCategory(self): 141 | """returns the category (e.g. 
CONJUNCTION) for this object""" 142 | return self.__category[:] 143 | 144 | 145 | def categoryString(self): 146 | return u'_'.join(self.__category) 147 | 148 | 149 | def isA(self,category): 150 | return self.__item.isA(category) 151 | 152 | 153 | def setCategory(self,category): 154 | self.__category = category 155 | 156 | 157 | def replaceCategory(self,oldCategory, newCategory): 158 | for index, item in enumerate(self.__category): 159 | if item == oldCategory.lower().strip(): 160 | try: 161 | self.__category[index] = newCategory.lower().strip() 162 | except: 163 | del self.__category[index] 164 | self.__category.extend([nc.lower().strip() for nc in newCategory]) 165 | 166 | 167 | def setSpan(self, span): 168 | """set the span within the associated text for this object""" 169 | self.__spanStart = span[0] 170 | self.__spanEnd = span[1] 171 | 172 | 173 | def getSpan(self): 174 | """return the span within the associated text for this object""" 175 | return self.__spanStart,self.__spanEnd 176 | 177 | def setPhrase(self, phrase): 178 | """set the actual matched phrase used to generate this object""" 179 | self.__foundPhrase = phrase 180 | 181 | 182 | def getPhrase(self): 183 | """return the actual matched phrase used to generate this object""" 184 | return self.__foundPhrase 185 | 186 | 187 | def setMatchedGroupDictionary(self, mdict): 188 | """set the foundDict variable to mdict. This gets the name/value pair for each NAMED group within the regular expression""" 189 | self.__foundDict = mdict.copy() 190 | 191 | 192 | def getMatchedGroupDictionary(self): 193 | """return a copy of the matched group dictionary""" 194 | return self.__foundDict.copy() 195 | 196 | 197 | def dist(self, obj): 198 | """returns the minimum distance from the current object and obj. 
199 | Distance is measured as current start to object end or current end to object start""" 200 | return min(abs(self.__spanEnd-obj.__spanStart), abs(self.__spanStart-obj.__spanEnd)) 201 | 202 | def __lt__(self, other): return self.__spanStart < other.__spanStart 203 | def __le__(self, other): return self.__spanStart <= other.__spanStart 204 | def __eq__(self, other): 205 | return (self.__spanStart == other.__spanStart and 206 | self.__spanEnd == other.__spanEnd) 207 | def __ne__(self, other): return self.__spanStart != other.__spanStart 208 | def __gt__(self, other): return self.__spanStart > other.__spanStart 209 | def __ge__(self, other): return self.__spanStart >= other.__spanStart 210 | 211 | def __hash__(self): 212 | return hash(repr(self)) 213 | 214 | 215 | def encompasses(self, other): 216 | """tests whether other is completely encompassed with the current object 217 | ??? should we not prune identical span tagObjects???""" 218 | if self.__spanStart <= other.__spanStart and \ 219 | self.__spanEnd >= other.__spanEnd: 220 | return True 221 | else: 222 | return False 223 | 224 | 225 | def overlap(self, other): 226 | """ 227 | tests whether other overlaps with self 228 | """ 229 | if (other.__spanStart >= self.__spanStart and other.__spanStart <= self.__spanEnd ) or \ 230 | (other.__spanEnd >= self.__spanStart and other.__spanEnd <= self.__spanEnd): 231 | return True 232 | else: 233 | return False 234 | 235 | 236 | def leftOverlap(self, other): 237 | """ 238 | tests whether other has partial overlap to the left with self. 
239 | """ 240 | if self.encompasses(other): 241 | return False 242 | if self.overlap(other) and self.__gt__(other): 243 | return True 244 | else: 245 | return False 246 | 247 | 248 | def rightOverlap(self, other): 249 | """ 250 | tests whether other has partial overlap to the right with self 251 | """ 252 | if self.encompasses(other): 253 | return False 254 | if self.overlap(other) and self.__lt__(other): 255 | return True 256 | else: 257 | return False 258 | 259 | 260 | def __unicode__(self): 261 | txt = self.getBriefDescription() 262 | return txt 263 | 264 | 265 | def __str__(self): 266 | return self.__unicode__() 267 | def __repr__(self): 268 | return self.__unicode__() 269 | 270 | 271 | 272 | -------------------------------------------------------------------------------- /pyConTextNLP/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/74e5b9bc4de092dd8bef8e04b297369a7f3ea6d9/pyConTextNLP/tests/__init__.py -------------------------------------------------------------------------------- /pyConTextNLP/tests/test_base.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | """ 4 | """Modifying the testing structure to include a class setup and teardown""" 5 | from unittest import TestCase 6 | import warnings 7 | 8 | 9 | class TestClass(TestCase): 10 | def setUp(self): 11 | """setUp is called before each test is run, tearDown is called after""" 12 | pass 13 | def tearDown(self): 14 | pass 15 | -------------------------------------------------------------------------------- /pyConTextNLP/tests/test_contextitem.py: -------------------------------------------------------------------------------- 1 | import pyConTextNLP.itemData as itemData 2 | import pytest 3 | 4 | @pytest.fixture(scope="module") 5 | def items(): 6 | 7 | return [ ["pulmonary embolism", 8 | "PULMONARY_EMBOLISM", 9 | r"""pulmonary\s(artery 
)?(embol[a-z]+)""", 10 | ""], 11 | ["no gross evidence of", 12 | "PROBABLE_NEGATED_EXISTENCE", 13 | "", 14 | "forward"]] 15 | 16 | def test_instantiate_contextItem0(items): 17 | for item in items: 18 | assert itemData.contextItem(item) 19 | 20 | 21 | def test_contextItem_rule(items): 22 | cti = itemData.contextItem(items[1]) 23 | 24 | assert cti.getRule() == "forward" 25 | 26 | 27 | def test_contextItem_literal(items): 28 | cti = itemData.contextItem(items[0]) 29 | 30 | assert cti.getLiteral() == "pulmonary embolism" 31 | 32 | 33 | def test_contextItem_category(items): 34 | cti = itemData.contextItem(items[1]) 35 | assert cti.getCategory() == ["probable_negated_existence"] 36 | 37 | def test_contextItem_isa(items): 38 | cti = itemData.contextItem(items[0]) 39 | assert cti.isA("pulmonary_embolism") 40 | 41 | 42 | def test_contextItem_isa1(items): 43 | cti = itemData.contextItem(items[0]) 44 | assert cti.isA("PULMONARY_EMBOLISM") 45 | 46 | 47 | def test_contextItem_isa2(items): 48 | cti = itemData.contextItem(items[1]) 49 | assert cti.isA("PROBABLE_NEGATED_EXISTENCE") 50 | 51 | 52 | def test_contextItem_getRE(items): 53 | cti = itemData.contextItem(items[1]) 54 | assert cti.getRE() == r'\b%s\b'%items[1][0] 55 | 56 | 57 | def test_contextItem_getRE1(items): 58 | cti = itemData.contextItem(items[0]) 59 | assert cti.getRE() == r"""pulmonary\s(artery )?(embol[a-z]+)""" 60 | 61 | -------------------------------------------------------------------------------- /pyConTextNLP/tests/test_contextmarkup.py: -------------------------------------------------------------------------------- 1 | from pyConTextNLP.ConTextMarkup import ConTextMarkup 2 | import pytest 3 | 4 | @pytest.fixture(scope="module") 5 | def sent1(): 6 | return 'kanso **diabetes** utesl\xf6t eller diabetes men inte s\xe4kert. Vi siktar p\xe5 en r\xf6ntgenkontroll. kan det vara nej panik\xe5ngesten\n?' 7 | 8 | @pytest.fixture(scope="module") 9 | def sent2(): 10 | return 'IMPRESSION: 1. 
LIMITED STUDY DEMONSTRATING NO GROSS EVIDENCE OF SIGNIFICANT PULMONARY EMBOLISM.' 11 | @pytest.fixture(scope="module") 12 | def sent3(): 13 | return 'This is a sentence that does not end with a number. But this sentence ends with 1. So this should be recognized as a third sentence.' 14 | 15 | @pytest.fixture(scope="module") 16 | def sent4(): 17 | return 'This is a sentence with a numeric value equal to 1.43 and should not be split into two parts.' 18 | 19 | @pytest.fixture(scope="module") 20 | def items(): 21 | return [ ["pulmonary embolism", 22 | "PULMONARY_EMBOLISM", 23 | r"""pulmonary\s(artery )?(embol[a-z]+)""", 24 | ""], 25 | ["no gross evidence of", 26 | "PROBABLE_NEGATED_EXISTENCE", 27 | "", 28 | "forward"]] 29 | 30 | def test_setRawText1(sent1): 31 | context = ConTextMarkup() 32 | context.setRawText(sent1) 33 | assert context.getRawText() == sent1 34 | 35 | def test_scrub_preserve_unicode(sent1): 36 | context = ConTextMarkup() 37 | context.setRawText(sent1) 38 | context.cleanText(stripNonAlphaNumeric=True) 39 | assert context.getText().index(u'\xf6') == 40 40 | 41 | def test_scrub_text(sent2): 42 | context = ConTextMarkup() 43 | context.setRawText(sent2) 44 | context.cleanText(stripNonAlphaNumeric=True) 45 | assert context.getText().rfind(u'.') == -1 46 | -------------------------------------------------------------------------------- /pyConTextNLP/tests/test_env.py: -------------------------------------------------------------------------------- 1 | def test_yaml(): 2 | import yaml 3 | assert yaml 4 | 5 | def test_networkx(): 6 | import networkx as nx 7 | assert nx 8 | 9 | def test_networkx_v2x(): 10 | import networkx as nx 11 | assert nx.__version__[0] == '2' 12 | -------------------------------------------------------------------------------- /pyConTextNLP/tests/test_helpers.py: -------------------------------------------------------------------------------- 1 | import pyConTextNLP.helpers as helpers 2 | import pytest 3 | 4 | 
@pytest.fixture(scope="module") 5 | def splitter(): 6 | return helpers.sentenceSplitter() 7 | 8 | 9 | def test_createSentenceSplitter(): 10 | assert helpers.sentenceSplitter() 11 | 12 | 13 | def test_getExceptionTerms(splitter): 14 | assert splitter.getExceptionTerms() 15 | 16 | 17 | def test_addExceptionTermsWithoutCaseVariants(splitter): 18 | splitter.addExceptionTerms("D.D.S.", "D.O.") 19 | assert ("D.O." in splitter.getExceptionTerms()) 20 | #assert ("d.o." in splitter.getExceptionTerms()) 21 | 22 | 23 | def test_addExceptionTermsWithCaseVariants(splitter): 24 | splitter.addExceptionTerms("D.D.S.", "D.O.",addCaseVariants=True) 25 | assert ("d.o." in splitter.getExceptionTerms()) 26 | 27 | 28 | def test_deleteExceptionTermsWithoutCaseVariants(splitter): 29 | splitter.deleteExceptionTerms("M.D.") 30 | assert ("M.D." not in splitter.getExceptionTerms()) 31 | assert ("m.d." in splitter.getExceptionTerms()) 32 | -------------------------------------------------------------------------------- /pyConTextNLP/tests/test_itemData.py: -------------------------------------------------------------------------------- 1 | import pyConTextNLP.itemData as itemData 2 | from pathlib import PurePath 3 | import os 4 | import pytest 5 | 6 | 7 | @pytest.fixture(scope="session") 8 | def get_tmp_dirs(): 9 | pass 10 | 11 | def test_get_fileobj_1(): 12 | fobj = PurePath(PurePath(os.path.abspath(__file__)).parent, "..", "..", "KB", "test.yml") 13 | yaml_fo = itemData.get_fileobj(str(fobj)) 14 | assert yaml_fo 15 | 16 | def test_get_fileobj_2(): 17 | wdir = PurePath(os.path.abspath(__file__))#, "..", "..", "KB") 18 | fobj = PurePath(wdir.parent, "..", "..", "KB", "test.yml") 19 | yfo = itemData.get_fileobj("file://"+str(fobj)) 20 | assert yfo 21 | 22 | def test_get_fileobj_3(): 23 | yfo = itemData.get_fileobj( 24 | "https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/test.yml") 25 | assert yfo 26 | 
-------------------------------------------------------------------------------- /pyConTextNLP/utils.py: -------------------------------------------------------------------------------- 1 | """ """ 2 | 3 | def get_document_markups(document): 4 | """ Given a ConTextDocument return an ordered list of the ConTextmarkup objects consistituting the document""" 5 | tmp = [(e[1],e[2]['sentenceNumber']) for e in document.getDocument().edges(data=True) if 6 | e[2].get('category') == 'markup'] 7 | tmp.sort(key=lambda x:x[1]) 8 | return [t[0] for t in tmp] 9 | 10 | def get_section_markups(document, sectionLabel): 11 | """ Given a ConTextDocument and sectionLabel, return an ordered list of the ConTextmarkup objects in that section""" 12 | tmp = [(e[1],e[2]['sentenceNumber']) for e in document.getDocument().out_edges(sectionLabel, data=True) if 13 | e[2].get('category') == 'markup'] 14 | tmp.sort(key=lambda x:x[1]) 15 | return [t[0] for t in tmp] 16 | 17 | def conceptInDocument(document, concept): 18 | """tests whether concept is in any nodes of document""" 19 | pass 20 | -------------------------------------------------------------------------------- /pyConTextNLP/version.py: -------------------------------------------------------------------------------- 1 | __version__="0.7.0.0" 2 | -------------------------------------------------------------------------------- /requirements-py2.txt: -------------------------------------------------------------------------------- 1 | unicodecsv 2 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [wheel] 2 | universal = 1 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages # Always prefer setuptools over distutils 2 | from codecs import open # 
To use a consistent encoding 3 | from os import path 4 | 5 | version = {} 6 | here = path.abspath(path.dirname(__file__)) 7 | 8 | 9 | with open(path.join("pyConTextNLP","version.py")) as f0: 10 | exec(f0.read(), version) 11 | print(version) 12 | 13 | # Get the long description from the relevant file 14 | with open(path.join(here, 'README.rst'), encoding='utf-8') as f: 15 | long_description = f.read() 16 | 17 | setup( 18 | name='pyConTextNLP', 19 | 20 | # Versions should comply with PEP440. For a discussion on single-sourcing 21 | # the version across setup.py and the project code, see 22 | # https://packaging.python.org/en/latest/single_source_version.html 23 | version=version["__version__"], 24 | 25 | description='A Python implementation of the ConText algorithm', 26 | long_description=long_description, 27 | 28 | # The project's main homepage. 29 | url='https://github.com/chapmanbe/pyConTextNLP', 30 | 31 | # Author details 32 | author='Brian Chapman', 33 | 34 | author_email='brian.chapman@utah.edu', 35 | 36 | # Choose your license 37 | license='http://www.apache.org/licenses/LICENSE-2.0', 38 | 39 | # See https://pypi.python.org/pypi?%3Aaction=list_classifiers 40 | classifiers=[ 41 | # How mature is this project? Common values are 42 | # 3 - Alpha 43 | # 4 - Beta 44 | # 5 - Production/Stable 45 | 'Development Status :: 4 - Beta', 46 | 47 | # Indicate who your project is intended for 48 | #'Intended Audience :: Students', 49 | #'Topic :: Software Development :: Build Tools', 50 | 51 | # Pick your license as you wish (should match "license" above) 52 | #'License :: OSI Approved :: Apache2', 53 | 54 | # Specify the Python versions you support here. In particular, ensure 55 | # that you indicate whether you support Python 2, Python 3 or both. 56 | #'Programming Language :: Python :: 2', 57 | #'Programming Language :: Python :: 2.6', 58 | 'Programming Language :: Python :: 3', 59 | ], 60 | 61 | # What does your project relate to? 
62 | keywords='ConText NLP', 63 | 64 | # You can just specify the packages manually here if your project is 65 | # simple. Or you can use find_packages(). 66 | packages=find_packages(exclude=['contrib', 67 | 'docs', 68 | 'pyConText', 69 | 'tests*']), 70 | 71 | # List run-time dependencies here. These will be installed by pip when your 72 | # project is installed. For an analysis of "install_requires" vs pip's 73 | # requirements files see: 74 | # https://packaging.python.org/en/latest/requirements.html 75 | install_requires=['networkx', 'pyyaml'], 76 | 77 | # List additional groups of dependencies here (e.g. development dependencies). 78 | # You can install these using the following syntax, for example: 79 | # $ pip install -e .[dev,test] 80 | extras_require = { 81 | 'dev': ['check-manifest'], 82 | 'test': ['coverage'], 83 | }, 84 | 85 | # If there are data files included in your packages that need to be 86 | # installed, specify them here. If using Python 2.6 or less, then these 87 | # have to be included in MANIFEST.in as well. 88 | package_data={ 89 | }, 90 | 91 | # Although 'package_data' is the preferred approach, in some case you may 92 | # need to place data files outside of your packages. 93 | # see http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files 94 | # In this case, 'data_file' will be installed into '/my_data' 95 | #data_files=[('my_data', ['data/data_file'])], 96 | 97 | # To provide executable scripts, use entry points in preference to the 98 | # "scripts" keyword. Entry points provide cross-platform support and allow 99 | # pip to create the appropriate form of executable for the target platform. 
100 | #entry_points={ 101 | # 'console_scripts': [ 102 | # 'sample=sample:main', 103 | # ], 104 | #}, 105 | ) 106 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/74e5b9bc4de092dd8bef8e04b297369a7f3ea6d9/tests/__init__.py -------------------------------------------------------------------------------- /tests/pyConTextNLP/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/74e5b9bc4de092dd8bef8e04b297369a7f3ea6d9/tests/pyConTextNLP/__init__.py -------------------------------------------------------------------------------- /tests/pyConTextNLP/display/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/74e5b9bc4de092dd8bef8e04b297369a7f3ea6d9/tests/pyConTextNLP/display/__init__.py -------------------------------------------------------------------------------- /tests/pyConTextNLP/tests2.py: -------------------------------------------------------------------------------- 1 | from pyConTextNLP import pyConTextGraph as pyConText 2 | import networkx as nx 3 | 4 | def test_ConTextMarkup(): 5 | assert isinstance(pyConText.ConTextMarkup(), nx.DiGraph) 6 | 7 | 8 | def markup_sentence(s, modifiers, targets, prune_inactive=True): 9 | """ 10 | """ 11 | markup = pyConText.ConTextMarkup() 12 | markup.setRawText(s) 13 | markup.cleanText() 14 | markup.markItems(modifiers, mode="modifier") 15 | markup.markItems(targets, mode="target") 16 | markup.pruneMarks() 17 | markup.dropMarks('Exclusion') 18 | # apply modifiers to any targets within the modifiers scope 19 | markup.applyModifiers() 20 | markup.pruneSelfModifyingRelationships() 21 | if prune_inactive: 22 | markup.dropInactiveModifiers() 23 | 
return markup 24 | --------------------------------------------------------------------------------