├── .gitignore
├── KB
    ├── classificationRules3.csv
    ├── critical_findings.tsv
    ├── critical_findings.yml
    ├── critical_modifiers.tsv
    ├── critical_modifiers.yml
    ├── criticalfinder_generalized_modifiers.tsv
    ├── criticalfinder_generalized_modifiers.yml
    ├── lexical_kb_04292013.tsv
    ├── lexical_kb_04292013.yml
    ├── lexical_kb_05042016.tsv
    ├── lexical_kb_05042016.yml
    ├── lexical_kb_nlm.tsv
    ├── lexical_kb_nlm.yml
    ├── pah_utah.tsv
    ├── pah_utah.txt
    ├── pah_utah.yml
    ├── pe_kb.tsv
    ├── pe_kb.yml
    ├── pneumonia_modifiers.yml
    ├── pneumonia_targets.yml
    ├── quality_artifacts.tsv
    ├── quality_artifacts.yml
    ├── schema2.csv
    ├── test.yml
    ├── utah_crit.tsv
    └── utah_crit.yml
├── README.rst
├── docs
    ├── Makefile
    └── source
    │   ├── bibliography.md
    │   ├── conf.py
    │   └── index.md
├── notebooks
    ├── BasicSentenceMarkup.ipynb
    ├── BasicSentenceMarkupPart2.ipynb
    ├── MultiSentenceDocuments.ipynb
    ├── README.md
    ├── cherrypy_pyConText.py
    ├── functional
    │   ├── 2
    │   │   └── Reading_ConTextItems.ipynb
    │   └── 3
    │   │   └── Reading_ConTextItems.ipynb
    ├── html
    │   ├── BasicSentenceMarkup.html
    │   └── BasicSentenceMarkupPart2.html
    └── pyConText_REST_demo.ipynb
├── pyConTextNLP
    ├── ConTextMarkup.py
    ├── __init__.py
    ├── display
    │   ├── __init__.py
    │   ├── _bokeh.py
    │   ├── _mpld3.py
    │   └── html.py
    ├── helpers.py
    ├── io
    │   ├── __init__.py
    │   └── xml.py
    ├── itemData.py
    ├── pyConText.py
    ├── tagObject.py
    ├── tests
    │   ├── __init__.py
    │   ├── test_base.py
    │   ├── test_contextitem.py
    │   ├── test_contextmarkup.py
    │   ├── test_env.py
    │   ├── test_helpers.py
    │   └── test_itemData.py
    ├── utils.py
    └── version.py
├── requirements-py2.txt
├── setup.cfg
├── setup.py
└── tests
    ├── __init__.py
    └── pyConTextNLP
        ├── __init__.py
        ├── display
            └── __init__.py
        └── tests2.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | 
 5 | # C extensions
 6 | *.so
 7 | 
 8 | # Distribution / packaging
 9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | eggs/
15 | lib/
16 | lib64/
17 | parts/
18 | sdist/
19 | var/
20 | *.egg-info/
21 | .installed.cfg
22 | *.egg
23 | 
24 | # Installer logs
25 | pip-log.txt
26 | pip-delete-this-directory.txt
27 | 
28 | # Unit test / coverage reports
29 | htmlcov/
30 | .tox/
31 | .coverage
32 | .cache
33 | nosetests.xml
34 | coverage.xml
35 | 
36 | # Translations
37 | *.mo
38 | *.pot
39 | 
40 | # Django stuff:
41 | *.log
42 | 
43 | # Sphinx documentation
44 | docs/_build/
45 | 
46 | # VIM
47 | *.swp
48 | 
49 | *.pdf
50 | *.png
51 | *.text
52 | *.html
53 | *.backup
54 | 
55 | *.ipynb_checkpoints
56 | 


--------------------------------------------------------------------------------
/KB/classificationRules3.csv:
--------------------------------------------------------------------------------
 1 | # Lines that start with the # symbol are comments and are ignored,,,,,,,,,,,,,
 2 | # processReport current has three types of rules: @CLASSIFICATION_RULE, @CATEGORY_RULE, and @SEVERITY_RULE,,,,,,,,,,,
 3 | # classification rules would be for things like disease_state, certainty_state, temporality state,,,,,,,,,,,
 4 | # For each classification_rule set," there is a rule label (e.g. ""DISEASE_STATE"". This must match",,,,,,,,,,,,
 5 | # the terms used in the schema file,,,,,,,,,,,,,
 6 | # Each rule set requires a DEFAULT which is the schema value to be returned if no rule conditions are satisifed,,,,,,,,,,,,,
 7 | # Each rule set has zero or more rules consisting of a schema value to be returned if the rule evaluates to true,,,,,,,,,,,,,
 8 | # A rule evalutes to true if the target is modified by one or more of the ConText CATEGORIES listed following,,,,,,,,,,,,,
 9 | @CLASSIFICATION_RULE,DISEASE_STATE,RULE,0,DEFINITE_NEGATED_EXISTENCE,PROBABLE_NEGATED_EXISTENCE,FUTURE,INDICATION,PSEUDONEG,,,,,
10 | @CLASSIFICATION_RULE,DISEASE_STATE,RULE,2,AMBIVALENT_EXISTENCE,,,,,,,,,
11 | @CLASSIFICATION_RULE,DISEASE_STATE,RULE,1,PROBABLE_EXISTENCE,DEFINITE_EXISTENCE,,,,,,,,
12 | @CLASSIFICATION_RULE,DISEASE_STATE,DEFAULT,1,,,,,,,,,,
13 | @CLASSIFICATION_RULE,CERTAINTY_STATE,RULE,0,PROBABLE_NEGATED_EXISTENCE,AMBIVALENT_EXISTENCE,PROBABLE_EXISTENCE,,,,,,,
14 | @CLASSIFICATION_RULE,CERTAINTY_STATE,DEFAULT,1,,,,,,,,,,
15 | @CLASSIFICATION_RULE,ACUTE_STATE,RULE,0,HISTORICAL,,,,,,,,,
16 | @CLASSIFICATION_RULE,ACUTE_STATE,DEFAULT,1,,,,,,,,,,
17 | #CATEGORY_RULE rules specify what Findings (e.g. DVT) can have the category modified by the following ANATOMIC modifies,,,,,,,,,,,,,
18 | @CATEGORY_RULE,DVT,LOWER_DEEP_VEIN,UPPER_DEEP_VEIN,HEPATIC_VEIN,PORTAL_SYSTEM_VEIN,PULMONARY_VEIN,RENAL_VEIN,SINUS_VEIN,LOWER_SUPERFICIAL_VEIN,UPPER_SUPERFICIAL_VEIN,VARICOCELE,ARTERIAL,NON_VASCULAR
19 | @CATEGORY_RULE,INFARCT,BRAIN_ANATOMY,HEART_ANATOMY,OTHER_CRITICAL_ANATOMY,,,,,,,,,
20 | @CATEGORY_RULE,ANEURYSM,AORTIC_ANATOMY,,,,,,,,,,,
21 | #SEVERITY_RUlE specifiy which targets to try to obtain severity measures for,,,,,,,,,,,,,
22 | @SEVERITY_RULE,AORTIC_ANATOMY_ANEURYSM,SEVERITY,,,,,,,,,,,
23 | 


--------------------------------------------------------------------------------
/KB/critical_findings.tsv:
--------------------------------------------------------------------------------
1 | Lex	Type	Regex	Directionembolism	PULMONARY_EMBOLISM	\b(emboli|embolism|embolus)\b	pe	PULMONARY_EMBOLISM	\bpe\b	pulmonary embolism	PULMONARY_EMBOLISM	pulmonary\s(artery )?(embol[a-z]+)	aneurysm (target)	ANEURYSM	aneurysm[a-z]*	dilation	ANEURYSM		aneurysmal dilatation	ANEURYSM	(aneurysmal )?dilatation	aortic dissection	AORTIC_DISSECTION	(aortic|aorta)\s(artery\s)? dissection	appendicitis	APPENDICITIS		bowel obstruction	BOWEL_OBSTRUCTION		midline shift	BRAIN_HERNIATION	((subfalcian|subfalcine|tonsillar)\sherniation)|((midline|mid-line)\sshift)	carotid dissection	CAROTID_DISSECTION	carotid?\s*?\w*\s*dissection	cerebral hemorrhage	CEREBRAL_HEMORRHAGE	(cereblal|intracranial|brain)\s(hemorrhage|hematoma|bleed)	cervical fracture	CERVICAL_FRACTURE	(cervical spine|c[1-7]|hangman|jefferson|dens|odontoid)\sfracture	cholecystitis	CHOLECYSTITIS		cord compression	CORD_COMPRESSION		depressed skull fracture	DEPRESSED_SKULL_FRACTURE		diverticulitis	DIVERTICULITIS		DVT	DVT	((non.?)?occlusive)?\s?(thromb(us|i|osis|osed)\b|DVT|clot\b)	2/28/13ectasia	ECTASIA	(ectasia| ectatic)	ectopic pregnancy	ECTOPIC_PREGNANCY		epiglottitis	EPIGLOTTITIS		fetal demise	FETAL_DEMISE		free air	FREE_AIR	(pneumoperitoneum|((intraperitoneal|free)\s(gas|air)))	infarct	INFARCT	\b(stroke|infarct|infarction)\b	ischemic bowel	ISCHEMIC_BOWEL		lacunar infarct	LACUNAR_INFARCT		mediastinal emphysema	MEDIASTINAL_EMPHYSEMA		omental infarct	OMENTAL_INFARCT		bone infarct	OSTEONECROSIS	(bone infarct|osteonecrosis)	pneumonia	PNEUMONIA	pneumoni*	aspiration	PNEUMONIA	aspirat*	consolidation	PNEUMONIA	consolidat*	pneumothorax	PNEUMOTHORAX		portal venous air	PORTAL_VENOUS_AIR	portal\b(venous\b)?(gas|air)	renal infarct	RENAL_INFARCT		retroperitoneal hemorrhage	RETROPERITONEAL_HEMORRHAGE	(retro|intra)?peritoneal\s(hemorrhage|hematoma|bleed)	retropharyngeal abscess	RETROPHARYNGEAL_ABSCESS		ruptured aneurysm	RUPTURED_ANEURYSM	(ruptured aneurysm|aortic rupture)	spinal cord compression	SPINAL_CORD_COMPRESSION		splenic infarct	SPLENIC_INFARCT		tension pneumothorax	TENSION_PNEUMOTHORAX		torsion	TORSION		volvulus	VOLVULUS		


--------------------------------------------------------------------------------
/KB/critical_findings.yml:
--------------------------------------------------------------------------------
  1 | Comments: ''
  2 | Direction: ''
  3 | Lex: embolism
  4 | Regex: \b(emboli|embolism|embolus)\b
  5 | Type: PULMONARY_EMBOLISM
  6 | ---
  7 | Comments: ''
  8 | Direction: ''
  9 | Lex: pe
 10 | Regex: \bpe\b
 11 | Type: PULMONARY_EMBOLISM
 12 | ---
 13 | Comments: ''
 14 | Direction: ''
 15 | Lex: pulmonary embolism
 16 | Regex: pulmonary\s(artery )?(embol[a-z]+)
 17 | Type: PULMONARY_EMBOLISM
 18 | ---
 19 | Comments: ''
 20 | Direction: ''
 21 | Lex: aneurysm (target)
 22 | Regex: aneurysm[a-z]*
 23 | Type: ANEURYSM
 24 | ---
 25 | Comments: ''
 26 | Direction: ''
 27 | Lex: dilation
 28 | Regex: ''
 29 | Type: ANEURYSM
 30 | ---
 31 | Comments: ''
 32 | Direction: ''
 33 | Lex: aneurysmal dilatation
 34 | Regex: (aneurysmal )?dilatation
 35 | Type: ANEURYSM
 36 | ---
 37 | Comments: ''
 38 | Direction: ''
 39 | Lex: aortic dissection
 40 | Regex: (aortic|aorta)\s(artery\s)? dissection
 41 | Type: AORTIC_DISSECTION
 42 | ---
 43 | Comments: ''
 44 | Direction: ''
 45 | Lex: appendicitis
 46 | Regex: ''
 47 | Type: APPENDICITIS
 48 | ---
 49 | Comments: ''
 50 | Direction: ''
 51 | Lex: bowel obstruction
 52 | Regex: ''
 53 | Type: BOWEL_OBSTRUCTION
 54 | ---
 55 | Comments: ''
 56 | Direction: ''
 57 | Lex: midline shift
 58 | Regex: ((subfalcian|subfalcine|tonsillar)\sherniation)|((midline|mid-line)\sshift)
 59 | Type: BRAIN_HERNIATION
 60 | ---
 61 | Comments: ''
 62 | Direction: ''
 63 | Lex: carotid dissection
 64 | Regex: carotid?\s*?\w*\s*dissection
 65 | Type: CAROTID_DISSECTION
 66 | ---
 67 | Comments: ''
 68 | Direction: ''
 69 | Lex: cerebral hemorrhage
 70 | Regex: (cereblal|intracranial|brain)\s(hemorrhage|hematoma|bleed)
 71 | Type: CEREBRAL_HEMORRHAGE
 72 | ---
 73 | Comments: ''
 74 | Direction: ''
 75 | Lex: cervical fracture
 76 | Regex: (cervical spine|c[1-7]|hangman|jefferson|dens|odontoid)\sfracture
 77 | Type: CERVICAL_FRACTURE
 78 | ---
 79 | Comments: ''
 80 | Direction: ''
 81 | Lex: cholecystitis
 82 | Regex: ''
 83 | Type: CHOLECYSTITIS
 84 | ---
 85 | Comments: ''
 86 | Direction: ''
 87 | Lex: cord compression
 88 | Regex: ''
 89 | Type: CORD_COMPRESSION
 90 | ---
 91 | Comments: ''
 92 | Direction: ''
 93 | Lex: depressed skull fracture
 94 | Regex: ''
 95 | Type: DEPRESSED_SKULL_FRACTURE
 96 | ---
 97 | Comments: ''
 98 | Direction: ''
 99 | Lex: diverticulitis
100 | Regex: ''
101 | Type: DIVERTICULITIS
102 | ---
103 | Comments: ''
104 | Direction: 2/28/13
105 | Lex: DVT
106 | Regex: ((non.?)?occlusive)?\s?(thromb(us|i|osis|osed)\b|DVT|clot\b)
107 | Type: DVT
108 | ---
109 | Comments: ''
110 | Direction: ''
111 | Lex: ectasia
112 | Regex: (ectasia| ectatic)
113 | Type: ECTASIA
114 | ---
115 | Comments: ''
116 | Direction: ''
117 | Lex: ectopic pregnancy
118 | Regex: ''
119 | Type: ECTOPIC_PREGNANCY
120 | ---
121 | Comments: ''
122 | Direction: ''
123 | Lex: epiglottitis
124 | Regex: ''
125 | Type: EPIGLOTTITIS
126 | ---
127 | Comments: ''
128 | Direction: ''
129 | Lex: fetal demise
130 | Regex: ''
131 | Type: FETAL_DEMISE
132 | ---
133 | Comments: ''
134 | Direction: ''
135 | Lex: free air
136 | Regex: (pneumoperitoneum|((intraperitoneal|free)\s(gas|air)))
137 | Type: FREE_AIR
138 | ---
139 | Comments: ''
140 | Direction: ''
141 | Lex: infarct
142 | Regex: \b(stroke|infarct|infarction)\b
143 | Type: INFARCT
144 | ---
145 | Comments: ''
146 | Direction: ''
147 | Lex: ischemic bowel
148 | Regex: ''
149 | Type: ISCHEMIC_BOWEL
150 | ---
151 | Comments: ''
152 | Direction: ''
153 | Lex: lacunar infarct
154 | Regex: ''
155 | Type: LACUNAR_INFARCT
156 | ---
157 | Comments: ''
158 | Direction: ''
159 | Lex: mediastinal emphysema
160 | Regex: ''
161 | Type: MEDIASTINAL_EMPHYSEMA
162 | ---
163 | Comments: ''
164 | Direction: ''
165 | Lex: omental infarct
166 | Regex: ''
167 | Type: OMENTAL_INFARCT
168 | ---
169 | Comments: ''
170 | Direction: ''
171 | Lex: bone infarct
172 | Regex: (bone infarct|osteonecrosis)
173 | Type: OSTEONECROSIS
174 | ---
175 | Comments: ''
176 | Direction: ''
177 | Lex: pneumonia
178 | Regex: pneumoni*
179 | Type: PNEUMONIA
180 | ---
181 | Comments: ''
182 | Direction: ''
183 | Lex: aspiration
184 | Regex: aspirat*
185 | Type: PNEUMONIA
186 | ---
187 | Comments: ''
188 | Direction: ''
189 | Lex: consolidation
190 | Regex: consolidat*
191 | Type: PNEUMONIA
192 | ---
193 | Comments: ''
194 | Direction: ''
195 | Lex: pneumothorax
196 | Regex: ''
197 | Type: PNEUMOTHORAX
198 | ---
199 | Comments: ''
200 | Direction: ''
201 | Lex: portal venous air
202 | Regex: portal\b(venous\b)?(gas|air)
203 | Type: PORTAL_VENOUS_AIR
204 | ---
205 | Comments: ''
206 | Direction: ''
207 | Lex: renal infarct
208 | Regex: ''
209 | Type: RENAL_INFARCT
210 | ---
211 | Comments: ''
212 | Direction: ''
213 | Lex: retroperitoneal hemorrhage
214 | Regex: (retro|intra)?peritoneal\s(hemorrhage|hematoma|bleed)
215 | Type: RETROPERITONEAL_HEMORRHAGE
216 | ---
217 | Comments: ''
218 | Direction: ''
219 | Lex: retropharyngeal abscess
220 | Regex: ''
221 | Type: RETROPHARYNGEAL_ABSCESS
222 | ---
223 | Comments: ''
224 | Direction: ''
225 | Lex: ruptured aneurysm
226 | Regex: (ruptured aneurysm|aortic rupture)
227 | Type: RUPTURED_ANEURYSM
228 | ---
229 | Comments: ''
230 | Direction: ''
231 | Lex: spinal cord compression
232 | Regex: ''
233 | Type: SPINAL_CORD_COMPRESSION
234 | ---
235 | Comments: ''
236 | Direction: ''
237 | Lex: splenic infarct
238 | Regex: ''
239 | Type: SPLENIC_INFARCT
240 | ---
241 | Comments: ''
242 | Direction: ''
243 | Lex: tension pneumothorax
244 | Regex: ''
245 | Type: TENSION_PNEUMOTHORAX
246 | ---
247 | Comments: ''
248 | Direction: ''
249 | Lex: torsion
250 | Regex: ''
251 | Type: TORSION
252 | ---
253 | Comments: ''
254 | Direction: ''
255 | Lex: volvulus
256 | Regex: ''
257 | Type: VOLVULUS
258 | 


--------------------------------------------------------------------------------
/KB/critical_modifiers.tsv:
--------------------------------------------------------------------------------
 1 | Lex	Type	Regex	Direction	
 2 | bolus timing	QUALITY_FEATURE	\bbolus[ -]{0,1}timing	bidirectional	 # fixes pedoc #129 dq
 3 | limited exam	QUALITY_FEATURE	(suboptimal|degraded|limited) (exam[a-z]*|study|scan|evaluation|bolus|timing)	bidirectional	
 4 | nondiagnostic exam	QUALITY_FEATURE	nondiagnostic (exam[a-z]*|study|scan|evaluation)	bidirectional	 #fix for pedoc #231
 5 | artifact	ARTIFACT	artifact(ual)?		
 6 | bulk motion	ARTIFACT			
 7 | motion	ARTIFACT			
 8 | patient motion	ARTIFACT			
 9 | respiratory motion	ARTIFACT			
10 | declot	EXCLUSION			
11 | detorsion	EXCLUSION			
12 | embolization	EXCLUSION			
13 | epiploic appendicitis	EXCLUSION			
14 | in the setting of	EXCLUSION			
15 | pe examination	EXCLUSION	(pulmonary )(artery )?(embol[a-z]+)(exam[a-z]*|study|protocol)		
16 | septic embolism	EXCLUSION	septic\s(emboli|embolus|embolism)		
17 | thrombectomy	EXCLUSION			
18 | thrombin	EXCLUSION			
19 | upstroke	EXCLUSION			
20 | 


--------------------------------------------------------------------------------
/KB/critical_modifiers.yml:
--------------------------------------------------------------------------------
  1 | Comments: ' # fixes pedoc #129 dq'
  2 | Direction: bidirectional
  3 | Lex: bolus timing
  4 | Regex: \bbolus[ -]{0,1}timing
  5 | Type: QUALITY_FEATURE
  6 | ---
  7 | Comments: ''
  8 | Direction: bidirectional
  9 | Lex: limited exam
 10 | Regex: (suboptimal|degraded|limited) (exam[a-z]*|study|scan|evaluation|bolus|timing)
 11 | Type: QUALITY_FEATURE
 12 | ---
 13 | Comments: ' #fix for pedoc #231'
 14 | Direction: bidirectional
 15 | Lex: nondiagnostic exam
 16 | Regex: nondiagnostic (exam[a-z]*|study|scan|evaluation)
 17 | Type: QUALITY_FEATURE
 18 | ---
 19 | Comments: ''
 20 | Direction: ''
 21 | Lex: artifact
 22 | Regex: artifact(ual)?
 23 | Type: ARTIFACT
 24 | ---
 25 | Comments: ''
 26 | Direction: ''
 27 | Lex: bulk motion
 28 | Regex: ''
 29 | Type: ARTIFACT
 30 | ---
 31 | Comments: ''
 32 | Direction: ''
 33 | Lex: motion
 34 | Regex: ''
 35 | Type: ARTIFACT
 36 | ---
 37 | Comments: ''
 38 | Direction: ''
 39 | Lex: patient motion
 40 | Regex: ''
 41 | Type: ARTIFACT
 42 | ---
 43 | Comments: ''
 44 | Direction: ''
 45 | Lex: respiratory motion
 46 | Regex: ''
 47 | Type: ARTIFACT
 48 | ---
 49 | Comments: ''
 50 | Direction: ''
 51 | Lex: declot
 52 | Regex: ''
 53 | Type: EXCLUSION
 54 | ---
 55 | Comments: ''
 56 | Direction: ''
 57 | Lex: detorsion
 58 | Regex: ''
 59 | Type: EXCLUSION
 60 | ---
 61 | Comments: ''
 62 | Direction: ''
 63 | Lex: embolization
 64 | Regex: ''
 65 | Type: EXCLUSION
 66 | ---
 67 | Comments: ''
 68 | Direction: ''
 69 | Lex: epiploic appendicitis
 70 | Regex: ''
 71 | Type: EXCLUSION
 72 | ---
 73 | Comments: ''
 74 | Direction: ''
 75 | Lex: in the setting of
 76 | Regex: ''
 77 | Type: EXCLUSION
 78 | ---
 79 | Comments: ''
 80 | Direction: ''
 81 | Lex: pe examination
 82 | Regex: (pulmonary )(artery )?(embol[a-z]+)(exam[a-z]*|study|protocol)
 83 | Type: EXCLUSION
 84 | ---
 85 | Comments: ''
 86 | Direction: ''
 87 | Lex: septic embolism
 88 | Regex: septic\s(emboli|embolus|embolism)
 89 | Type: EXCLUSION
 90 | ---
 91 | Comments: ''
 92 | Direction: ''
 93 | Lex: thrombectomy
 94 | Regex: ''
 95 | Type: EXCLUSION
 96 | ---
 97 | Comments: ''
 98 | Direction: ''
 99 | Lex: thrombin
100 | Regex: ''
101 | Type: EXCLUSION
102 | ---
103 | Comments: ''
104 | Direction: ''
105 | Lex: upstroke
106 | Regex: ''
107 | Type: EXCLUSION
108 | 


--------------------------------------------------------------------------------
/KB/criticalfinder_generalized_modifiers.tsv:
--------------------------------------------------------------------------------
  1 | Lex	Type	Regex	Direction
  2 | abdominal aorta	AORTIC_ANATOMY	abdominal aort(a|ic)	bidirectional
  3 | aorta	AORTIC_ANATOMY	aort(a|ic)	bidirectional
  4 | ascending aorta	AORTIC_ANATOMY	ascending aort(a|ic)	bidirectional
  5 | thoracic aorta	AORTIC_ANATOMY	thoracic aort(a|ic)	bidirectional
  6 | A1	BRAIN_ANATOMY		bidirectional
  7 | intracranial	BRAIN_ANATOMY		bidirectional
  8 | cerebral	BRAIN_ANATOMY		bidirectional
  9 | A2	BRAIN_ANATOMY		bidirectional
 10 | ACA	BRAIN_ANATOMY		bidirectional
 11 | anterior limb	BRAIN_ANATOMY	(anterior limb|crus anterius capsulae internae|capsula interna|pars anterior|crus anterior|capsulae internae)	bidirectional
 12 | posterior limb	BRAIN_ANATOMY	(posterior limb|crus posterius capsulae internae|capsula interna|pars posterior|crus posterior|capsulae internae)	bidirectional
 13 | genu limb	BRAIN_ANATOMY	(genu limb|internal capsule genu|genu capsulae internae)	bidirectional
 14 | brain	BRAIN_ANATOMY		bidirectional
 15 | caudate	BRAIN_ANATOMY	(caudate|caudate nucleus|nucleus caudatus)	bidirectional
 16 | cerebellar	BRAIN_ANATOMY		bidirectional
 17 | cerebellum	BRAIN_ANATOMY	(cerebellum|epencephalon-1|kleinhirn)	bidirectional
 18 | corona radiata	BRAIN_ANATOMY		bidirectional
 19 | cerebral cortex	BRAIN_ANATOMY	(cerebral cortex|cortex of cerebrum|cortex cerebri|pallium|cortex cerebralis)	bidirectional
 20 | encephalo	BRAIN_ANATOMY		bidirectional
 21 | brain lobe	BRAIN_ANATOMY	(frontal|parietal|occipital|temporal|limbic)( lobe(s))?	bidirectional
 22 | ganglia	BRAIN_ANATOMY		bidirectional
 23 | gray matter	BRAIN_ANATOMY	(gray|grey) matter	bidirectional
 24 | white matter	BRAIN_ANATOMY	white matter	bidirectional
 25 | gyrus	BRAIN_ANATOMY		bidirectional
 26 | hemisphere	BRAIN_ANATOMY		bidirectional
 27 | insular	BRAIN_ANATOMY		bidirectional
 28 | internal capsule	BRAIN_ANATOMY	(internal capsule|internal capsule radiations|capsula interna)	bidirectional
 29 | lentiform	BRAIN_ANATOMY		bidirectional
 30 | M1	BRAIN_ANATOMY		bidirectional
 31 | M2	BRAIN_ANATOMY		bidirectional
 32 | MCA	BRAIN_ANATOMY		bidirectional
 33 | nuclei	BRAIN_ANATOMY	nucle(i|us)	bidirectional
 34 | P1	BRAIN_ANATOMY		bidirectional
 35 | P2	BRAIN_ANATOMY		bidirectional
 36 | PCA	BRAIN_ANATOMY		bidirectional
 37 | pons	BRAIN_ANATOMY		bidirectional
 38 | putamen	BRAIN_ANATOMY		bidirectional
 39 | semiovale	BRAIN_ANATOMY	(semiovale center|semiovale|semi-ovale|medullary center|white matter of cerebrum|centrum semiovale|substantia centralis medullaris cerebri|corpus medullare cerebri|centrum ovale)	bidirectional
 40 | sulcus	BRAIN_ANATOMY		bidirectional
 41 | territorial	BRAIN_ANATOMY		bidirectional
 42 | territory	BRAIN_ANATOMY		bidirectional
 43 | measuring X.X cm	SEVERITY	(measuring|diameter of|measured) (?P<value>[0-9]+(\.[0-9]*)?)( )?(?P<unit>(cm|mm))	backward
 44 | measuring XxY cm	SEVERITY	(measuring|diameter of|measured) (?P<value>[0-9]+(\.[0-9]*)?( )?(x|by)( )?[0-9]+(\.[0-9]*)?)( )?(?P<unit>(cm|mm))	backward
 45 | X.Y cm	SEVERITY	(?P<value>[0-9]+(\.[0-9]*)?)( )?(?P<unit>(cm|mm))	forward
 46 | XxY cm	SEVERITY	(?P<value>[0-9]+(\.[0-9]*)?( )?(x|by)( )?[0-9]+(\.[0-9]*)?)( )?(?P<unit>(cm|mm))	forward
 47 | anterior	HEART_ANATOMY		bidirectional
 48 | septal	HEART_ANATOMY		bidirectional
 49 | septum	HEART_ANATOMY	(cardiac septum|septum)	bidirectional
 50 | inferior	HEART_ANATOMY		bidirectional
 51 | heart apex	HEART_ANATOMY	(cardiac apex|apex of (the )?heart)	bidirectional
 52 | inferolateral	HEART_ANATOMY		bidirectional
 53 | lateral 	HEART_ANATOMY		bidirectional
 54 | anteroseptal 	HEART_ANATOMY		bidirectional
 55 | transmural	HEART_ANATOMY		bidirectional
 56 | wall	HEART_ANATOMY		bidirectional
 57 | left ventricular wall	HEART_ANATOMY	(\blv\b|left ventricular)( wall)?	bidirectional
 58 | right ventricular wall	HEART_ANATOMY	(\brv\b|right ventricular)( wall)?	bidirectional
 59 | myocardial	HEART_ANATOMY	(myocardial|myocardium)	bidirectional
 60 | cardiac	HEART_ANATOMY	(cardiac|heart)	forward
 61 | omental	OTHER_CRITICAL_ANATOMY	(omentum|omental)	bidirectional
 62 | spleen	OTHER_CRITICAL_ANATOMY	(spleen|splenic)	bidirectional
 63 | kidney	OTHER_CRITICAL_ANATOMY	(kidney|\brenal\b)	bidirectional
 64 | testis	OTHER_CRITICAL_ANATOMY	(testis|testicular|testes)	bidirectional
 65 | ovary	OTHER_CRITICAL_ANATOMY	(ovary|ovaries|ovarian)	bidirectional
 66 | azygos vein	AZYGOS_VEIN		bidirectional
 67 | azygos arch	AZYGOS_VEIN		bidirectional
 68 | inferior vena cava	LOWER_DEEP_VEIN		bidirectional
 69 | common iliac vein	LOWER_DEEP_VEIN		bidirectional
 70 | internal iliac vein	LOWER_DEEP_VEIN		bidirectional
 71 | external iliac vein	LOWER_DEEP_VEIN		bidirectional
 72 | common femoral vein	LOWER_DEEP_VEIN		bidirectional
 73 | femoral vein	LOWER_DEEP_VEIN		bidirectional
 74 | popliteal vein	LOWER_DEEP_VEIN		bidirectional
 75 | anterior tibial vein	LOWER_DEEP_VEIN		bidirectional
 76 | peroneal vein	LOWER_DEEP_VEIN		bidirectional
 77 | posterior tibial vein	LOWER_DEEP_VEIN		bidirectional
 78 | superior vena cava	UPPER_DEEP_VEIN		bidirectional
 79 | brachiocephalic vein	UPPER_DEEP_VEIN		bidirectional
 80 | subclavian vein	UPPER_DEEP_VEIN		bidirectional
 81 | axillary vein	UPPER_DEEP_VEIN		bidirectional
 82 | brachial vein	UPPER_DEEP_VEIN		bidirectional
 83 | external jugular vein	UPPER_DEEP_VEIN		bidirectional
 84 | anterior jugular vein	UPPER_DEEP_VEIN		bidirectional
 85 | jugular venous arch	UPPER_DEEP_VEIN		bidirectional
 86 | internal jugular vein	UPPER_DEEP_VEIN		bidirectional
 87 | hepatic vein	HEPATIC_VEIN		bidirectional
 88 | portal vein	PORTAL_SYSTEM_VEIN		bidirectional
 89 | splenic vein	PORTAL_SYSTEM_VEIN		bidirectional
 90 | inferior mesenteric vein	PORTAL_SYSTEM_VEIN		bidirectional
 91 | superior mesenteric vein	PORTAL_SYSTEM_VEIN		bidirectional
 92 | pulmonary vein	PULMONARY_VEIN		bidirectional
 93 | renal vein	RENAL_VEIN		bidirectional
 94 | sigmoid sinus	SINUS_VEIN		bidirectional
 95 | inferior petrosal sinus	SINUS_VEIN		bidirectional
 96 | cavernous sinus	SINUS_VEIN		bidirectional
 97 | greater saphenous vein	LOWER_SUPERFICIAL_VEIN		bidirectional
 98 | lesser saphenous vein	LOWER_SUPERFICIAL_VEIN		bidirectional
 99 | basilic vein	UPPER_SUPERFICIAL_VEIN		bidirectional
100 | cephalic vein	UPPER_SUPERFICIAL_VEIN		bidirectional
101 | varicocele	VARICOCELE		bidirectional
102 | varicous vein	UPPER_SUPERFICIAL_VEIN		bidirectional
103 | 


--------------------------------------------------------------------------------
/KB/criticalfinder_generalized_modifiers.yml:
--------------------------------------------------------------------------------
  1 | Comments: ''
  2 | Direction: bidirectional
  3 | Lex: abdominal aorta
  4 | Regex: abdominal aort(a|ic)
  5 | Type: AORTIC_ANATOMY
  6 | ---
  7 | Comments: ''
  8 | Direction: bidirectional
  9 | Lex: aorta
 10 | Regex: aort(a|ic)
 11 | Type: AORTIC_ANATOMY
 12 | ---
 13 | Comments: ''
 14 | Direction: bidirectional
 15 | Lex: ascending aorta
 16 | Regex: ascending aort(a|ic)
 17 | Type: AORTIC_ANATOMY
 18 | ---
 19 | Comments: ''
 20 | Direction: bidirectional
 21 | Lex: thoracic aorta
 22 | Regex: thoracic aort(a|ic)
 23 | Type: AORTIC_ANATOMY
 24 | ---
 25 | Comments: ''
 26 | Direction: bidirectional
 27 | Lex: A1
 28 | Regex: ''
 29 | Type: BRAIN_ANATOMY
 30 | ---
 31 | Comments: ''
 32 | Direction: bidirectional
 33 | Lex: intracranial
 34 | Regex: ''
 35 | Type: BRAIN_ANATOMY
 36 | ---
 37 | Comments: ''
 38 | Direction: bidirectional
 39 | Lex: cerebral
 40 | Regex: ''
 41 | Type: BRAIN_ANATOMY
 42 | ---
 43 | Comments: ''
 44 | Direction: bidirectional
 45 | Lex: A2
 46 | Regex: ''
 47 | Type: BRAIN_ANATOMY
 48 | ---
 49 | Comments: ''
 50 | Direction: bidirectional
 51 | Lex: ACA
 52 | Regex: ''
 53 | Type: BRAIN_ANATOMY
 54 | ---
 55 | Comments: ''
 56 | Direction: bidirectional
 57 | Lex: anterior limb
 58 | Regex: (anterior limb|crus anterius capsulae internae|capsula interna|pars anterior|crus
 59 |   anterior|capsulae internae)
 60 | Type: BRAIN_ANATOMY
 61 | ---
 62 | Comments: ''
 63 | Direction: bidirectional
 64 | Lex: posterior limb
 65 | Regex: (posterior limb|crus posterius capsulae internae|capsula interna|pars posterior|crus
 66 |   posterior|capsulae internae)
 67 | Type: BRAIN_ANATOMY
 68 | ---
 69 | Comments: ''
 70 | Direction: bidirectional
 71 | Lex: genu limb
 72 | Regex: (genu limb|internal capsule genu|genu capsulae internae)
 73 | Type: BRAIN_ANATOMY
 74 | ---
 75 | Comments: ''
 76 | Direction: bidirectional
 77 | Lex: brain
 78 | Regex: ''
 79 | Type: BRAIN_ANATOMY
 80 | ---
 81 | Comments: ''
 82 | Direction: bidirectional
 83 | Lex: caudate
 84 | Regex: (caudate|caudate nucleus|nucleus caudatus)
 85 | Type: BRAIN_ANATOMY
 86 | ---
 87 | Comments: ''
 88 | Direction: bidirectional
 89 | Lex: cerebellar
 90 | Regex: ''
 91 | Type: BRAIN_ANATOMY
 92 | ---
 93 | Comments: ''
 94 | Direction: bidirectional
 95 | Lex: cerebellum
 96 | Regex: (cerebellum|epencephalon-1|kleinhirn)
 97 | Type: BRAIN_ANATOMY
 98 | ---
 99 | Comments: ''
100 | Direction: bidirectional
101 | Lex: corona radiata
102 | Regex: ''
103 | Type: BRAIN_ANATOMY
104 | ---
105 | Comments: ''
106 | Direction: bidirectional
107 | Lex: cerebral cortex
108 | Regex: (cerebral cortex|cortex of cerebrum|cortex cerebri|pallium|cortex cerebralis)
109 | Type: BRAIN_ANATOMY
110 | ---
111 | Comments: ''
112 | Direction: bidirectional
113 | Lex: encephalo
114 | Regex: ''
115 | Type: BRAIN_ANATOMY
116 | ---
117 | Comments: ''
118 | Direction: bidirectional
119 | Lex: brain lobe
120 | Regex: (frontal|parietal|occipital|temporal|limbic)( lobe(s))?
121 | Type: BRAIN_ANATOMY
122 | ---
123 | Comments: ''
124 | Direction: bidirectional
125 | Lex: ganglia
126 | Regex: ''
127 | Type: BRAIN_ANATOMY
128 | ---
129 | Comments: ''
130 | Direction: bidirectional
131 | Lex: gray matter
132 | Regex: (gray|grey) matter
133 | Type: BRAIN_ANATOMY
134 | ---
135 | Comments: ''
136 | Direction: bidirectional
137 | Lex: white matter
138 | Regex: white matter
139 | Type: BRAIN_ANATOMY
140 | ---
141 | Comments: ''
142 | Direction: bidirectional
143 | Lex: gyrus
144 | Regex: ''
145 | Type: BRAIN_ANATOMY
146 | ---
147 | Comments: ''
148 | Direction: bidirectional
149 | Lex: hemisphere
150 | Regex: ''
151 | Type: BRAIN_ANATOMY
152 | ---
153 | Comments: ''
154 | Direction: bidirectional
155 | Lex: insular
156 | Regex: ''
157 | Type: BRAIN_ANATOMY
158 | ---
159 | Comments: ''
160 | Direction: bidirectional
161 | Lex: internal capsule
162 | Regex: (internal capsule|internal capsule radiations|capsula interna)
163 | Type: BRAIN_ANATOMY
164 | ---
165 | Comments: ''
166 | Direction: bidirectional
167 | Lex: lentiform
168 | Regex: ''
169 | Type: BRAIN_ANATOMY
170 | ---
171 | Comments: ''
172 | Direction: bidirectional
173 | Lex: M1
174 | Regex: ''
175 | Type: BRAIN_ANATOMY
176 | ---
177 | Comments: ''
178 | Direction: bidirectional
179 | Lex: M2
180 | Regex: ''
181 | Type: BRAIN_ANATOMY
182 | ---
183 | Comments: ''
184 | Direction: bidirectional
185 | Lex: MCA
186 | Regex: ''
187 | Type: BRAIN_ANATOMY
188 | ---
189 | Comments: ''
190 | Direction: bidirectional
191 | Lex: nuclei
192 | Regex: nucle(i|us)
193 | Type: BRAIN_ANATOMY
194 | ---
195 | Comments: ''
196 | Direction: bidirectional
197 | Lex: P1
198 | Regex: ''
199 | Type: BRAIN_ANATOMY
200 | ---
201 | Comments: ''
202 | Direction: bidirectional
203 | Lex: P2
204 | Regex: ''
205 | Type: BRAIN_ANATOMY
206 | ---
207 | Comments: ''
208 | Direction: bidirectional
209 | Lex: PCA
210 | Regex: ''
211 | Type: BRAIN_ANATOMY
212 | ---
213 | Comments: ''
214 | Direction: bidirectional
215 | Lex: pons
216 | Regex: ''
217 | Type: BRAIN_ANATOMY
218 | ---
219 | Comments: ''
220 | Direction: bidirectional
221 | Lex: putamen
222 | Regex: ''
223 | Type: BRAIN_ANATOMY
224 | ---
225 | Comments: ''
226 | Direction: bidirectional
227 | Lex: semiovale
228 | Regex: (semiovale center|semiovale|semi-ovale|medullary center|white matter of cerebrum|centrum
229 |   semiovale|substantia centralis medullaris cerebri|corpus medullare cerebri|centrum
230 |   ovale)
231 | Type: BRAIN_ANATOMY
232 | ---
233 | Comments: ''
234 | Direction: bidirectional
235 | Lex: sulcus
236 | Regex: ''
237 | Type: BRAIN_ANATOMY
238 | ---
239 | Comments: ''
240 | Direction: bidirectional
241 | Lex: territorial
242 | Regex: ''
243 | Type: BRAIN_ANATOMY
244 | ---
245 | Comments: ''
246 | Direction: bidirectional
247 | Lex: territory
248 | Regex: ''
249 | Type: BRAIN_ANATOMY
250 | ---
251 | Comments: ''
252 | Direction: backward
253 | Lex: measuring X.X cm
254 | Regex: (measuring|diameter of|measured) (?P<value>[0-9]+(\.[0-9]*)?)( )?(?P<unit>(cm|mm))
255 | Type: SEVERITY
256 | ---
257 | Comments: ''
258 | Direction: backward
259 | Lex: measuring XxY cm
260 | Regex: (measuring|diameter of|measured) (?P<value>[0-9]+(\.[0-9]*)?( )?(x|by)( )?[0-9]+(\.[0-9]*)?)(
261 |   )?(?P<unit>(cm|mm))
262 | Type: SEVERITY
263 | ---
264 | Comments: ''
265 | Direction: forward
266 | Lex: X.Y cm
267 | Regex: (?P<value>[0-9]+(\.[0-9]*)?)( )?(?P<unit>(cm|mm))
268 | Type: SEVERITY
269 | ---
270 | Comments: ''
271 | Direction: forward
272 | Lex: XxY cm
273 | Regex: (?P<value>[0-9]+(\.[0-9]*)?( )?(x|by)( )?[0-9]+(\.[0-9]*)?)( )?(?P<unit>(cm|mm))
274 | Type: SEVERITY
275 | ---
276 | Comments: ''
277 | Direction: bidirectional
278 | Lex: anterior
279 | Regex: ''
280 | Type: HEART_ANATOMY
281 | ---
282 | Comments: ''
283 | Direction: bidirectional
284 | Lex: septal
285 | Regex: ''
286 | Type: HEART_ANATOMY
287 | ---
288 | Comments: ''
289 | Direction: bidirectional
290 | Lex: septum
291 | Regex: (cardiac septum|septum)
292 | Type: HEART_ANATOMY
293 | ---
294 | Comments: ''
295 | Direction: bidirectional
296 | Lex: inferior
297 | Regex: ''
298 | Type: HEART_ANATOMY
299 | ---
300 | Comments: ''
301 | Direction: bidirectional
302 | Lex: heart apex
303 | Regex: (cardiac apex|apex of (the )?heart)
304 | Type: HEART_ANATOMY
305 | ---
306 | Comments: ''
307 | Direction: bidirectional
308 | Lex: inferolateral
309 | Regex: ''
310 | Type: HEART_ANATOMY
311 | ---
312 | Comments: ''
313 | Direction: bidirectional
314 | Lex: 'lateral '
315 | Regex: ''
316 | Type: HEART_ANATOMY
317 | ---
318 | Comments: ''
319 | Direction: bidirectional
320 | Lex: 'anteroseptal '
321 | Regex: ''
322 | Type: HEART_ANATOMY
323 | ---
324 | Comments: ''
325 | Direction: bidirectional
326 | Lex: transmural
327 | Regex: ''
328 | Type: HEART_ANATOMY
329 | ---
330 | Comments: ''
331 | Direction: bidirectional
332 | Lex: wall
333 | Regex: ''
334 | Type: HEART_ANATOMY
335 | ---
336 | Comments: ''
337 | Direction: bidirectional
338 | Lex: left ventricular wall
339 | Regex: (\blv\b|left ventricular)( wall)?
340 | Type: HEART_ANATOMY
341 | ---
342 | Comments: ''
343 | Direction: bidirectional
344 | Lex: right ventricular wall
345 | Regex: (\brv\b|right ventricular)( wall)?
346 | Type: HEART_ANATOMY
347 | ---
348 | Comments: ''
349 | Direction: bidirectional
350 | Lex: myocardial
351 | Regex: (myocardial|myocardium)
352 | Type: HEART_ANATOMY
353 | ---
354 | Comments: ''
355 | Direction: forward
356 | Lex: cardiac
357 | Regex: (cardiac|heart)
358 | Type: HEART_ANATOMY
359 | ---
360 | Comments: ''
361 | Direction: bidirectional
362 | Lex: omental
363 | Regex: (omentum|omental)
364 | Type: OTHER_CRITICAL_ANATOMY
365 | ---
366 | Comments: ''
367 | Direction: bidirectional
368 | Lex: spleen
369 | Regex: (spleen|splenic)
370 | Type: OTHER_CRITICAL_ANATOMY
371 | ---
372 | Comments: ''
373 | Direction: bidirectional
374 | Lex: kidney
375 | Regex: (kidney|\brenal\b)
376 | Type: OTHER_CRITICAL_ANATOMY
377 | ---
378 | Comments: ''
379 | Direction: bidirectional
380 | Lex: testis
381 | Regex: (testis|testicular|testes)
382 | Type: OTHER_CRITICAL_ANATOMY
383 | ---
384 | Comments: ''
385 | Direction: bidirectional
386 | Lex: ovary
387 | Regex: (ovary|ovaries|ovarian)
388 | Type: OTHER_CRITICAL_ANATOMY
389 | ---
390 | Comments: ''
391 | Direction: bidirectional
392 | Lex: azygos vein
393 | Regex: ''
394 | Type: AZYGOS_VEIN
395 | ---
396 | Comments: ''
397 | Direction: bidirectional
398 | Lex: azygos arch
399 | Regex: ''
400 | Type: AZYGOS_VEIN
401 | ---
402 | Comments: ''
403 | Direction: bidirectional
404 | Lex: inferior vena cava
405 | Regex: ''
406 | Type: LOWER_DEEP_VEIN
407 | ---
408 | Comments: ''
409 | Direction: bidirectional
410 | Lex: common iliac vein
411 | Regex: ''
412 | Type: LOWER_DEEP_VEIN
413 | ---
414 | Comments: ''
415 | Direction: bidirectional
416 | Lex: internal iliac vein
417 | Regex: ''
418 | Type: LOWER_DEEP_VEIN
419 | ---
420 | Comments: ''
421 | Direction: bidirectional
422 | Lex: external iliac vein
423 | Regex: ''
424 | Type: LOWER_DEEP_VEIN
425 | ---
426 | Comments: ''
427 | Direction: bidirectional
428 | Lex: common femoral vein
429 | Regex: ''
430 | Type: LOWER_DEEP_VEIN
431 | ---
432 | Comments: ''
433 | Direction: bidirectional
434 | Lex: femoral vein
435 | Regex: ''
436 | Type: LOWER_DEEP_VEIN
437 | ---
438 | Comments: ''
439 | Direction: bidirectional
440 | Lex: popliteal vein
441 | Regex: ''
442 | Type: LOWER_DEEP_VEIN
443 | ---
444 | Comments: ''
445 | Direction: bidirectional
446 | Lex: anterior tibial vein
447 | Regex: ''
448 | Type: LOWER_DEEP_VEIN
449 | ---
450 | Comments: ''
451 | Direction: bidirectional
452 | Lex: peroneal vein
453 | Regex: ''
454 | Type: LOWER_DEEP_VEIN
455 | ---
456 | Comments: ''
457 | Direction: bidirectional
458 | Lex: posterior tibial vein
459 | Regex: ''
460 | Type: LOWER_DEEP_VEIN
461 | ---
462 | Comments: ''
463 | Direction: bidirectional
464 | Lex: superior vena cava
465 | Regex: ''
466 | Type: UPPER_DEEP_VEIN
467 | ---
468 | Comments: ''
469 | Direction: bidirectional
470 | Lex: brachiocephalic vein
471 | Regex: ''
472 | Type: UPPER_DEEP_VEIN
473 | ---
474 | Comments: ''
475 | Direction: bidirectional
476 | Lex: subclavian vein
477 | Regex: ''
478 | Type: UPPER_DEEP_VEIN
479 | ---
480 | Comments: ''
481 | Direction: bidirectional
482 | Lex: axillary vein
483 | Regex: ''
484 | Type: UPPER_DEEP_VEIN
485 | ---
486 | Comments: ''
487 | Direction: bidirectional
488 | Lex: brachial vein
489 | Regex: ''
490 | Type: UPPER_DEEP_VEIN
491 | ---
492 | Comments: ''
493 | Direction: bidirectional
494 | Lex: external jugular vein
495 | Regex: ''
496 | Type: UPPER_DEEP_VEIN
497 | ---
498 | Comments: ''
499 | Direction: bidirectional
500 | Lex: anterior jugular vein
501 | Regex: ''
502 | Type: UPPER_DEEP_VEIN
503 | ---
504 | Comments: ''
505 | Direction: bidirectional
506 | Lex: jugular venous arch
507 | Regex: ''
508 | Type: UPPER_DEEP_VEIN
509 | ---
510 | Comments: ''
511 | Direction: bidirectional
512 | Lex: internal jugular vein
513 | Regex: ''
514 | Type: UPPER_DEEP_VEIN
515 | ---
516 | Comments: ''
517 | Direction: bidirectional
518 | Lex: hepatic vein
519 | Regex: ''
520 | Type: HEPATIC_VEIN
521 | ---
522 | Comments: ''
523 | Direction: bidirectional
524 | Lex: portal vein
525 | Regex: ''
526 | Type: PORTAL_SYSTEM_VEIN
527 | ---
528 | Comments: ''
529 | Direction: bidirectional
530 | Lex: splenic vein
531 | Regex: ''
532 | Type: PORTAL_SYSTEM_VEIN
533 | ---
534 | Comments: ''
535 | Direction: bidirectional
536 | Lex: inferior mesenteric vein
537 | Regex: ''
538 | Type: PORTAL_SYSTEM_VEIN
539 | ---
540 | Comments: ''
541 | Direction: bidirectional
542 | Lex: superior mesenteric vein
543 | Regex: ''
544 | Type: PORTAL_SYSTEM_VEIN
545 | ---
546 | Comments: ''
547 | Direction: bidirectional
548 | Lex: pulmonary vein
549 | Regex: ''
550 | Type: PULMONARY_VEIN
551 | ---
552 | Comments: ''
553 | Direction: bidirectional
554 | Lex: renal vein
555 | Regex: ''
556 | Type: RENAL_VEIN
557 | ---
558 | Comments: ''
559 | Direction: bidirectional
560 | Lex: sigmoid sinus
561 | Regex: ''
562 | Type: SINUS_VEIN
563 | ---
564 | Comments: ''
565 | Direction: bidirectional
566 | Lex: inferior petrosal sinus
567 | Regex: ''
568 | Type: SINUS_VEIN
569 | ---
570 | Comments: ''
571 | Direction: bidirectional
572 | Lex: cavernous sinus
573 | Regex: ''
574 | Type: SINUS_VEIN
575 | ---
576 | Comments: ''
577 | Direction: bidirectional
578 | Lex: greater saphenous vein
579 | Regex: ''
580 | Type: LOWER_SUPERFICIAL_VEIN
581 | ---
582 | Comments: ''
583 | Direction: bidirectional
584 | Lex: lesser saphenous vein
585 | Regex: ''
586 | Type: LOWER_SUPERFICIAL_VEIN
587 | ---
588 | Comments: ''
589 | Direction: bidirectional
590 | Lex: basilic vein
591 | Regex: ''
592 | Type: UPPER_SUPERFICIAL_VEIN
593 | ---
594 | Comments: ''
595 | Direction: bidirectional
596 | Lex: cephalic vein
597 | Regex: ''
598 | Type: UPPER_SUPERFICIAL_VEIN
599 | ---
600 | Comments: ''
601 | Direction: bidirectional
602 | Lex: varicocele
603 | Regex: ''
604 | Type: VARICOCELE
605 | ---
606 | Comments: ''
607 | Direction: bidirectional
608 | Lex: varicous vein
609 | Regex: ''
610 | Type: UPPER_SUPERFICIAL_VEIN
611 | 


--------------------------------------------------------------------------------
/KB/lexical_kb_nlm.tsv:
--------------------------------------------------------------------------------
 1 | Lex	Type	Regex	Direction	Unnamed: 4	Unnamed: 5	Unnamed: 6	Codes
 2 | although	CONJ		terminate
 3 | apart from	CONJ		terminate
 4 | as a cause for	CONJ		terminate
 5 | as a cause of	CONJ		terminate
 6 | as a etiology for	CONJ		terminate
 7 | as a etiology of	CONJ		terminate
 8 | as a reason for	CONJ		terminate
 9 | as a reason of	CONJ		terminate
10 | as a secondary cause for	CONJ		terminate
11 | as a secondary cause of	CONJ		terminate
12 | as a secondary etiology for	CONJ		terminate
13 | as a secondary etiology of	CONJ		terminate
14 | as a secondary origin for	CONJ		terminate
15 | as a secondary origin of	CONJ		terminate
16 | as a secondary reason for	CONJ		terminate
17 | as a secondary reason of	CONJ		terminate
18 | as a secondary source for	CONJ		terminate
19 | as a secondary source of	CONJ		terminate
20 | as a source for	CONJ		terminate
21 | as a source of	CONJ		terminate
22 | as an cause for	CONJ		terminate
23 | as an cause of	CONJ		terminate
24 | as an etiology for	CONJ		terminate
25 | as an etiology of	CONJ		terminate
26 | as an origin for	CONJ		terminate
27 | as an origin of	CONJ		terminate
28 | as an reason for	CONJ		terminate
29 | as an reason of	CONJ		terminate
30 | as an secondary cause for	CONJ		terminate
31 | as an secondary cause of	CONJ		terminate
32 | as an secondary etiology for	CONJ		terminate
33 | as an secondary etiology of	CONJ		terminate
34 | as an secondary origin for	CONJ		terminate
35 | as an secondary origin of	CONJ		terminate
36 | as an secondary reason for	CONJ		terminate
37 | as an secondary reason of	CONJ		terminate
38 | as an secondary source for	CONJ		terminate
39 | as an secondary source of	CONJ		terminate
40 | as an source for	CONJ		terminate
41 | as an source of	CONJ		terminate
42 | as the cause for	CONJ		terminate
43 | as the cause of	CONJ		terminate
44 | as the etiology for	CONJ		terminate
45 | as the etiology of	CONJ		terminate
46 | as the origin for	CONJ		terminate
47 | as the origin of	CONJ		terminate
48 | as the reason for	CONJ		terminate
49 | as the reason of	CONJ		terminate
50 | as the secondary cause for	CONJ		terminate
51 | as the secondary cause of	CONJ		terminate
52 | as the secondary etiology for	CONJ		terminate
53 | as the secondary etiology of	CONJ		terminate
54 | as the secondary origin for	CONJ		terminate
55 | as the secondary origin of	CONJ		terminate
56 | as the secondary reason for	CONJ		terminate
57 | as the secondary reason of	CONJ		terminate
58 | as the secondary source for	CONJ		terminate
59 | as the secondary source of	CONJ		terminate
60 | as the source for	CONJ		terminate
61 | as the source of	CONJ		terminate
62 | as there are 	CONJ		terminate	2/14/2013
63 | aside from	CONJ		terminate
64 | but	CONJ		terminate
65 | cause for	CONJ		terminate
66 | cause of	CONJ		terminate
67 | causes for	CONJ		terminate
68 | causes of	CONJ		terminate
69 | etiology for	CONJ		terminate
70 | etiology of	CONJ		terminate
71 | except	CONJ		terminate
72 | however	CONJ		terminate
73 | involving	CONJ		terminate
74 | nevertheless	CONJ		terminate
75 | origin for	CONJ		terminate
76 | origin of	CONJ		terminate
77 | origins for	CONJ		terminate
78 | origins of	CONJ		terminate
79 | other possibilities of	CONJ		terminate
80 | reason for	CONJ		terminate
81 | reason of	CONJ		terminate
82 | reasons for	CONJ		terminate
83 | reasons of	CONJ		terminate
84 | secondary to	CONJ		terminate
85 | source for	CONJ		terminate
86 | source of	CONJ		terminate
87 | sources for	CONJ		terminate
88 | sources of	CONJ		terminate
89 | still	CONJ		terminate
90 | though	CONJ		terminate
91 | trigger event for	CONJ		terminate
92 | which	CONJ		terminate
93 | yet	CONJ		terminate
94 | 


--------------------------------------------------------------------------------
/KB/lexical_kb_nlm.yml:
--------------------------------------------------------------------------------
  1 | Comments: ''
  2 | Direction: terminate
  3 | Lex: although
  4 | Regex: ''
  5 | Type: CONJ
  6 | ---
  7 | Comments: ''
  8 | Direction: terminate
  9 | Lex: apart from
 10 | Regex: ''
 11 | Type: CONJ
 12 | ---
 13 | Comments: ''
 14 | Direction: terminate
 15 | Lex: as a cause for
 16 | Regex: ''
 17 | Type: CONJ
 18 | ---
 19 | Comments: ''
 20 | Direction: terminate
 21 | Lex: as a cause of
 22 | Regex: ''
 23 | Type: CONJ
 24 | ---
 25 | Comments: ''
 26 | Direction: terminate
 27 | Lex: as a etiology for
 28 | Regex: ''
 29 | Type: CONJ
 30 | ---
 31 | Comments: ''
 32 | Direction: terminate
 33 | Lex: as a etiology of
 34 | Regex: ''
 35 | Type: CONJ
 36 | ---
 37 | Comments: ''
 38 | Direction: terminate
 39 | Lex: as a reason for
 40 | Regex: ''
 41 | Type: CONJ
 42 | ---
 43 | Comments: ''
 44 | Direction: terminate
 45 | Lex: as a reason of
 46 | Regex: ''
 47 | Type: CONJ
 48 | ---
 49 | Comments: ''
 50 | Direction: terminate
 51 | Lex: as a secondary cause for
 52 | Regex: ''
 53 | Type: CONJ
 54 | ---
 55 | Comments: ''
 56 | Direction: terminate
 57 | Lex: as a secondary cause of
 58 | Regex: ''
 59 | Type: CONJ
 60 | ---
 61 | Comments: ''
 62 | Direction: terminate
 63 | Lex: as a secondary etiology for
 64 | Regex: ''
 65 | Type: CONJ
 66 | ---
 67 | Comments: ''
 68 | Direction: terminate
 69 | Lex: as a secondary etiology of
 70 | Regex: ''
 71 | Type: CONJ
 72 | ---
 73 | Comments: ''
 74 | Direction: terminate
 75 | Lex: as a secondary origin for
 76 | Regex: ''
 77 | Type: CONJ
 78 | ---
 79 | Comments: ''
 80 | Direction: terminate
 81 | Lex: as a secondary origin of
 82 | Regex: ''
 83 | Type: CONJ
 84 | ---
 85 | Comments: ''
 86 | Direction: terminate
 87 | Lex: as a secondary reason for
 88 | Regex: ''
 89 | Type: CONJ
 90 | ---
 91 | Comments: ''
 92 | Direction: terminate
 93 | Lex: as a secondary reason of
 94 | Regex: ''
 95 | Type: CONJ
 96 | ---
 97 | Comments: ''
 98 | Direction: terminate
 99 | Lex: as a secondary source for
100 | Regex: ''
101 | Type: CONJ
102 | ---
103 | Comments: ''
104 | Direction: terminate
105 | Lex: as a secondary source of
106 | Regex: ''
107 | Type: CONJ
108 | ---
109 | Comments: ''
110 | Direction: terminate
111 | Lex: as a source for
112 | Regex: ''
113 | Type: CONJ
114 | ---
115 | Comments: ''
116 | Direction: terminate
117 | Lex: as a source of
118 | Regex: ''
119 | Type: CONJ
120 | ---
121 | Comments: ''
122 | Direction: terminate
123 | Lex: as an cause for
124 | Regex: ''
125 | Type: CONJ
126 | ---
127 | Comments: ''
128 | Direction: terminate
129 | Lex: as an cause of
130 | Regex: ''
131 | Type: CONJ
132 | ---
133 | Comments: ''
134 | Direction: terminate
135 | Lex: as an etiology for
136 | Regex: ''
137 | Type: CONJ
138 | ---
139 | Comments: ''
140 | Direction: terminate
141 | Lex: as an etiology of
142 | Regex: ''
143 | Type: CONJ
144 | ---
145 | Comments: ''
146 | Direction: terminate
147 | Lex: as an origin for
148 | Regex: ''
149 | Type: CONJ
150 | ---
151 | Comments: ''
152 | Direction: terminate
153 | Lex: as an origin of
154 | Regex: ''
155 | Type: CONJ
156 | ---
157 | Comments: ''
158 | Direction: terminate
159 | Lex: as an reason for
160 | Regex: ''
161 | Type: CONJ
162 | ---
163 | Comments: ''
164 | Direction: terminate
165 | Lex: as an reason of
166 | Regex: ''
167 | Type: CONJ
168 | ---
169 | Comments: ''
170 | Direction: terminate
171 | Lex: as an secondary cause for
172 | Regex: ''
173 | Type: CONJ
174 | ---
175 | Comments: ''
176 | Direction: terminate
177 | Lex: as an secondary cause of
178 | Regex: ''
179 | Type: CONJ
180 | ---
181 | Comments: ''
182 | Direction: terminate
183 | Lex: as an secondary etiology for
184 | Regex: ''
185 | Type: CONJ
186 | ---
187 | Comments: ''
188 | Direction: terminate
189 | Lex: as an secondary etiology of
190 | Regex: ''
191 | Type: CONJ
192 | ---
193 | Comments: ''
194 | Direction: terminate
195 | Lex: as an secondary origin for
196 | Regex: ''
197 | Type: CONJ
198 | ---
199 | Comments: ''
200 | Direction: terminate
201 | Lex: as an secondary origin of
202 | Regex: ''
203 | Type: CONJ
204 | ---
205 | Comments: ''
206 | Direction: terminate
207 | Lex: as an secondary reason for
208 | Regex: ''
209 | Type: CONJ
210 | ---
211 | Comments: ''
212 | Direction: terminate
213 | Lex: as an secondary reason of
214 | Regex: ''
215 | Type: CONJ
216 | ---
217 | Comments: ''
218 | Direction: terminate
219 | Lex: as an secondary source for
220 | Regex: ''
221 | Type: CONJ
222 | ---
223 | Comments: ''
224 | Direction: terminate
225 | Lex: as an secondary source of
226 | Regex: ''
227 | Type: CONJ
228 | ---
229 | Comments: ''
230 | Direction: terminate
231 | Lex: as an source for
232 | Regex: ''
233 | Type: CONJ
234 | ---
235 | Comments: ''
236 | Direction: terminate
237 | Lex: as an source of
238 | Regex: ''
239 | Type: CONJ
240 | ---
241 | Comments: ''
242 | Direction: terminate
243 | Lex: as the cause for
244 | Regex: ''
245 | Type: CONJ
246 | ---
247 | Comments: ''
248 | Direction: terminate
249 | Lex: as the cause of
250 | Regex: ''
251 | Type: CONJ
252 | ---
253 | Comments: ''
254 | Direction: terminate
255 | Lex: as the etiology for
256 | Regex: ''
257 | Type: CONJ
258 | ---
259 | Comments: ''
260 | Direction: terminate
261 | Lex: as the etiology of
262 | Regex: ''
263 | Type: CONJ
264 | ---
265 | Comments: ''
266 | Direction: terminate
267 | Lex: as the origin for
268 | Regex: ''
269 | Type: CONJ
270 | ---
271 | Comments: ''
272 | Direction: terminate
273 | Lex: as the origin of
274 | Regex: ''
275 | Type: CONJ
276 | ---
277 | Comments: ''
278 | Direction: terminate
279 | Lex: as the reason for
280 | Regex: ''
281 | Type: CONJ
282 | ---
283 | Comments: ''
284 | Direction: terminate
285 | Lex: as the reason of
286 | Regex: ''
287 | Type: CONJ
288 | ---
289 | Comments: ''
290 | Direction: terminate
291 | Lex: as the secondary cause for
292 | Regex: ''
293 | Type: CONJ
294 | ---
295 | Comments: ''
296 | Direction: terminate
297 | Lex: as the secondary cause of
298 | Regex: ''
299 | Type: CONJ
300 | ---
301 | Comments: ''
302 | Direction: terminate
303 | Lex: as the secondary etiology for
304 | Regex: ''
305 | Type: CONJ
306 | ---
307 | Comments: ''
308 | Direction: terminate
309 | Lex: as the secondary etiology of
310 | Regex: ''
311 | Type: CONJ
312 | ---
313 | Comments: ''
314 | Direction: terminate
315 | Lex: as the secondary origin for
316 | Regex: ''
317 | Type: CONJ
318 | ---
319 | Comments: ''
320 | Direction: terminate
321 | Lex: as the secondary origin of
322 | Regex: ''
323 | Type: CONJ
324 | ---
325 | Comments: ''
326 | Direction: terminate
327 | Lex: as the secondary reason for
328 | Regex: ''
329 | Type: CONJ
330 | ---
331 | Comments: ''
332 | Direction: terminate
333 | Lex: as the secondary reason of
334 | Regex: ''
335 | Type: CONJ
336 | ---
337 | Comments: ''
338 | Direction: terminate
339 | Lex: as the secondary source for
340 | Regex: ''
341 | Type: CONJ
342 | ---
343 | Comments: ''
344 | Direction: terminate
345 | Lex: as the secondary source of
346 | Regex: ''
347 | Type: CONJ
348 | ---
349 | Comments: ''
350 | Direction: terminate
351 | Lex: as the source for
352 | Regex: ''
353 | Type: CONJ
354 | ---
355 | Comments: ''
356 | Direction: terminate
357 | Lex: as the source of
358 | Regex: ''
359 | Type: CONJ
360 | ---
361 | Comments: 2/14/2013
362 | Direction: terminate
363 | Lex: 'as there are '
364 | Regex: ''
365 | Type: CONJ
366 | ---
367 | Comments: ''
368 | Direction: terminate
369 | Lex: aside from
370 | Regex: ''
371 | Type: CONJ
372 | ---
373 | Comments: ''
374 | Direction: terminate
375 | Lex: but
376 | Regex: ''
377 | Type: CONJ
378 | ---
379 | Comments: ''
380 | Direction: terminate
381 | Lex: cause for
382 | Regex: ''
383 | Type: CONJ
384 | ---
385 | Comments: ''
386 | Direction: terminate
387 | Lex: cause of
388 | Regex: ''
389 | Type: CONJ
390 | ---
391 | Comments: ''
392 | Direction: terminate
393 | Lex: causes for
394 | Regex: ''
395 | Type: CONJ
396 | ---
397 | Comments: ''
398 | Direction: terminate
399 | Lex: causes of
400 | Regex: ''
401 | Type: CONJ
402 | ---
403 | Comments: ''
404 | Direction: terminate
405 | Lex: etiology for
406 | Regex: ''
407 | Type: CONJ
408 | ---
409 | Comments: ''
410 | Direction: terminate
411 | Lex: etiology of
412 | Regex: ''
413 | Type: CONJ
414 | ---
415 | Comments: ''
416 | Direction: terminate
417 | Lex: except
418 | Regex: ''
419 | Type: CONJ
420 | ---
421 | Comments: ''
422 | Direction: terminate
423 | Lex: however
424 | Regex: ''
425 | Type: CONJ
426 | ---
427 | Comments: ''
428 | Direction: terminate
429 | Lex: involving
430 | Regex: ''
431 | Type: CONJ
432 | ---
433 | Comments: ''
434 | Direction: terminate
435 | Lex: nevertheless
436 | Regex: ''
437 | Type: CONJ
438 | ---
439 | Comments: ''
440 | Direction: terminate
441 | Lex: origin for
442 | Regex: ''
443 | Type: CONJ
444 | ---
445 | Comments: ''
446 | Direction: terminate
447 | Lex: origin of
448 | Regex: ''
449 | Type: CONJ
450 | ---
451 | Comments: ''
452 | Direction: terminate
453 | Lex: origins for
454 | Regex: ''
455 | Type: CONJ
456 | ---
457 | Comments: ''
458 | Direction: terminate
459 | Lex: origins of
460 | Regex: ''
461 | Type: CONJ
462 | ---
463 | Comments: ''
464 | Direction: terminate
465 | Lex: other possibilities of
466 | Regex: ''
467 | Type: CONJ
468 | ---
469 | Comments: ''
470 | Direction: terminate
471 | Lex: reason for
472 | Regex: ''
473 | Type: CONJ
474 | ---
475 | Comments: ''
476 | Direction: terminate
477 | Lex: reason of
478 | Regex: ''
479 | Type: CONJ
480 | ---
481 | Comments: ''
482 | Direction: terminate
483 | Lex: reasons for
484 | Regex: ''
485 | Type: CONJ
486 | ---
487 | Comments: ''
488 | Direction: terminate
489 | Lex: reasons of
490 | Regex: ''
491 | Type: CONJ
492 | ---
493 | Comments: ''
494 | Direction: terminate
495 | Lex: secondary to
496 | Regex: ''
497 | Type: CONJ
498 | ---
499 | Comments: ''
500 | Direction: terminate
501 | Lex: source for
502 | Regex: ''
503 | Type: CONJ
504 | ---
505 | Comments: ''
506 | Direction: terminate
507 | Lex: source of
508 | Regex: ''
509 | Type: CONJ
510 | ---
511 | Comments: ''
512 | Direction: terminate
513 | Lex: sources for
514 | Regex: ''
515 | Type: CONJ
516 | ---
517 | Comments: ''
518 | Direction: terminate
519 | Lex: sources of
520 | Regex: ''
521 | Type: CONJ
522 | ---
523 | Comments: ''
524 | Direction: terminate
525 | Lex: still
526 | Regex: ''
527 | Type: CONJ
528 | ---
529 | Comments: ''
530 | Direction: terminate
531 | Lex: though
532 | Regex: ''
533 | Type: CONJ
534 | ---
535 | Comments: ''
536 | Direction: terminate
537 | Lex: trigger event for
538 | Regex: ''
539 | Type: CONJ
540 | ---
541 | Comments: ''
542 | Direction: terminate
543 | Lex: which
544 | Regex: ''
545 | Type: CONJ
546 | ---
547 | Comments: ''
548 | Direction: terminate
549 | Lex: yet
550 | Regex: ''
551 | Type: CONJ
552 | 


--------------------------------------------------------------------------------
/KB/pah_utah.tsv:
--------------------------------------------------------------------------------
 1 | Lex	Type	Regex	Direction	
 2 | hypertension	PULMONARY_ARTERIAL_HYPERTENSION	\bhypertension\b		
 3 | pah	PULMONARY_ARTERIAL_HYPERTENSION	\bpah\b		
 4 | pulmonary hypertension	PULMONARY_ARTERIAL_ANATOMY	pulmonary\s(arterial )?(hypertension)		
 5 | main pulmonary artery	PULMONARY_ARTERIAL_ANATOMY		 
 6 | pulmonary trunk	PULMONARY_ARTERIAL_ANATOMY		
 7 | pulmonary artery	PULMONARY_ARTERIAL_ANATOMY		
 8 | pulmonary arteries	PULMONARY_ARTERIAL_ANATOMY		
 9 | mosaic	PULMONARY_ARTERIAL_HYPERTENSION		
10 | right heart	CARDIAC_ANATOMY			
11 | septum	CARDIAC_ANATOMY			
12 | enlargement	PULMONARY_ARTERIAL_HYPERTENSION	(tapering|enlargement)		
13 | tapering	PULMONARY_ARTERIAL_HYPERTENSION			
14 | pruning	PULMONARY_ARTERIAL_HYPERTENSION			
15 | right heart strain	PULMONARY_ARTERIAL_HYPERTENSION	right heart (strain|failure)	
16 | 


--------------------------------------------------------------------------------
/KB/pah_utah.txt:
--------------------------------------------------------------------------------
1 | Lex	Type	Regex	Directionhypertension	PULMONARY_ARTERIAL_HYPERTENSION	\bhypertension\b	pah	PULMONARY_ARTERIAL_HYPERTENSION	\bpah\b	pulmonary hypertension	PULMONARY_ARTERIAL_ANATOMY	pulmonary\s(arterial )?(hypertension)	main pulmonary artery	PULMONARY_ARTERIAL_ANATOMY		 pulmonary trunk	PULMONARY_ARTERIAL_ANATOMY		mosaic	PULMONARY_ARTERIAL_HYPERTENSION		right heart	CARDIAC_ANATOMY		


--------------------------------------------------------------------------------
/KB/pah_utah.yml:
--------------------------------------------------------------------------------
 1 | Comments: ''
 2 | Direction: ''
 3 | Lex: hypertension
 4 | Regex: \bhypertension\b
 5 | Type: PULMONARY_ARTERIAL_HYPERTENSION
 6 | ---
 7 | Comments: ''
 8 | Direction: ''
 9 | Lex: pah
10 | Regex: \bpah\b
11 | Type: PULMONARY_ARTERIAL_HYPERTENSION
12 | ---
13 | Comments: ''
14 | Direction: ''
15 | Lex: pulmonary hypertension
16 | Regex: pulmonary\s(arterial )?(hypertension)
17 | Type: PULMONARY_ARTERIAL_ANATOMY
18 | ---
19 | Comments: ''
20 | Direction: ' '
21 | Lex: main pulmonary artery
22 | Regex: ''
23 | Type: PULMONARY_ARTERIAL_ANATOMY
24 | ---
25 | Comments: ''
26 | Direction: ''
27 | Lex: pulmonary trunk
28 | Regex: ''
29 | Type: PULMONARY_ARTERIAL_ANATOMY
30 | ---
31 | Comments: ''
32 | Direction: ''
33 | Lex: pulmonary artery
34 | Regex: ''
35 | Type: PULMONARY_ARTERIAL_ANATOMY
36 | ---
37 | Comments: ''
38 | Direction: ''
39 | Lex: pulmonary arteries
40 | Regex: ''
41 | Type: PULMONARY_ARTERIAL_ANATOMY
42 | ---
43 | Comments: ''
44 | Direction: ''
45 | Lex: mosaic
46 | Regex: ''
47 | Type: PULMONARY_ARTERIAL_HYPERTENSION
48 | ---
49 | Comments: ''
50 | Direction: ''
51 | Lex: right heart
52 | Regex: ''
53 | Type: CARDIAC_ANATOMY
54 | ---
55 | Comments: ''
56 | Direction: ''
57 | Lex: septum
58 | Regex: ''
59 | Type: CARDIAC_ANATOMY
60 | ---
61 | Comments: ''
62 | Direction: ''
63 | Lex: enlargement
64 | Regex: (tapering|enlargement)
65 | Type: PULMONARY_ARTERIAL_HYPERTENSION
66 | ---
67 | Comments: ''
68 | Direction: ''
69 | Lex: tapering
70 | Regex: ''
71 | Type: PULMONARY_ARTERIAL_HYPERTENSION
72 | ---
73 | Comments: ''
74 | Direction: ''
75 | Lex: pruning
76 | Regex: ''
77 | Type: PULMONARY_ARTERIAL_HYPERTENSION
78 | ---
79 | Comments: ''
80 | Direction: ''
81 | Lex: right heart strain
82 | Regex: right heart (strain|failure)
83 | Type: PULMONARY_ARTERIAL_HYPERTENSION
84 | 


--------------------------------------------------------------------------------
/KB/pe_kb.tsv:
--------------------------------------------------------------------------------
 1 | Lex	Type	Regex	Direction	
 2 | embolism	PULMONARY_EMBOLISM	\b(emboli|embolism|embolus)\b		
 3 | pe	PULMONARY_EMBOLISM	\bpe\b		
 4 | pulmonary embolism	PULMONARY_EMBOLISM	pulmonary\s(artery )?(embol[a-z]+)		
 5 | bolus timing	QUALITY_FEATURE	"\bbolus[ -]{0,1}timing"	bidirectional	 # fixes pedoc #129 dq
 6 | limited exam	QUALITY_FEATURE	(suboptimal|degraded|limited) (exam[a-z]*|study|scan|evaluation|bolus|timing)	bidirectional	
 7 | nondiagnostic exam	QUALITY_FEATURE	nondiagnostic (exam[a-z]*|study|scan|evaluation)	bidirectional	 #fix for pedoc #231
 8 | artifact	ARTIFACT	artifact(ual)?		
 9 | bulk motion	ARTIFACT			
10 | motion	ARTIFACT			
11 | patient motion	ARTIFACT			
12 | respiratory motion	ARTIFACT			
13 | thromboembolic disease	thromboembolic disease			


--------------------------------------------------------------------------------
/KB/pe_kb.yml:
--------------------------------------------------------------------------------
 1 | Comments: ''
 2 | Direction: ''
 3 | Lex: embolism
 4 | Regex: \b(emboli|embolism|embolus)\b
 5 | Type: PULMONARY_EMBOLISM
 6 | ---
 7 | Comments: ''
 8 | Direction: ''
 9 | Lex: pe
10 | Regex: \bpe\b
11 | Type: PULMONARY_EMBOLISM
12 | ---
13 | Comments: ''
14 | Direction: ''
15 | Lex: pulmonary embolism
16 | Regex: pulmonary\s(artery )?(embol[a-z]+)
17 | Type: PULMONARY_EMBOLISM
18 | ---
19 | Comments: ' # fixes pedoc #129 dq'
20 | Direction: bidirectional
21 | Lex: bolus timing
22 | Regex: \bbolus[ -]{0,1}timing
23 | Type: QUALITY_FEATURE
24 | ---
25 | Comments: ''
26 | Direction: bidirectional
27 | Lex: limited exam
28 | Regex: (suboptimal|degraded|limited) (exam[a-z]*|study|scan|evaluation|bolus|timing)
29 | Type: QUALITY_FEATURE
30 | ---
31 | Comments: ' #fix for pedoc #231'
32 | Direction: bidirectional
33 | Lex: nondiagnostic exam
34 | Regex: nondiagnostic (exam[a-z]*|study|scan|evaluation)
35 | Type: QUALITY_FEATURE
36 | ---
37 | Comments: ''
38 | Direction: ''
39 | Lex: artifact
40 | Regex: artifact(ual)?
41 | Type: ARTIFACT
42 | ---
43 | Comments: ''
44 | Direction: ''
45 | Lex: bulk motion
46 | Regex: ''
47 | Type: ARTIFACT
48 | ---
49 | Comments: ''
50 | Direction: ''
51 | Lex: motion
52 | Regex: ''
53 | Type: ARTIFACT
54 | ---
55 | Comments: ''
56 | Direction: ''
57 | Lex: patient motion
58 | Regex: ''
59 | Type: ARTIFACT
60 | ---
61 | Comments: ''
62 | Direction: ''
63 | Lex: respiratory motion
64 | Regex: ''
65 | Type: ARTIFACT
66 | ---
67 | Comments: ''
68 | Direction: ''
69 | Lex: thromboembolic disease
70 | Regex: ''
71 | Type: thromboembolic disease
72 | 


--------------------------------------------------------------------------------
/KB/pneumonia_targets.yml:
--------------------------------------------------------------------------------
 1 | Comments: ''
 2 | Direction: ''
 3 | Lex: pneumonia
 4 | Regex: \bpneumonia[s]?\b
 5 | Type: EVIDENCE_OF_PNEUMONIA
 6 | ---
 7 | Comments: ''
 8 | Direction: ''
 9 | Lex: consolidation
10 | Regex: ''
11 | Type: EVIDENCE_OF_PNEUMONIA
12 | ---
13 | Comments: ''
14 | Direction: ''
15 | Lex: infiltrate
16 | Regex: ''
17 | Type: EVIDENCE_OF_PNEUMONIA
18 | 


--------------------------------------------------------------------------------
/KB/quality_artifacts.tsv:
--------------------------------------------------------------------------------
 1 | Lex	Type	Regex	Direction	
 2 | bolus timing	QUALITY_FEATURE	\bbolus[ -]{0,1}timing	bidirectional	 # fixes pedoc #129 dq
 3 | limited exam	QUALITY_FEATURE	(suboptimal|degraded|limited) (exam[a-z]*|study|scan|evaluation|bolus|timing)	bidirectional	
 4 | nondiagnostic exam	QUALITY_FEATURE	nondiagnostic (exam[a-z]*|study|scan|evaluation)	bidirectional	 #fix for pedoc #231
 5 | artifact	ARTIFACT	artifact(ual)?		
 6 | bulk motion	ARTIFACT			
 7 | motion	ARTIFACT			
 8 | patient motion	ARTIFACT			
 9 | respiratory motion	ARTIFACT			
10 | 


--------------------------------------------------------------------------------
/KB/quality_artifacts.yml:
--------------------------------------------------------------------------------
 1 | Comments: ' # fixes pedoc #129 dq'
 2 | Direction: bidirectional
 3 | Lex: bolus timing
 4 | Regex: \bbolus[ -]{0,1}timing
 5 | Type: QUALITY_FEATURE
 6 | ---
 7 | Comments: ''
 8 | Direction: bidirectional
 9 | Lex: limited exam
10 | Regex: (suboptimal|degraded|limited) (exam[a-z]*|study|scan|evaluation|bolus|timing)
11 | Type: QUALITY_FEATURE
12 | ---
13 | Comments: ' #fix for pedoc #231'
14 | Direction: bidirectional
15 | Lex: nondiagnostic exam
16 | Regex: nondiagnostic (exam[a-z]*|study|scan|evaluation)
17 | Type: QUALITY_FEATURE
18 | ---
19 | Comments: ''
20 | Direction: ''
21 | Lex: artifact
22 | Regex: artifact(ual)?
23 | Type: ARTIFACT
24 | ---
25 | Comments: ''
26 | Direction: ''
27 | Lex: bulk motion
28 | Regex: ''
29 | Type: ARTIFACT
30 | ---
31 | Comments: ''
32 | Direction: ''
33 | Lex: motion
34 | Regex: ''
35 | Type: ARTIFACT
36 | ---
37 | Comments: ''
38 | Direction: ''
39 | Lex: patient motion
40 | Regex: ''
41 | Type: ARTIFACT
42 | ---
43 | Comments: ''
44 | Direction: ''
45 | Lex: respiratory motion
46 | Regex: ''
47 | Type: ARTIFACT
48 | 


--------------------------------------------------------------------------------
/KB/schema2.csv:
--------------------------------------------------------------------------------
 1 | # Lines that start with the # symbol are comments and are ignored
 2 | #The schema consists of a numeric value, followed by a label (e.g. "AMBIVALENT"), followed by a Python express that can evaluate to True or False
 3 | #The Python expression uses LABELS from the rules. processReports.py will substitute the LABEL with any matched values identified from 
 4 | #the corresponding rules
 5 | 1,AMBIVALENT,DISEASE_STATE == 2
 6 | 2,Negative/Certain/Acute,DISEASE_STATE == 0 and CERTAINTY_STATE == 1
 7 | 3,Negative/Uncertain/Chronic,DISEASE_STATE == 0 and CERTAINTY_STATE == 0 and ACUTE_STATE == 0
 8 | 4,Positive/Uncertain/Chronic,DISEASE_STATE == 1 and CERTAINTY_STATE == 0 and ACUTE_STATE == 0 
 9 | 5,Positive/Certain/Chronic,DISEASE_STATE == 1 and CERTAINTY_STATE == 1 and ACUTE_STATE == 0 
10 | 6,Negative/Uncertain/Acute,DISEASE_STATE == 0 and CERTAINTY_STATE == 0 
11 | 7,Positive/Uncertain/Acute,DISEASE_STATE == 1 and CERTAINTY_STATE == 0 and ACUTE_STATE == 1 
12 | 8,Positive/Certain/Acute,DISEASE_STATE == 1 and CERTAINTY_STATE == 1 and ACUTE_STATE == 1 
13 | 


--------------------------------------------------------------------------------
/KB/utah_crit.tsv:
--------------------------------------------------------------------------------
 1 | Lex	Type	Regex	Direction	Codes
 2 | pulmonary embolism	PULMONARY_EMBOLISM	pulmonary\s(artery )?(embol[a-z]+)|\bpe\b|pulmonary thromboembolic disease		Chest
 3 | aneurysm	ANEURYSM	\baneurysm[a-z]*\b|(aneurysmal )?dilatation		Chest, Neuro, ABD/Pel
 4 | aortic dissection	AORTIC_DISSECTION	(aortic|aorta)\s(artery\s)?dissection		Chest, ABD/Pel
 5 | appendicitis	APPENDICITIS			ABD/Pel
 6 | inflammation	INFLAMMATION	inflammation|inflammatory|infection		Spine, Neuro, ABD/Pel, Extremity
 7 | bowel obstruction	BOWEL_OBSTRUCTION			ABD/Pel
 8 | midline shift	BRAIN_HERNIATION	((subfalcian|subfalcine|tonsillar)\sherniation)|((midline|mid-line)\sshift)		Neuro
 9 | carotid dissection	CAROTID_DISSECTION	carotid?\s*?\w*\s*dissection		Neuro
10 | intracranial hemorrhage	INTRACRANIAL_HEMORRHAGE	(cerebral|intracranial|brain)\s(hemorrhage|hematoma|bleed)		Neuro
11 | fracture	FRACTURE	fracture(s)?		Spine, Neuro, ABD/Pel, Chest, Extremity
12 | cholecystitis	CHOLECYSTITIS			ABD/Pel
13 | cord compression	CORD_COMPRESSION	cord compression		Extremity, Spine, Neuro
14 | depressed skull fracture	DEPRESSED_SKULL_FRACTURE			Neuro
15 | diverticulitis	DIVERTICULITIS			ABD/Pel
16 | dvt	DVT	((non.?)?occlusive)?\s?(thromb(us|i|osis|osed)\b|DVT|clot\b)	2/28/13	ABD/Pel
17 | ectasia	ECTASIA	(ectasia| ectatic)		ABD/Pel
18 | ectopic pregnancy	ECTOPIC_PREGNANCY			ABD/Pel
19 | epiglottitis	EPIGLOTTITIS			Chest
20 | fetal demise	FETAL_DEMISE			ABD/Pel
21 | free air	FREE_AIR	(pneumoperitoneum|((intraperitoneal|free)\s(gas|air)))		Chest, ABD/Pel
22 | infarct	INFARCT	\b(stroke|infarct|infarction)\b		Neuro, ABD/Pel
23 | ischemic bowel	ISCHEMIC_BOWEL			ABD/Pel
24 | lacunar infarct	LACUNAR_INFARCT			Neuro
25 | mediastinal emphysema	MEDIASTINAL_EMPHYSEMA	(mediastinal emphysema|pneumomediastinum)		Chest
26 | omental infarct	OMENTAL_INFARCT			ABD/Pel
27 | bone infarct	OSTEONECROSIS	(bone infarct|osteonecrosis)		Extremity
28 | pneumothorax	PNEUMOTHORAX	pneumothorax|hydropneumothorax|pneumothoraces		Chest
29 | portal venous air	PORTAL_VENOUS_AIR	portal (venous\s)?(gas|air)		ABD/Pel
30 | renal infarct	RENAL_INFARCT			ABD/Pel
31 | retroperitoneal hemorrhage	RETROPERITONEAL_HEMORRHAGE	(retro|intra)?peritoneal\s(hemorrhage|hematoma|bleed)		ABD/Pel
32 | retropharyngeal abscess	RETROPHARYNGEAL_ABSCESS			Chest
33 | ruptured aneurysm	RUPTURED_ANEURYSM	(ruptured aneurysm|aortic rupture)		Chest, Neuro, ABD/Pel
34 | splenic infarct	SPLENIC_INFARCT			Chest, ABD/Pel
35 | torsion	TORSION			ABD/Pel
36 | volvulus	VOLVULUS			ABD/Pel
37 | pneumonia	PNEUMONIA	(pneumonia|consolidation|aspiration)		Chest
38 | cancer	CANCER	cancer|metastatic(\sdisease|\slesion)?|metastases|carcinoma|sarcoma|malignancy		Spine, Neuro, ABD/Pel, Chest, Extremity
39 | NO CRITICAL FINDING	NULL_FINDING			Spine, Neuro, ABD/Pel, Chest, Extremity
40 | 0:UNREVIEWED	NULL_FINDING			Spine, Neuro, ABD/Pel, Chest, Extremity
41 | thrombosis	THROMBOSIS	(thromb(us|i|osis|osed)\b|clob\b)		Spine, Neuro, ABD/Pel, Extremity
42 | 


--------------------------------------------------------------------------------
/KB/utah_crit.yml:
--------------------------------------------------------------------------------
  1 | Comments: Chest
  2 | Direction: ''
  3 | Lex: pulmonary embolism
  4 | Regex: pulmonary\s(artery )?(embol[a-z]+)|\bpe\b|pulmonary thromboembolic disease
  5 | Type: PULMONARY_EMBOLISM
  6 | ---
  7 | Comments: Chest, Neuro, ABD/Pel
  8 | Direction: ''
  9 | Lex: aneurysm
 10 | Regex: \baneurysm[a-z]*\b|(aneurysmal )?dilatation
 11 | Type: ANEURYSM
 12 | ---
 13 | Comments: Chest, ABD/Pel
 14 | Direction: ''
 15 | Lex: aortic dissection
 16 | Regex: (aortic|aorta)\s(artery\s)?dissection
 17 | Type: AORTIC_DISSECTION
 18 | ---
 19 | Comments: ABD/Pel
 20 | Direction: ''
 21 | Lex: appendicitis
 22 | Regex: ''
 23 | Type: APPENDICITIS
 24 | ---
 25 | Comments: Spine, Neuro, ABD/Pel, Extremity
 26 | Direction: ''
 27 | Lex: inflammation
 28 | Regex: inflammation|inflammatory|infection
 29 | Type: INFLAMMATION
 30 | ---
 31 | Comments: ABD/Pel
 32 | Direction: ''
 33 | Lex: bowel obstruction
 34 | Regex: ''
 35 | Type: BOWEL_OBSTRUCTION
 36 | ---
 37 | Comments: Neuro
 38 | Direction: ''
 39 | Lex: midline shift
 40 | Regex: ((subfalcian|subfalcine|tonsillar)\sherniation)|((midline|mid-line)\sshift)
 41 | Type: BRAIN_HERNIATION
 42 | ---
 43 | Comments: Neuro
 44 | Direction: ''
 45 | Lex: carotid dissection
 46 | Regex: carotid?\s*?\w*\s*dissection
 47 | Type: CAROTID_DISSECTION
 48 | ---
 49 | Comments: Neuro
 50 | Direction: ''
 51 | Lex: intracranial hemorrhage
 52 | Regex: (cerebral|intracranial|brain)\s(hemorrhage|hematoma|bleed)
 53 | Type: INTRACRANIAL_HEMORRHAGE
 54 | ---
 55 | Comments: Spine, Neuro, ABD/Pel, Chest, Extremity
 56 | Direction: ''
 57 | Lex: fracture
 58 | Regex: fracture(s)?
 59 | Type: FRACTURE
 60 | ---
 61 | Comments: ABD/Pel
 62 | Direction: ''
 63 | Lex: cholecystitis
 64 | Regex: ''
 65 | Type: CHOLECYSTITIS
 66 | ---
 67 | Comments: Extremity, Spine, Neuro
 68 | Direction: ''
 69 | Lex: cord compression
 70 | Regex: cord compression
 71 | Type: CORD_COMPRESSION
 72 | ---
 73 | Comments: Neuro
 74 | Direction: ''
 75 | Lex: depressed skull fracture
 76 | Regex: ''
 77 | Type: DEPRESSED_SKULL_FRACTURE
 78 | ---
 79 | Comments: ABD/Pel
 80 | Direction: ''
 81 | Lex: diverticulitis
 82 | Regex: ''
 83 | Type: DIVERTICULITIS
 84 | ---
 85 | Comments: ABD/Pel
 86 | Direction: 2/28/13
 87 | Lex: dvt
 88 | Regex: ((non.?)?occlusive)?\s?(thromb(us|i|osis|osed)\b|DVT|clot\b)
 89 | Type: DVT
 90 | ---
 91 | Comments: ABD/Pel
 92 | Direction: ''
 93 | Lex: ectasia
 94 | Regex: (ectasia| ectatic)
 95 | Type: ECTASIA
 96 | ---
 97 | Comments: ABD/Pel
 98 | Direction: ''
 99 | Lex: ectopic pregnancy
100 | Regex: ''
101 | Type: ECTOPIC_PREGNANCY
102 | ---
103 | Comments: Chest
104 | Direction: ''
105 | Lex: epiglottitis
106 | Regex: ''
107 | Type: EPIGLOTTITIS
108 | ---
109 | Comments: ABD/Pel
110 | Direction: ''
111 | Lex: fetal demise
112 | Regex: ''
113 | Type: FETAL_DEMISE
114 | ---
115 | Comments: Chest, ABD/Pel
116 | Direction: ''
117 | Lex: free air
118 | Regex: (pneumoperitoneum|((intraperitoneal|free)\s(gas|air)))
119 | Type: FREE_AIR
120 | ---
121 | Comments: Neuro, ABD/Pel
122 | Direction: ''
123 | Lex: infarct
124 | Regex: \b(stroke|infarct|infarction)\b
125 | Type: INFARCT
126 | ---
127 | Comments: ABD/Pel
128 | Direction: ''
129 | Lex: ischemic bowel
130 | Regex: ''
131 | Type: ISCHEMIC_BOWEL
132 | ---
133 | Comments: Neuro
134 | Direction: ''
135 | Lex: lacunar infarct
136 | Regex: ''
137 | Type: LACUNAR_INFARCT
138 | ---
139 | Comments: Chest
140 | Direction: ''
141 | Lex: mediastinal emphysema
142 | Regex: (mediastinal emphysema|pneumomediastinum)
143 | Type: MEDIASTINAL_EMPHYSEMA
144 | ---
145 | Comments: ABD/Pel
146 | Direction: ''
147 | Lex: omental infarct
148 | Regex: ''
149 | Type: OMENTAL_INFARCT
150 | ---
151 | Comments: Extremity
152 | Direction: ''
153 | Lex: bone infarct
154 | Regex: (bone infarct|osteonecrosis)
155 | Type: OSTEONECROSIS
156 | ---
157 | Comments: Chest
158 | Direction: ''
159 | Lex: pneumothorax
160 | Regex: pneumothorax|hydropneumothorax|pneumothoraces
161 | Type: PNEUMOTHORAX
162 | ---
163 | Comments: ABD/Pel
164 | Direction: ''
165 | Lex: portal venous air
166 | Regex: portal (venous\s)?(gas|air)
167 | Type: PORTAL_VENOUS_AIR
168 | ---
169 | Comments: ABD/Pel
170 | Direction: ''
171 | Lex: renal infarct
172 | Regex: ''
173 | Type: RENAL_INFARCT
174 | ---
175 | Comments: ABD/Pel
176 | Direction: ''
177 | Lex: retroperitoneal hemorrhage
178 | Regex: (retro|intra)?peritoneal\s(hemorrhage|hematoma|bleed)
179 | Type: RETROPERITONEAL_HEMORRHAGE
180 | ---
181 | Comments: Chest
182 | Direction: ''
183 | Lex: retropharyngeal abscess
184 | Regex: ''
185 | Type: RETROPHARYNGEAL_ABSCESS
186 | ---
187 | Comments: Chest, Neuro, ABD/Pel
188 | Direction: ''
189 | Lex: ruptured aneurysm
190 | Regex: (ruptured aneurysm|aortic rupture)
191 | Type: RUPTURED_ANEURYSM
192 | ---
193 | Comments: Chest, ABD/Pel
194 | Direction: ''
195 | Lex: splenic infarct
196 | Regex: ''
197 | Type: SPLENIC_INFARCT
198 | ---
199 | Comments: ABD/Pel
200 | Direction: ''
201 | Lex: torsion
202 | Regex: ''
203 | Type: TORSION
204 | ---
205 | Comments: ABD/Pel
206 | Direction: ''
207 | Lex: volvulus
208 | Regex: ''
209 | Type: VOLVULUS
210 | ---
211 | Comments: Chest
212 | Direction: ''
213 | Lex: pneumonia
214 | Regex: (pneumonia|consolidation|aspiration)
215 | Type: PNEUMONIA
216 | ---
217 | Comments: Spine, Neuro, ABD/Pel, Chest, Extremity
218 | Direction: ''
219 | Lex: cancer
220 | Regex: cancer|metastatic(\sdisease|\slesion)?|metastases|carcinoma|sarcoma|malignancy
221 | Type: CANCER
222 | ---
223 | Comments: Spine, Neuro, ABD/Pel, Chest, Extremity
224 | Direction: ''
225 | Lex: NO CRITICAL FINDING
226 | Regex: ''
227 | Type: NULL_FINDING
228 | ---
229 | Comments: Spine, Neuro, ABD/Pel, Chest, Extremity
230 | Direction: ''
231 | Lex: 0:UNREVIEWED
232 | Regex: ''
233 | Type: NULL_FINDING
234 | ---
235 | Comments: Spine, Neuro, ABD/Pel, Extremity
236 | Direction: ''
237 | Lex: thrombosis
238 | Regex: (thromb(us|i|osis|osed)\b|clob\b)
239 | Type: THROMBOSIS
240 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | pyConTextNLP
 2 | ============
 3 | 
 4 | This package has been in *de facto* retirement for many years. The direct successor to this project is `MedSpaCy <https://github.com/medspacy/medspacy>`_.
 5 | 
 6 | This code has been validated using the included notebooks on Python v 3.7.2. Python 2.x is no longer supported.
 7 | 
 8 | pyConTextNLP is a Python implementation/extension/modification of the
 9 | ConText algorithm described in `"Context: An Algorithm for Determining Negation, Experiencer, and Temporal Status from Clinical Reports" <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2757457/>`_ which is itself a
10 | generalization of the NegEx algorithm described in `"A simple algorithm for identifying negated findings and diseases in discharge summaries" <https://pubmed.ncbi.nlm.nih.gov/12123149/>`_.
11 | 
12 | The package is maintained by Brian Chapman at the University of Utah.
13 | Other active and past developers include:
14 | 
15 | -  Wendy W. Chapman
16 | -  Glenn Dayton
17 | -  Danielle Mowery
18 | 
19 | Introduction
20 | ------------
21 | 
22 | pyConTextNLP is a partial implementation of the ConText algorithm using
23 | Python. The original description of pyConTextNLP was provided in Chapman
24 | BE, Lee S, Kang HP, Chapman WW, "Document-level classification of CT
25 | pulmonary angiography reports based on an extension of the ConText
26 | algorithm." `J Biomed Inform. 2011
27 | Oct;44(5):728-37 <http://www.sciencedirect.com/science/article/pii/S1532046411000621>`__
28 | 
29 | Other publications/presentations based on pyConText include: \* Wilson
30 | RA, et al. "Automated ancillary cancer history classification for
31 | mesothelioma patients from free-text clinical reports." J Pathol Inform.
32 | 2010 Oct 11;1:24. \* Chapman BE, Lee S, Kang HP, Chapman WW. "Using
33 | ConText to Identify Candidate Pulmonary Embolism Subjects Based on
34 | Dictated Radiology Reports." (Presented at AMIA Clinical Research
35 | Informatics Summit 2011) \* Wilson RA, Chapman WW, DeFries SJ, Becich
36 | MJ, Chapman BE. "Identifying History of Ancillary Cancers in
37 | Mesothelioma Patients from Free-Text Clinical Reports." (Presented at
38 | AMIA 2010).
39 | 
40 | Note: we changed the package name from pyConText to pyConTextNLP because
41 | of a name conflict on pypi.
42 | 
43 | Installation
44 | ------------
45 | 
46 | Latest Version
47 | ~~~~~~~~~~~~~~
48 | 
49 | The latest version of pyConTextNLP is available on [github](https://github.com/chapmanbe/pyConTextNLP). 
50 | The package can be installed by either cloning the repository and running `python setup.py install`. 
51 | Alternatively or by
52 | .. code:: shell
53 |     pip install git+https://github.com/chapmanbe/pyConTextNLP.git 
54 | 
55 | PyPi
56 | ~~~~~
57 | 
58 | pyConTextNLP is also available via the Python Package Index and can be installed via
59 | 
60 | .. code:: shell
61 | 
62 |     pip install pyConTextNLP
63 | 
64 | Dependencies include 
65 | 
66 | * networkx
67 | * PyYAML
68 | 
69 | 
70 | Tutorials
71 | ---------
72 | 
73 | See the `notebooks folder <./notebooks>`__ for a series of walkthroughs
74 | demonstrating pyConTextNLP core concepts with example data.
75 | 
76 | 
77 | How to Use
78 | ----------
79 | 
80 | I am working on improving the documentation and (hopefully) adding some
81 | testing to the code.
82 | 
83 | Some preliminary comments:
84 | 
85 | -  pyConTextNLP works marks up text on a sentence by sentence level.
86 | -  pyConTextNLP assumes the sentence is a string not a list of words
87 | - Our preferred way to represent knowledge is now with YAML files rather than TSV files.
88 | 
89 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = build
  9 | 
 10 | # User-friendly check for sphinx-build
 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
 13 | endif
 14 | 
 15 | # Internal variables.
 16 | PAPEROPT_a4     = -D latex_paper_size=a4
 17 | PAPEROPT_letter = -D latex_paper_size=letter
 18 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
 19 | # the i18n builder cannot share the environment and doctrees with the others
 20 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
 21 | 
 22 | .PHONY: help
 23 | help:
 24 | 	@echo "Please use \`make <target>' where <target> is one of"
 25 | 	@echo "  html       to make standalone HTML files"
 26 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 27 | 	@echo "  singlehtml to make a single large HTML file"
 28 | 	@echo "  pickle     to make pickle files"
 29 | 	@echo "  json       to make JSON files"
 30 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 31 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 32 | 	@echo "  applehelp  to make an Apple Help Book"
 33 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 34 | 	@echo "  epub       to make an epub"
 35 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 36 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 37 | 	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
 38 | 	@echo "  text       to make text files"
 39 | 	@echo "  man        to make manual pages"
 40 | 	@echo "  texinfo    to make Texinfo files"
 41 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 42 | 	@echo "  gettext    to make PO message catalogs"
 43 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 44 | 	@echo "  xml        to make Docutils-native XML files"
 45 | 	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
 46 | 	@echo "  linkcheck  to check all external links for integrity"
 47 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 48 | 	@echo "  coverage   to run coverage check of the documentation (if enabled)"
 49 | 
 50 | .PHONY: clean
 51 | clean:
 52 | 	rm -rf $(BUILDDIR)/*
 53 | 
 54 | .PHONY: html
 55 | html:
 56 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 57 | 	@echo
 58 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 59 | 
 60 | .PHONY: dirhtml
 61 | dirhtml:
 62 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 63 | 	@echo
 64 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 65 | 
 66 | .PHONY: singlehtml
 67 | singlehtml:
 68 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 69 | 	@echo
 70 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 71 | 
 72 | .PHONY: pickle
 73 | pickle:
 74 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 75 | 	@echo
 76 | 	@echo "Build finished; now you can process the pickle files."
 77 | 
 78 | .PHONY: json
 79 | json:
 80 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 81 | 	@echo
 82 | 	@echo "Build finished; now you can process the JSON files."
 83 | 
 84 | .PHONY: htmlhelp
 85 | htmlhelp:
 86 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 87 | 	@echo
 88 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 89 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 90 | 
 91 | .PHONY: qthelp
 92 | qthelp:
 93 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 94 | 	@echo
 95 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 96 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 97 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pyConTextNLP.qhcp"
 98 | 	@echo "To view the help file:"
 99 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pyConTextNLP.qhc"
100 | 
101 | .PHONY: applehelp
102 | applehelp:
103 | 	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
104 | 	@echo
105 | 	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
106 | 	@echo "N.B. You won't be able to view it unless you put it in" \
107 | 	      "~/Library/Documentation/Help or install it in your application" \
108 | 	      "bundle."
109 | 
110 | .PHONY: devhelp
111 | devhelp:
112 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
113 | 	@echo
114 | 	@echo "Build finished."
115 | 	@echo "To view the help file:"
116 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/pyConTextNLP"
117 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pyConTextNLP"
118 | 	@echo "# devhelp"
119 | 
120 | .PHONY: epub
121 | epub:
122 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
123 | 	@echo
124 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
125 | 
126 | .PHONY: latex
127 | latex:
128 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
129 | 	@echo
130 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
131 | 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
132 | 	      "(use \`make latexpdf' here to do that automatically)."
133 | 
134 | .PHONY: latexpdf
135 | latexpdf:
136 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
137 | 	@echo "Running LaTeX files through pdflatex..."
138 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
139 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
140 | 
141 | .PHONY: latexpdfja
142 | latexpdfja:
143 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
144 | 	@echo "Running LaTeX files through platex and dvipdfmx..."
145 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
146 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
147 | 
148 | .PHONY: text
149 | text:
150 | 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
151 | 	@echo
152 | 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
153 | 
154 | .PHONY: man
155 | man:
156 | 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
157 | 	@echo
158 | 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
159 | 
160 | .PHONY: texinfo
161 | texinfo:
162 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
163 | 	@echo
164 | 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
165 | 	@echo "Run \`make' in that directory to run these through makeinfo" \
166 | 	      "(use \`make info' here to do that automatically)."
167 | 
168 | .PHONY: info
169 | info:
170 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
171 | 	@echo "Running Texinfo files through makeinfo..."
172 | 	make -C $(BUILDDIR)/texinfo info
173 | 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
174 | 
175 | .PHONY: gettext
176 | gettext:
177 | 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
178 | 	@echo
179 | 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
180 | 
181 | .PHONY: changes
182 | changes:
183 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
184 | 	@echo
185 | 	@echo "The overview file is in $(BUILDDIR)/changes."
186 | 
187 | .PHONY: linkcheck
188 | linkcheck:
189 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
190 | 	@echo
191 | 	@echo "Link check complete; look for any errors in the above output " \
192 | 	      "or in $(BUILDDIR)/linkcheck/output.txt."
193 | 
194 | .PHONY: doctest
195 | doctest:
196 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
197 | 	@echo "Testing of doctests in the sources finished, look at the " \
198 | 	      "results in $(BUILDDIR)/doctest/output.txt."
199 | 
200 | .PHONY: coverage
201 | coverage:
202 | 	$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
203 | 	@echo "Testing of coverage in the sources finished, look at the " \
204 | 	      "results in $(BUILDDIR)/coverage/python.txt."
205 | 
206 | .PHONY: xml
207 | xml:
208 | 	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
209 | 	@echo
210 | 	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
211 | 
212 | .PHONY: pseudoxml
213 | pseudoxml:
214 | 	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
215 | 	@echo
216 | 	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
217 | 


--------------------------------------------------------------------------------
/docs/source/bibliography.md:
--------------------------------------------------------------------------------
 1 | ## Publications Based on pyConTextNLP
 2 | 
 3 | * Chapman BE, Lee S, Kang HP, Chapman WW, "Document-level classification of CT pulmonary angiography reports based on an extension of the ConText algorithm." [J Biomed Inform. 2011 Oct;44(5):728-37](http://www.sciencedirect.com/science/article/pii/S1532046411000621)
 4 | * Wilson RA, et al. "Automated ancillary cancer history classification for mesothelioma patients from free-text clinical reports." J Pathol Inform. 2010 Oct 11;1:24.
 5 | * Chapman BE, Lee S, Kang HP, Chapman WW. Using ConText to Identify Candidate Pulmonary Embolism Subjects Based on Dictated Radiology Reports. (Presented at AMIA Clinical Research Informatics Summit 2011)
 6 | * Wilson RA, Chapman WW, DeFries SJ, Becich MJ, Chapman BE. Identifying History of Ancillary Cancers in Mesothelioma Patients from Free-Text Clinical Reports. (Presented at AMIA 2010).
 7 | * Chapman WW, Hillert D, Velupillai S, Kvist M, Skeppstedt M, Chapman BE, Conway M, Tharp M, Mowery DL, Deleger L (2013). Extending the NegEx Lexicon for Multiple Languages. Stud Health Technol Inform,192, 677-81
 8 | * Velupillai S, Skeppstedt M, Kvist M, Mowery D, Chapman BE, Dalianis H, and Chapman WW. Porting a Rule-based Assertion Classifier for Clinical Text from English to Swedish. The 4th International Louhi Workshop on Health Document Text Mining and Information Analysis (Louhi 2013), edited by Hanna Suominen.
 9 | * Chapman WW, Hilert D, Velupillai S, Kvist M, Skeppstedt M, Chapman BE, Conway M, Tharp M, Mowery DL, Deleger L. Extending the NegEx lexicon for multiple languages. (In press Proc Medinfo 2013)
10 | * Velupillai S, Skeppstedt M, Kvist M, Mowery D, Chapman BE, Dalianis H, Chapman WW (July 2014). Cue-based assertion classification for Swedish clinical text--developing a lexicon for pyConTextSwe. Artif Intell Med, 61(3), 137-144.
11 | * Mowery D, Chapman WW, Chapman BE, Conway MA, South BR, Madden E; Keyhani S. Extracting a Stroke Phenotype Risk Factor from Veteran Health Administration Clinical Reports: An Information Content Analysis. Journal of Biomedical Semantics (accepted)
12 | * Wilson RA, Chapman BE. Automated Capture of Pulmonary Embolism Spatial Location in Dictated Reports Using the ConText Algorithm. (Presented at RSNA 2011; poster)
13 | * Gentili A. Chapman BE. Use of pyConText to Classify Reports Containing Critical Results. (Presented at RSNA 2011; oral).
14 | * Gentili A. Chapman BE. Use of pyConText to Assist in Auditing for Chest Biopsy Complications. (Presented RSNA 2012)
15 | * Chapman BE, Wei W, Chapman WW. The Frequency of ConText Lexical Items in Diverse Medical Texts. (Presented IEEE HISB 2012, poster)
16 | * Gentili A, Chapman BE. Use of Natural Language Processing to Classify Radiology Reports Containing Description of the Abdominal Aorta. (Presented at RSNA 2013).
17 | * Chapman BE, Gentili A, Chen J, Miyakoshi A, Chapman W. Measuring Expressions of Uncertainty in Radiology Texts for Natural Language Processing Applications. (Presented at  RSNA 2013).
18 | * Chapman BE, Chen J, Miyakoshi A, Chapman WW, Gentili A. Measuring How Perceived meanings of Uncertainty Cues Differs with and Without Sentence-Level Context in Radiology Reports. (Presented at RSNA 2013).
19 | * Chapman BE, Heilbrun M. Lexical Disparities Between Reports Authored by Residents and Reports Authored by Attending Radiologists Using Natural Language Processing. (Presented at RSNA 2015).
20 | * Taggart M, Chapman WW, Steinberg BA, Ruckel S, Pregenzer-Wenzler A, Du Y, Ferraro F, Bucher BT, Lloyd-Jones DM, Rondina MT, Shah RU. Comparison of 2 Natural Language Processing Methods for Identification of Bleeding Among Critically Ill Patients [JAMA Netw Open. 2018;1(6):e183451](https://jamanetwork.com/journals/jamanetworkopen/fullarticle/2706498).
21 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # pyConTextNLP documentation build configuration file, created by
  5 | # sphinx-quickstart on Thu Mar 17 21:09:29 2016.
  6 | #
  7 | # This file is execfile()d with the current directory set to its
  8 | # containing dir.
  9 | #
 10 | # Note that not all possible configuration values are present in this
 11 | # autogenerated file.
 12 | #
 13 | # All configuration values have a default; values that are commented out
 14 | # serve to show the default.
 15 | 
 16 | import sys
 17 | import os
 18 | 
 19 | # If extensions (or modules to document with autodoc) are in another directory,
 20 | # add these directories to sys.path here. If the directory is relative to the
 21 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 22 | #sys.path.insert(0, os.path.abspath('.'))
 23 | 
 24 | # -- General configuration ------------------------------------------------
 25 | 
 26 | # If your documentation needs a minimal Sphinx version, state it here.
 27 | #needs_sphinx = '1.0'
 28 | 
 29 | # Add any Sphinx extension module names here, as strings. They can be
 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 31 | # ones.
 32 | extensions = [
 33 |     'sphinx.ext.autodoc',
 34 |     'sphinx.ext.doctest',
 35 |     'sphinx.ext.mathjax',
 36 |     'sphinx.ext.viewcode',
 37 | ]
 38 | 
 39 | # Add any paths that contain templates here, relative to this directory.
 40 | templates_path = ['_templates']
 41 | 
 42 | # The suffix(es) of source filenames.
 43 | # You can specify multiple suffix as a list of string:
 44 | # source_suffix = ['.rst', '.md']
 45 | #source_suffix = '.rst'
 46 | from recommonmark.parser import CommonMarkParser
 47 | 
 48 | source_parsers = {
 49 |     '.md': CommonMarkParser,
 50 | }
 51 | 
 52 | source_suffix = ['.rst', '.md']
 53 | # The encoding of source files.
 54 | #source_encoding = 'utf-8-sig'
 55 | 
 56 | # The master toctree document.
 57 | master_doc = 'index'
 58 | 
 59 | # General information about the project.
 60 | project = 'pyConTextNLP'
 61 | copyright = '2016, Brian E. Chapman, Ph.D.'
 62 | author = 'Brian E. Chapman, Ph.D.'
 63 | 
 64 | # The version info for the project you're documenting, acts as replacement for
 65 | # |version| and |release|, also used in various other places throughout the
 66 | # built documents.
 67 | #
 68 | # The short X.Y version.
 69 | version = '0.6.0.9'
 70 | # The full version, including alpha/beta/rc tags.
 71 | release = '0.6.0.9'
 72 | 
 73 | # The language for content autogenerated by Sphinx. Refer to documentation
 74 | # for a list of supported languages.
 75 | #
 76 | # This is also used if you do content translation via gettext catalogs.
 77 | # Usually you set "language" from the command line for these cases.
 78 | language = None
 79 | 
 80 | # There are two options for replacing |today|: either, you set today to some
 81 | # non-false value, then it is used:
 82 | #today = ''
 83 | # Else, today_fmt is used as the format for a strftime call.
 84 | #today_fmt = '%B %d, %Y'
 85 | 
 86 | # List of patterns, relative to source directory, that match files and
 87 | # directories to ignore when looking for source files.
 88 | exclude_patterns = []
 89 | 
 90 | # The reST default role (used for this markup: `text`) to use for all
 91 | # documents.
 92 | #default_role = None
 93 | 
 94 | # If true, '()' will be appended to :func: etc. cross-reference text.
 95 | #add_function_parentheses = True
 96 | 
 97 | # If true, the current module name will be prepended to all description
 98 | # unit titles (such as .. function::).
 99 | #add_module_names = True
100 | 
101 | # If true, sectionauthor and moduleauthor directives will be shown in the
102 | # output. They are ignored by default.
103 | #show_authors = False
104 | 
105 | # The name of the Pygments (syntax highlighting) style to use.
106 | pygments_style = 'sphinx'
107 | 
108 | # A list of ignored prefixes for module index sorting.
109 | #modindex_common_prefix = []
110 | 
111 | # If true, keep warnings as "system message" paragraphs in the built documents.
112 | #keep_warnings = False
113 | 
114 | # If true, `todo` and `todoList` produce output, else they produce nothing.
115 | todo_include_todos = False
116 | 
117 | 
118 | # -- Options for HTML output ----------------------------------------------
119 | 
120 | # The theme to use for HTML and HTML Help pages.  See the documentation for
121 | # a list of builtin themes.
122 | html_theme = 'alabaster'
123 | 
124 | # Theme options are theme-specific and customize the look and feel of a theme
125 | # further.  For a list of options available for each theme, see the
126 | # documentation.
127 | #html_theme_options = {}
128 | 
129 | # Add any paths that contain custom themes here, relative to this directory.
130 | #html_theme_path = []
131 | 
132 | # The name for this set of Sphinx documents.  If None, it defaults to
133 | # "<project> v<release> documentation".
134 | #html_title = None
135 | 
136 | # A shorter title for the navigation bar.  Default is the same as html_title.
137 | #html_short_title = None
138 | 
139 | # The name of an image file (relative to this directory) to place at the top
140 | # of the sidebar.
141 | #html_logo = None
142 | 
143 | # The name of an image file (within the static path) to use as favicon of the
144 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
145 | # pixels large.
146 | #html_favicon = None
147 | 
148 | # Add any paths that contain custom static files (such as style sheets) here,
149 | # relative to this directory. They are copied after the builtin static files,
150 | # so a file named "default.css" will overwrite the builtin "default.css".
151 | html_static_path = ['_static']
152 | 
153 | # Add any extra paths that contain custom files (such as robots.txt or
154 | # .htaccess) here, relative to this directory. These files are copied
155 | # directly to the root of the documentation.
156 | #html_extra_path = []
157 | 
158 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
159 | # using the given strftime format.
160 | #html_last_updated_fmt = '%b %d, %Y'
161 | 
162 | # If true, SmartyPants will be used to convert quotes and dashes to
163 | # typographically correct entities.
164 | #html_use_smartypants = True
165 | 
166 | # Custom sidebar templates, maps document names to template names.
167 | #html_sidebars = {}
168 | 
169 | # Additional templates that should be rendered to pages, maps page names to
170 | # template names.
171 | #html_additional_pages = {}
172 | 
173 | # If false, no module index is generated.
174 | #html_domain_indices = True
175 | 
176 | # If false, no index is generated.
177 | #html_use_index = True
178 | 
179 | # If true, the index is split into individual pages for each letter.
180 | #html_split_index = False
181 | 
182 | # If true, links to the reST sources are added to the pages.
183 | #html_show_sourcelink = True
184 | 
185 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
186 | #html_show_sphinx = True
187 | 
188 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
189 | #html_show_copyright = True
190 | 
191 | # If true, an OpenSearch description file will be output, and all pages will
192 | # contain a <link> tag referring to it.  The value of this option must be the
193 | # base URL from which the finished HTML is served.
194 | #html_use_opensearch = ''
195 | 
196 | # This is the file name suffix for HTML files (e.g. ".xhtml").
197 | #html_file_suffix = None
198 | 
199 | # Language to be used for generating the HTML full-text search index.
200 | # Sphinx supports the following languages:
201 | #   'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja'
202 | #   'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr'
203 | #html_search_language = 'en'
204 | 
205 | # A dictionary with options for the search language support, empty by default.
206 | # Now only 'ja' uses this config value
207 | #html_search_options = {'type': 'default'}
208 | 
209 | # The name of a javascript file (relative to the configuration directory) that
210 | # implements a search results scorer. If empty, the default will be used.
211 | #html_search_scorer = 'scorer.js'
212 | 
213 | # Output file base name for HTML help builder.
214 | htmlhelp_basename = 'pyConTextNLPdoc'
215 | 
216 | # -- Options for LaTeX output ---------------------------------------------
217 | 
218 | latex_elements = {
219 | # The paper size ('letterpaper' or 'a4paper').
220 | #'papersize': 'letterpaper',
221 | 
222 | # The font size ('10pt', '11pt' or '12pt').
223 | #'pointsize': '10pt',
224 | 
225 | # Additional stuff for the LaTeX preamble.
226 | #'preamble': '',
227 | 
228 | # Latex figure (float) alignment
229 | #'figure_align': 'htbp',
230 | }
231 | 
232 | # Grouping the document tree into LaTeX files. List of tuples
233 | # (source start file, target name, title,
234 | #  author, documentclass [howto, manual, or own class]).
235 | latex_documents = [
236 |     (master_doc, 'pyConTextNLP.tex', 'pyConTextNLP Documentation',
237 |      'Brian E. Chapman, Ph.D.', 'manual'),
238 | ]
239 | 
240 | # The name of an image file (relative to this directory) to place at the top of
241 | # the title page.
242 | #latex_logo = None
243 | 
244 | # For "manual" documents, if this is true, then toplevel headings are parts,
245 | # not chapters.
246 | #latex_use_parts = False
247 | 
248 | # If true, show page references after internal links.
249 | #latex_show_pagerefs = False
250 | 
251 | # If true, show URL addresses after external links.
252 | #latex_show_urls = False
253 | 
254 | # Documents to append as an appendix to all manuals.
255 | #latex_appendices = []
256 | 
257 | # If false, no module index is generated.
258 | #latex_domain_indices = True
259 | 
260 | 
261 | # -- Options for manual page output ---------------------------------------
262 | 
263 | # One entry per manual page. List of tuples
264 | # (source start file, name, description, authors, manual section).
265 | man_pages = [
266 |     (master_doc, 'pycontextnlp', 'pyConTextNLP Documentation',
267 |      [author], 1)
268 | ]
269 | 
270 | # If true, show URL addresses after external links.
271 | #man_show_urls = False
272 | 
273 | 
274 | # -- Options for Texinfo output -------------------------------------------
275 | 
276 | # Grouping the document tree into Texinfo files. List of tuples
277 | # (source start file, target name, title, author,
278 | #  dir menu entry, description, category)
279 | texinfo_documents = [
280 |     (master_doc, 'pyConTextNLP', 'pyConTextNLP Documentation',
281 |      author, 'pyConTextNLP', 'One line description of project.',
282 |      'Miscellaneous'),
283 | ]
284 | 
285 | # Documents to append as an appendix to all manuals.
286 | #texinfo_appendices = []
287 | 
288 | # If false, no module index is generated.
289 | #texinfo_domain_indices = True
290 | 
291 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
292 | #texinfo_show_urls = 'footnote'
293 | 
294 | # If true, do not generate a @detailmenu in the "Top" node's menu.
295 | #texinfo_no_detailmenu = False
296 | 


--------------------------------------------------------------------------------
/docs/source/index.md:
--------------------------------------------------------------------------------
 1 | # pyConTextNLP
 2 | ## Python ConText implementation for NLP
 3 | 
 4 | ## What is pyConTextNLP?
 5 | 
 6 | pyConTextNLP is a partial implementation of the ConText algorithm using Python. The original description of  pyConTextNLP was provided in Chapman BE, Lee S, Kang HP, Chapman WW, "Document-level classification of CT pulmonary angiography reports based on an extension of the ConText algorithm." [J Biomed Inform. 2011 Oct;44(5):728-37](http://www.sciencedirect.com/science/article/pii/S1532046411000621)
 7 | 
 8 | Since that publication pyConTextNLP has undergone several important revisions:
 9 | 
10 | 1. Incorporating NetworkX to describe target/modifier relationships.
11 | 1. Porting from Python 2.x to Python 3.x
12 |     * This is a work in progress. pyConTextNLP does not have a clean transition for handling unicode in Python 2.x in my attempts to port to 3.x
13 | 1. Rewriting pyConTextNLP to have a more functional style.
14 |     * This has been motivated by both the need to incorporate parallel processing into the algorithm for speed and to reduce unintended side effects.
15 |     * This work currently lies in the subpackage ``functional``.
16 | 
17 | 
18 | 
19 | 
20 | ## Dependencies
21 | * [NetworkX](https://pypi.python.org/pypi/networkx/) for relating ConText relationships.
22 | * [TextBlob](https://pypi.python.org/pypi/textblob) for sentence splitting.
23 | * [nose](https://pypi.python.org/pypi/nose/) for unit testing.
24 | 
25 | ## Installation
26 | 
27 | pyConTextNLP is hosted on [GitHub](https://github.com/chapmanbe/pyConTextNLP) and is index in pypi so can be installed with pip:
28 | 
29 | ``pip install pyConTextNLP``
30 | 
31 | ## [Bibliography](./bibliography.md)
32 | 


--------------------------------------------------------------------------------
/notebooks/BasicSentenceMarkup.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Demonstration of Basic Sentence Markup with pyConTextNLP"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "pyConTextNLP uses NetworkX directional graphs to represent the markup: nodes in the graph will be the concepts that are identified in the sentence and edges in the graph will be the relationships between those concepts. "
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": null,
 20 |    "metadata": {},
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "import pyConTextNLP.pyConText as pyConText\n",
 24 |     "import pyConTextNLP.itemData as itemData\n",
 25 |     "import networkx as nx"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "metadata": {},
 31 |    "source": [
 32 |     "* ``pyConText`` contains the bulk of the pyConTextNLP functionality, including basic class definitions such as the ``ConTextMarkup`` class that represents the markup of a sentence.\n",
 33 |     "* ``itemData`` contains a class definition for an itemData and functions for reading itemData definitions which are assumed to be in a tab seperated file that is specified as either a local file or a remote resource. In this example we will read definitions straight from the GitHub repository.\n",
 34 |     "    * An ``itemData`` in its most basic form is a four-tuple consisting of \n",
 35 |     "        1. A **literal** (e.g. \"pulmonary embolism\", \"no definite evidence of\")\n",
 36 |     "        1. A **category** (e.g. \"CRITICAL_FINDING\", \"PROBABLE_EXISTENCE\")\n",
 37 |     "        1. A **regular expression** that defines how to identify the literal concept. If no regular expression is specified, a regular expression will be built directly from the literal by wrapping it with word boundaries (e.g. r\"\"\"\\bpulmonary embolism\\b\"\"\")\n",
 38 |     "        1. A **rule** that defines how the concept works in the sentence (e.g. a negation term that looks **forward** in the sentence)."
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "markdown",
 43 |    "metadata": {},
 44 |    "source": [
 45 |     "### Sentences\n",
 46 |     "\n",
 47 |     "These example reports are taken from (with modification) the [MIMIC2 demo data set](https://physionet.org/mimic2/) that is a publically available database of de-identified medical records for deceased individuals. "
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": null,
 53 |    "metadata": {},
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "reports = [\n",
 57 |     "    \"\"\"IMPRESSION: Evaluation limited by lack of IV contrast; however, no evidence of\n",
 58 |     "      bowel obstruction or mass identified within the abdomen or pelvis. Non-specific interstitial opacities and bronchiectasis seen at the right\n",
 59 |     "     base, suggestive of post-inflammatory changes.\"\"\",\n",
 60 |     "    \"\"\"IMPRESSION: Evidence of early pulmonary vascular congestion and interstitial edema. Probable scarring at the medial aspect of the right lung base, with no\n",
 61 |     "     definite consolidation.\"\"\"\n",
 62 |     "    ,\n",
 63 |     "    \"\"\"IMPRESSION:\n",
 64 |     "     \n",
 65 |     "     1.  2.0 cm cyst of the right renal lower pole.  Otherwise, normal appearance\n",
 66 |     "     of the right kidney with patent vasculature and no sonographic evidence of\n",
 67 |     "     renal artery stenosis.\n",
 68 |     "     2.  Surgically absent left kidney.\"\"\",\n",
 69 |     "    \"\"\"IMPRESSION:  No pneumothorax.\"\"\",\n",
 70 |     "    \"\"\"IMPRESSION: No definite pneumothorax\"\"\"\n",
 71 |     "    \"\"\"IMPRESSION:  New opacity at the left lower lobe consistent with pneumonia.\"\"\"\n",
 72 |     "]"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "markdown",
 77 |    "metadata": {},
 78 |    "source": [
 79 |     "### Read the ``itemData`` definitions\n",
 80 |     "\n",
 81 |     "We're reading directly from GitHub. You could read from a local file using a `file://` URL."
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": null,
 87 |    "metadata": {},
 88 |    "outputs": [],
 89 |    "source": [
 90 |     "modifiers = itemData.get_items(\n",
 91 |     "    \"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/lexical_kb_05042016.yml\")\n",
 92 |     "targets = itemData.get_items(\n",
 93 |     "    \"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/utah_crit.yml\")\n"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "markdown",
 98 |    "metadata": {},
 99 |    "source": [
100 |     "### Example function to analyze each sentence\n",
101 |     "\n",
102 |     "This the function we'll use for each report. The following section of this document steps through each line."
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": null,
108 |    "metadata": {},
109 |    "outputs": [],
110 |    "source": [
111 |     "def markup_sentence(s, modifiers, targets, prune_inactive=True):\n",
112 |     "    \"\"\"\n",
113 |     "    \"\"\"\n",
114 |     "    markup = pyConText.ConTextMarkup()\n",
115 |     "    markup.setRawText(s)\n",
116 |     "    markup.cleanText()\n",
117 |     "    markup.markItems(modifiers, mode=\"modifier\")\n",
118 |     "    markup.markItems(targets, mode=\"target\")\n",
119 |     "    markup.pruneMarks()\n",
120 |     "    markup.dropMarks('Exclusion')\n",
121 |     "    # apply modifiers to any targets within the modifiers scope\n",
122 |     "    markup.applyModifiers()\n",
123 |     "    markup.pruneSelfModifyingRelationships()\n",
124 |     "    if prune_inactive:\n",
125 |     "        markup.dropInactiveModifiers()\n",
126 |     "    return markup"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "markdown",
131 |    "metadata": {},
132 |    "source": [
133 |     "### We're going to start with our simplest of sentences"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": null,
139 |    "metadata": {},
140 |    "outputs": [],
141 |    "source": [
142 |     "reports[3]"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "markdown",
147 |    "metadata": {},
148 |    "source": [
149 |     "### marking up a sentence\n",
150 |     "\n",
151 |     "We start by creating an instance of the ``ConTextMarkup`` class. This is a subclass of a NetworkX DiGraph. Information will be stored in the nodes and edges. "
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": null,
157 |    "metadata": {},
158 |    "outputs": [],
159 |    "source": [
160 |     "markup = pyConText.ConTextMarkup()"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "code",
165 |    "execution_count": null,
166 |    "metadata": {},
167 |    "outputs": [],
168 |    "source": [
169 |     "isinstance(markup,nx.DiGraph)"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "markdown",
174 |    "metadata": {},
175 |    "source": [
176 |     "#### Set the text to be processed"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "code",
181 |    "execution_count": null,
182 |    "metadata": {},
183 |    "outputs": [],
184 |    "source": [
185 |     "markup.setRawText(reports[3].lower())\n",
186 |     "print(markup)\n",
187 |     "print(len(markup.getRawText()))\n"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "markdown",
192 |    "metadata": {},
193 |    "source": [
194 |     "#### Clean the text\n",
195 |     "\n",
196 |     "Prior to processing we do some basic cleaning of the text, sucha s replacing multiple white spaces with a single space. You'll notice this in the spacing between the colon and \"no\" in the ``raw`` and ``clean`` versions of the text."
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "code",
201 |    "execution_count": null,
202 |    "metadata": {},
203 |    "outputs": [],
204 |    "source": [
205 |     "markup.cleanText()\n",
206 |     "print(markup)\n",
207 |     "print(len(markup.getText()))"
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "markdown",
212 |    "metadata": {},
213 |    "source": [
214 |     "#### Identify concepts in the sentence\n",
215 |     "\n",
216 |     "The ``markItems`` method takes a list of itemData and uses the regular expressions to identify any instances of the itemData in the sentence. With the ``mode`` keyword we specify whether these ``itemData`` are targets or modifiers. This value will be stored as a data attribute of the node that is created in the graph for any identified concepts."
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "code",
221 |    "execution_count": null,
222 |    "metadata": {},
223 |    "outputs": [
224 |     {
225 |      "name": "stdout",
226 |      "output_type": "stream",
227 |      "text": [
228 |       "[(<id> 193409165385789347928701545101362172235 </id> <phrase> no </phrase> <category> ['definite_negated_existence'] </category> , {'category': 'modifier'})]\n",
229 |       "<class 'pyConTextNLP.tagObject.tagObject'>\n"
230 |      ]
231 |     }
232 |    ],
233 |    "source": [
234 |     "markup.markItems(modifiers, mode=\"modifier\")\n",
235 |     "print(markup.nodes(data=True))\n",
236 |     "print(type(list(markup.nodes())[0]))"
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "markdown",
241 |    "metadata": {},
242 |    "source": [
243 |     "#### What does our initial markup look like?\n",
244 |     "\n",
245 |     "* We've identified one concept in the sentence: ``no``\n",
246 |     "* We've created a ``tagObject`` for this concept which keeps track of the actual phrase identified by the regular expression, what the category of the itemData was (``definite_negated_existence``), this is a list because there can be multiple categories. There is also an absurdly long identifier for the node. Note that our mode ``modifier`` has been stored as a data element of the node. In NetworkX each node (or edge) has a dictionary for data."
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "markdown",
251 |    "metadata": {},
252 |    "source": [
253 |     "#### Now let's markup the targets"
254 |    ]
255 |   },
256 |   {
257 |    "cell_type": "code",
258 |    "execution_count": null,
259 |    "metadata": {},
260 |    "outputs": [],
261 |    "source": [
262 |     "markup.markItems(targets, mode=\"target\")"
263 |    ]
264 |   },
265 |   {
266 |    "cell_type": "code",
267 |    "execution_count": null,
268 |    "metadata": {},
269 |    "outputs": [
270 |     {
271 |      "name": "stdout",
272 |      "output_type": "stream",
273 |      "text": [
274 |       "(<id> 193409165385789347928701545101362172235 </id> <phrase> no </phrase> <category> ['definite_negated_existence'] </category> , {'category': 'modifier'})\n",
275 |       "(<id> 199069516875446444699146669155103248715 </id> <phrase> pneumothorax </phrase> <category> ['pneumothorax'] </category> , {'category': 'target'})\n"
276 |      ]
277 |     }
278 |    ],
279 |    "source": [
280 |     "for node in markup.nodes(data=True):\n",
281 |     "    print(node)"
282 |    ]
283 |   },
284 |   {
285 |    "cell_type": "markdown",
286 |    "metadata": {},
287 |    "source": [
288 |     "#### What does our markup look like now?\n",
289 |     "\n",
290 |     "We've added another node to the graph. This time the ``target`` ``pneumothorax``."
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "markdown",
295 |    "metadata": {},
296 |    "source": [
297 |     "#### Prune Marks\n",
298 |     "\n",
299 |     "After identifying concepts, we prune concepts that are a subset of another identified concept. This results in no changes here, but the importance will be shown later with a different sentence."
300 |    ]
301 |   },
302 |   {
303 |    "cell_type": "code",
304 |    "execution_count": null,
305 |    "metadata": {},
306 |    "outputs": [],
307 |    "source": [
308 |     "markup.pruneMarks()\n",
309 |     "for node in markup.nodes(data=True):\n",
310 |     "    print(node)"
311 |    ]
312 |   },
313 |   {
314 |    "cell_type": "markdown",
315 |    "metadata": {},
316 |    "source": [
317 |     "#### Are there any relationships in our markup?\n",
318 |     "\n",
319 |     "We do not yet have any relationships (edges) between our concepts (target and modifier edges)"
320 |    ]
321 |   },
322 |   {
323 |    "cell_type": "code",
324 |    "execution_count": null,
325 |    "metadata": {},
326 |    "outputs": [],
327 |    "source": [
328 |     "print(markup.edges())"
329 |    ]
330 |   },
331 |   {
332 |    "cell_type": "markdown",
333 |    "metadata": {},
334 |    "source": [
335 |     "#### Apply modifiers\n",
336 |     "\n",
337 |     "We now call the ``applyModifiers`` method of the ConTextMarkup object to identify any relationships between the nodes."
338 |    ]
339 |   },
340 |   {
341 |    "cell_type": "code",
342 |    "execution_count": null,
343 |    "metadata": {},
344 |    "outputs": [],
345 |    "source": [
346 |     "markup.applyModifiers()\n",
347 |     "for edge in markup.edges():\n",
348 |     "    print(edge)"
349 |    ]
350 |   },
351 |   {
352 |    "cell_type": "markdown",
353 |    "metadata": {},
354 |    "source": [
355 |     "#### We now have a relationship!\n",
356 |     "\n",
357 |     "We now have a directed edge between our ``no`` node and our ``pneumothorax`` node. This will be interepreted as ``pneumothorax`` being a definitely negated concept in the sentence."
358 |    ]
359 |   },
360 |   {
361 |    "cell_type": "markdown",
362 |    "metadata": {},
363 |    "source": [
364 |     "## What's next?\n",
365 |     "\n",
366 |     "The value of pruning is shown in [this](./BasicSentencemarkupPart2.ipynb) notebook."
367 |    ]
368 |   },
369 |   {
370 |    "cell_type": "code",
371 |    "execution_count": null,
372 |    "metadata": {},
373 |    "outputs": [],
374 |    "source": []
375 |   },
376 |   {
377 |    "cell_type": "code",
378 |    "execution_count": null,
379 |    "metadata": {},
380 |    "outputs": [],
381 |    "source": []
382 |   }
383 |  ],
384 |  "metadata": {
385 |   "kernelspec": {
386 |    "display_name": "Python 3",
387 |    "language": "python",
388 |    "name": "python3"
389 |   },
390 |   "language_info": {
391 |    "codemirror_mode": {
392 |     "name": "ipython",
393 |     "version": 3
394 |    },
395 |    "file_extension": ".py",
396 |    "mimetype": "text/x-python",
397 |    "name": "python",
398 |    "nbconvert_exporter": "python",
399 |    "pygments_lexer": "ipython3",
400 |    "version": "3.5.23.6.8"
401 |   }
402 |  },
403 |  "nbformat": 4,
404 |  "nbformat_minor": 1
405 | }
406 | 


--------------------------------------------------------------------------------
/notebooks/BasicSentenceMarkupPart2.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Demonstration of Basic Sentence Markup with pyConTextNLP, Part 2.\n",
  8 |     "## An ever-so-slightly more complex sentence\n",
  9 |     "\n",
 10 |     "### Let's use a slightly more complex sentence that will illustrate pruning."
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": null,
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "import pyConTextNLP.pyConText as pyConText\n",
 20 |     "import pyConTextNLP.itemData as itemData\n",
 21 |     "import networkx as nx"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "metadata": {},
 27 |    "source": [
 28 |     "### Sentences\n",
 29 |     "\n",
 30 |     "These example reports are taken from (with modification) the [MIMIC2 demo data set](https://physionet.org/mimic2/) that is a publically available database of de-identified medical records for deceased individuals. "
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": null,
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "reports = [\n",
 40 |     "    \"\"\"IMPRESSION: Evaluation limited by lack of IV contrast; however, no evidence of\n",
 41 |     "      bowel obstruction or mass identified within the abdomen or pelvis. Non-specific interstitial opacities and bronchiectasis seen at the right\n",
 42 |     "     base, suggestive of post-inflammatory changes.\"\"\",\n",
 43 |     "    \"\"\"IMPRESSION: Evidence of early pulmonary vascular congestion and interstitial edema. Probable scarring at the medial aspect of the right lung base, with no\n",
 44 |     "     definite consolidation.\"\"\"\n",
 45 |     "    ,\n",
 46 |     "    \"\"\"IMPRESSION:\n",
 47 |     "     \n",
 48 |     "     1.  2.0 cm cyst of the right renal lower pole.  Otherwise, normal appearance\n",
 49 |     "     of the right kidney with patent vasculature and no sonographic evidence of\n",
 50 |     "     renal artery stenosis.\n",
 51 |     "     2.  Surgically absent left kidney.\"\"\",\n",
 52 |     "    \"\"\"IMPRESSION:  No pneumothorax.\"\"\",\n",
 53 |     "    \"\"\"IMPRESSION: No definite pneumothorax\"\"\",\n",
 54 |     "    \"\"\"IMPRESSION:  New opacity at the left lower lobe consistent with pneumonia.\"\"\"\n",
 55 |     "]"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "markdown",
 60 |    "metadata": {},
 61 |    "source": [
 62 |     "### Read the ``itemData`` definitions"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": null,
 68 |    "metadata": {},
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "modifiers = itemData.get_items(\n",
 72 |     "    \"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/lexical_kb_05042016.yml\")\n",
 73 |     "targets = itemData.get_items(\n",
 74 |     "    \"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/utah_crit.yml\")\n"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "markdown",
 79 |    "metadata": {},
 80 |    "source": [
 81 |     "### We're going to start with our simplest of sentences"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": null,
 87 |    "metadata": {},
 88 |    "outputs": [],
 89 |    "source": [
 90 |     "reports[4]"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "markdown",
 95 |    "metadata": {},
 96 |    "source": [
 97 |     "### marking up a sentence\n",
 98 |     "\n",
 99 |     "We start by creating an instance of the ``ConTextMarkup`` class. This is a subclass of a NetworkX DiGraph. Information will be stored in the nodes and edges. "
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "code",
104 |    "execution_count": null,
105 |    "metadata": {},
106 |    "outputs": [],
107 |    "source": [
108 |     "markup = pyConText.ConTextMarkup()"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": null,
114 |    "metadata": {},
115 |    "outputs": [],
116 |    "source": [
117 |     "markup.setRawText(reports[4].lower())\n",
118 |     "print(markup)\n",
119 |     "print(len(markup.getRawText()))\n",
120 |     "\n",
121 |     "markup.cleanText()\n",
122 |     "print(markup)\n",
123 |     "print(len(markup.getText()))"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "markdown",
128 |    "metadata": {},
129 |    "source": [
130 |     "#### Identify concepts in the sentence\n"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "code",
135 |    "execution_count": null,
136 |    "metadata": {},
137 |    "outputs": [
138 |     {
139 |      "name": "stdout",
140 |      "output_type": "stream",
141 |      "text": [
142 |       "(<id> 256833253737566050220546835725615337803 </id> <phrase> no </phrase> <category> ['definite_negated_existence'] </category> , {'category': 'modifier'})\n",
143 |       "(<id> 256833892316555915191107839689855045963 </id> <phrase> no definite </phrase> <category> ['definite_negated_existence'] </category> , {'category': 'modifier'})\n",
144 |       "(<id> 256826997881853923908450449495296807243 </id> <phrase> definite </phrase> <category> ['definite_existence'] </category> , {'category': 'modifier'})\n",
145 |       "(<id> 256849716557454889207255398223055655243 </id> <phrase> pneumothorax </phrase> <category> ['pneumothorax'] </category> , {'category': 'target'})\n"
146 |      ]
147 |     }
148 |    ],
149 |    "source": [
150 |     "markup.markItems(modifiers, mode=\"modifier\")\n",
151 |     "markup.markItems(targets, mode=\"target\")\n",
152 |     "for node in markup.nodes(data=True):\n",
153 |     "    print(node)\n"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "markdown",
158 |    "metadata": {},
159 |    "source": [
160 |     "#### What does our initial markup look like?\n",
161 |     "\n",
162 |     "* We've identified three concepts in the sentence: \n",
163 |     "    1. \"no\"\n",
164 |     "    1. \"no definite\"\n",
165 |     "    1. \"pneumothorax\"\n",
166 |     "* Here \"no\" is not a true concept in the sentence; it is a subset of the concept \"no definite\""
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "markdown",
171 |    "metadata": {},
172 |    "source": [
173 |     "#### Prune Marks\n",
174 |     "\n",
175 |     "After identifying concepts, we prune concepts that are a subset of another identified concept."
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "code",
180 |    "execution_count": null,
181 |    "metadata": {},
182 |    "outputs": [],
183 |    "source": [
184 |     "markup.pruneMarks()\n",
185 |     "for node in markup.nodes(data=True):\n",
186 |     "    print(node)"
187 |    ]
188 |   },
189 |   {
190 |    "cell_type": "markdown",
191 |    "metadata": {},
192 |    "source": [
193 |     "#### What is the effect of ``pruneMarks``\n",
194 |     "\n",
195 |     "We've correctly dropped ``no`` as an identified concept."
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "markdown",
200 |    "metadata": {},
201 |    "source": [
202 |     "#### Apply modifiers\n",
203 |     "\n",
204 |     "We now call the ``applyModifiers`` method of the ConTextMarkup object to identify any relationships between the nodes."
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "code",
209 |    "execution_count": null,
210 |    "metadata": {},
211 |    "outputs": [],
212 |    "source": [
213 |     "markup.applyModifiers()\n",
214 |     "for edge in markup.edges():\n",
215 |     "    print(edge)"
216 |    ]
217 |   },
218 |   {
219 |    "cell_type": "markdown",
220 |    "metadata": {},
221 |    "source": [
222 |     "### Here is a notebook for [Multisentence Documents](./MultiSentenceDocuments.ipynb)"
223 |    ]
224 |   },
225 |   {
226 |    "cell_type": "code",
227 |    "execution_count": null,
228 |    "metadata": {},
229 |    "outputs": [],
230 |    "source": []
231 |   }
232 |  ],
233 |  "metadata": {
234 |   "kernelspec": {
235 |    "display_name": "Python 3",
236 |    "language": "python",
237 |    "name": "python3"
238 |   },
239 |   "language_info": {
240 |    "codemirror_mode": {
241 |     "name": "ipython",
242 |     "version": 3
243 |    },
244 |    "file_extension": ".py",
245 |    "mimetype": "text/x-python",
246 |    "name": "python",
247 |    "nbconvert_exporter": "python",
248 |    "pygments_lexer": "ipython3",
249 |    "version": "3.7.2"
250 |   }
251 |  },
252 |  "nbformat": 4,
253 |  "nbformat_minor": 1
254 | }
255 | 


--------------------------------------------------------------------------------
/notebooks/MultiSentenceDocuments.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Processing Multisentence Documents\n",
  8 |     "\n",
  9 |     "This notebook uses [TextBlob](https://pypi.org/project/textblob/) to do sentence splitting. If you do not have TextBlob installed, you can install it by uncomment the line in the cell below and executing that cell. "
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": null,
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "#!pip install textblob"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": null,
 24 |    "metadata": {},
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "import pyConTextNLP.pyConText as pyConText\n",
 28 |     "import pyConTextNLP.itemData as itemData\n",
 29 |     "from textblob import TextBlob\n",
 30 |     "import networkx as nx\n",
 31 |     "import pyConTextNLP.display.html as html\n",
 32 |     "from IPython.display import display, HTML"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "execution_count": null,
 38 |    "metadata": {},
 39 |    "outputs": [],
 40 |    "source": [
 41 |     "reports = [\n",
 42 |     "    \"\"\"IMPRESSION: Evaluation limited by lack of IV contrast; however, no evidence of\n",
 43 |     "      bowel obstruction or mass identified within the abdomen or pelvis. Non-specific interstitial opacities and bronchiectasis seen at the right\n",
 44 |     "     base, suggestive of post-inflammatory changes.\"\"\",\n",
 45 |     "    \"\"\"IMPRESSION: Evidence of early pulmonary vascular congestion and interstitial edema. Probable scarring at the medial aspect of the right lung base, with no\n",
 46 |     "     definite consolidation.\"\"\"\n",
 47 |     "    ,\n",
 48 |     "    \"\"\"IMPRESSION:\n",
 49 |     "     \n",
 50 |     "     1.  2.0 cm cyst of the right renal lower pole.  Otherwise, normal appearance\n",
 51 |     "     of the right kidney with patent vasculature and no sonographic evidence of\n",
 52 |     "     renal artery stenosis.\n",
 53 |     "     2.  Surgically absent left kidney.\"\"\",\n",
 54 |     "    \"\"\"IMPRESSION:  No pneumothorax.\"\"\",\n",
 55 |     "    \"\"\"IMPRESSION: No definite pneumothorax\"\"\"\n",
 56 |     "    \"\"\"IMPRESSION:  New opacity at the left lower lobe consistent with pneumonia.\"\"\"\n",
 57 |     "]"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": null,
 63 |    "metadata": {},
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "modifiers = itemData.get_items(\n",
 67 |     "    \"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/lexical_kb_05042016.yml\")\n",
 68 |     "targets = itemData.get_items(\n",
 69 |     "    \"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/utah_crit.yml\")\n"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "markdown",
 74 |    "metadata": {},
 75 |    "source": [
 76 |     "## Define ``markup_sentence``\n",
 77 |     "\n",
 78 |     "We are putting the functionality we went through in the previous two notebooks ([BasicSentenceMarkup](./BasicSentenceMarkup.ipynb) and [BasicSentenceMarkupPart2](BasicSentenceMarkupPart2.ipynb)) into a function ``markup_sentence``. We add one step to the function: ``dropInactiveModifiers`` will delete any modifier node that does not get attached to a target node."
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": null,
 84 |    "metadata": {},
 85 |    "outputs": [],
 86 |    "source": [
 87 |     "def markup_sentence(s, modifiers, targets, prune_inactive=True):\n",
 88 |     "    \"\"\"\n",
 89 |     "    \"\"\"\n",
 90 |     "    markup = pyConText.ConTextMarkup()\n",
 91 |     "    markup.setRawText(s)\n",
 92 |     "    markup.cleanText()\n",
 93 |     "    markup.markItems(modifiers, mode=\"modifier\")\n",
 94 |     "    markup.markItems(targets, mode=\"target\")\n",
 95 |     "    markup.pruneMarks()\n",
 96 |     "    markup.dropMarks('Exclusion')\n",
 97 |     "    # apply modifiers to any targets within the modifiers scope\n",
 98 |     "    markup.applyModifiers()\n",
 99 |     "    markup.pruneSelfModifyingRelationships()\n",
100 |     "    if prune_inactive:\n",
101 |     "        markup.dropInactiveModifiers()\n",
102 |     "    return markup"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": null,
108 |    "metadata": {},
109 |    "outputs": [],
110 |    "source": [
111 |     "report = reports[0]\n",
112 |     "print(report)"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "markdown",
117 |    "metadata": {},
118 |    "source": [
119 |     "#### Create a ``ConTextDocument``\n",
120 |     "\n",
121 |     "``ConTextDocument`` is a class for organizing the markup of multiple sentences. It has a private attribute that is NetworkX DiGraph that represents the document structure. In this exmaple we only use the ``ConTextDocument`` class to collect multiple sentence markups."
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": null,
127 |    "metadata": {},
128 |    "outputs": [],
129 |    "source": [
130 |     "context = pyConText.ConTextDocument()"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "markdown",
135 |    "metadata": {},
136 |    "source": [
137 |     "####  Split the document into sentences and process each sentence\n",
138 |     "\n",
139 |     "pyConTextNLP comes with a simple sentence splitter in ``helper.py``. I have not been maintaining this and have recently been using TextBlob to split sentences. A known problem with either sentence splitting solution is enumerated lists that don't use periods."
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": null,
145 |    "metadata": {},
146 |    "outputs": [],
147 |    "source": [
148 |     "blob = TextBlob(report.lower())\n",
149 |     "count = 0\n",
150 |     "rslts = []\n",
151 |     "for s in blob.sentences:\n",
152 |     "    m = markup_sentence(s.raw, modifiers=modifiers, targets=targets)\n",
153 |     "    rslts.append(m)\n",
154 |     "\n",
155 |     "for r in rslts:\n",
156 |     "    context.addMarkup(r)"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "markdown",
161 |    "metadata": {},
162 |    "source": [
163 |     "### Displaying pyConTextNLP Markups\n",
164 |     "\n",
165 |     "The ``display`` subpackage contains some functionality for visualizing the markups. Here I use HTML to color-code identified concepts."
166 |    ]
167 |   },
168 |   {
169 |    "cell_type": "code",
170 |    "execution_count": null,
171 |    "metadata": {},
172 |    "outputs": [],
173 |    "source": [
174 |     "clrs = {\\\n",
175 |     "    \"bowel_obstruction\": \"blue\",\n",
176 |     "    \"inflammation\": \"blue\",\n",
177 |     "    \"definite_negated_existence\": \"red\",\n",
178 |     "    \"probable_negated_existence\": \"indianred\",\n",
179 |     "    \"ambivalent_existence\": \"orange\",\n",
180 |     "    \"probable_existence\": \"forestgreen\",\n",
181 |     "    \"definite_existence\": \"green\",\n",
182 |     "    \"historical\": \"goldenrod\",\n",
183 |     "    \"indication\": \"pink\",\n",
184 |     "    \"acute\": \"golden\"\n",
185 |     "}"
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "code",
190 |    "execution_count": null,
191 |    "metadata": {},
192 |    "outputs": [],
193 |    "source": [
194 |     "display(HTML(html.mark_document_with_html(context,colors = clrs, default_color=\"black\")))"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "markdown",
199 |    "metadata": {},
200 |    "source": [
201 |     "### There is also a rich XML description of the ``ConTextDocument``"
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "code",
206 |    "execution_count": null,
207 |    "metadata": {},
208 |    "outputs": [],
209 |    "source": [
210 |     "print(context.getXML())"
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "code",
215 |    "execution_count": null,
216 |    "metadata": {},
217 |    "outputs": [],
218 |    "source": []
219 |   },
220 |   {
221 |    "cell_type": "code",
222 |    "execution_count": null,
223 |    "metadata": {},
224 |    "outputs": [],
225 |    "source": []
226 |   }
227 |  ],
228 |  "metadata": {
229 |   "kernelspec": {
230 |    "display_name": "Python 3",
231 |    "language": "python",
232 |    "name": "python3"
233 |   },
234 |   "language_info": {
235 |    "codemirror_mode": {
236 |     "name": "ipython",
237 |     "version": 3
238 |    },
239 |    "file_extension": ".py",
240 |    "mimetype": "text/x-python",
241 |    "name": "python",
242 |    "nbconvert_exporter": "python",
243 |    "pygments_lexer": "ipython3",
244 |    "version": "3.7.2"
245 |   }
246 |  },
247 |  "nbformat": 4,
248 |  "nbformat_minor": 1
249 | }
250 | 


--------------------------------------------------------------------------------
/notebooks/README.md:
--------------------------------------------------------------------------------
1 | # Read in order
2 | 
3 | These notebooks walk through pyConTextNLP concepts in order of complexity. Recommended reading order is:
4 | 
5 | 1. [BasicSentenceMarkup](./BasicSentenceMarkup.ipynb)
6 | 2. [BasicSentenceMarkupPart2](./BasicSentenceMarkupPart2.ipynb)
7 | 3. [MultiSentenceDocuments](./MultiSentenceDocuments.ipynb)
8 | 


--------------------------------------------------------------------------------
/notebooks/cherrypy_pyConText.py:
--------------------------------------------------------------------------------
  1 | import pyConTextNLP.pyConTextGraph as pyConText
  2 | import pyConTextNLP.itemData as itemData
  3 | from textblob import TextBlob
  4 | import networkx as nx
  5 | import pyConTextNLP.display.html as html
  6 | import json
  7 | import cherrypy
  8 | 
  9 | 
 10 | class pyConTextNLP_REST(object):
 11 |     
 12 |     mod = itemData.instantiateFromCSVtoitemData(
 13 |     "https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/lexical_kb_05042016.tsv")
 14 |     tar = itemData.instantiateFromCSVtoitemData(
 15 |     "https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/utah_crit.tsv")
 16 | 
 17 |     clrs ={\
 18 |     "bowel_obstruction": "blue",
 19 |     "inflammation": "blue",
 20 |     "definite_negated_existence": "red",
 21 |     "probable_negated_existence": "indianred",
 22 |     "ambivalent_existence": "orange",
 23 |     "probable_existence": "forestgreen",
 24 |     "definite_existence": "green",
 25 |     "historical": "goldenrod",
 26 |     "indication": "pink",
 27 |     "acute": "golden"
 28 |     } 
 29 | 
 30 |     @cherrypy.expose
 31 |     def index(self):
 32 |         return "Welcome to pyConTextNLP REST API. To start go to /markup_report."
 33 | 
 34 |     @cherrypy.expose
 35 |     def markup_report(self, report='''IMPRESSION: Evaluation limited by lack of IV contrast; however, no evidence of
 36 |                                     bowel obstruction or mass identified within the abdomen or pelvis. 
 37 |                                     Non-specific interstitial opacities and bronchiectasis seen at the right
 38 |                                     base, suggestive of post-inflammatory changes.
 39 |                                     ''',
 40 |                                     modifiers=None,
 41 |                                     targets=None):
 42 |         print("type of modifiers",type(modifiers))
 43 |         print("len of modifiers",len(modifiers))
 44 |         print(modifiers)
 45 |         for m in modifiers:
 46 |             print(m)
 47 | 
 48 |         if modifiers==None:
 49 |             _modifiers = self.mod 
 50 |         else:
 51 |             _modifiers = itemData.itemData()
 52 |             _modifiers.extend(json.loads(modifiers))
 53 |         if targets==None:
 54 |             _targets=self.tar 
 55 |         else:
 56 |             _targets = itemData.itemData()
 57 |             _targets.extend(json.loads(targets))
 58 | 
 59 | 
 60 |         
 61 |         context = self.split_sentences(report, _modifiers, _targets) 
 62 |         clrs = self.get_colors_dict(_modifiers, _targets)
 63 |         return html.mark_document_with_html(context, colors=clrs)
 64 | 
 65 | 
 66 |     def markup_sentence(self, s, modifiers, targets, prune_inactive=True):
 67 |         """
 68 |         """
 69 | 
 70 | 
 71 | 
 72 |         markup = pyConText.ConTextMarkup()
 73 |         markup.setRawText(s)
 74 |         markup.cleanText()
 75 |         markup.markItems(modifiers, mode="modifier")
 76 |         markup.markItems(targets, mode="target")
 77 |         markup.pruneMarks()
 78 |         markup.dropMarks('Exclusion')
 79 |         # apply modifiers to any targets within the modifiers scope
 80 |         markup.applyModifiers()
 81 |         markup.pruneSelfModifyingRelationships()
 82 |         if prune_inactive:
 83 |             markup.dropInactiveModifiers()
 84 |         return markup
 85 | 
 86 |     def split_sentences(self, report, modifiers, targets):
 87 |         blob = TextBlob(report.lower())
 88 |         count = 0
 89 |         rslts = []
 90 |         for s in blob.sentences:
 91 |             m = self.markup_sentence(s.raw, modifiers, targets)
 92 |             rslts.append(m)
 93 |         
 94 |         context = pyConText.ConTextDocument()
 95 |         for r in rslts:
 96 |             context.addMarkup(r)
 97 | 
 98 |         return context
 99 | 
100 | 
101 |     def get_colors_dict(self, modifiers, targets):
102 |         # this method will basically assign blue to all targets 
103 |         # and then assigns a different color  for each modifier category         
104 |         #import colorsys
105 |         import randomcolor
106 |         colors = {}
107 |         rcol = randomcolor.RandomColor()
108 |         for t in targets:
109 |             colors[t.getCategory()[0]] = 'blue'
110 |         mm = set([c.getCategory()[0] for c in modifiers]) 
111 |         #HSV = [(x*1.0/len(mm), 0.5, 0.5) for x in range(len(mm))]
112 |         #RGB = map(lambda x: colorsys.hsv_to_rgb(*x), HSV)
113 |         #RGB = lambda: random.randint(0,255) 
114 |         #for m,rgb in zip(mm,RGB):
115 |         for m in mm:
116 |             colors[m] =  rcol.generate()[0]#"rgb{0}".format(rgb)#"rgb({0},{1},{2})".format(RGB(),RGB(),RGB())
117 | 
118 |         return colors
119 | 
120 | 
121 | 
122 | if __name__ == '__main__':
123 |     cherrypy.config.update({'server.socket_port': 3030,})
124 |     cherrypy.quickstart(pyConTextNLP_REST())
125 | 


--------------------------------------------------------------------------------
/notebooks/functional/2/Reading_ConTextItems.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "### This notebook generates how to generate ConTextItems"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {
 14 |     "collapsed": true
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import os\n",
 19 |     "import pyConTextNLP.functional.conTextItem as CI\n"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 3,
 25 |    "metadata": {},
 26 |    "outputs": [
 27 |     {
 28 |      "data": {
 29 |       "text/plain": [
 30 |        "'/Users/brian/anaconda/envs/NLP/lib/python2.7/site-packages/pyConTextNLP-0.6.0.9-py2.7.egg/pyConTextNLP/functional/conTextItem.pyc'"
 31 |       ]
 32 |      },
 33 |      "execution_count": 3,
 34 |      "metadata": {},
 35 |      "output_type": "execute_result"
 36 |     }
 37 |    ],
 38 |    "source": [
 39 |     "CI.__file__"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "### ConTextItems can be read from the web"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": 2,
 52 |    "metadata": {},
 53 |    "outputs": [
 54 |     {
 55 |      "name": "stdout",
 56 |      "output_type": "stream",
 57 |      "text": [
 58 |       "literal<<are ruled out>>; category<<definite_negated_existence>>; re<<are ruled out>>; rule<<backward>>\n",
 59 |       "literal<<be ruled out>>; category<<indication>>; re<<be ruled out>>; rule<<backward>>\n",
 60 |       "literal<<being ruled out>>; category<<indication>>; re<<being ruled out>>; rule<<backward>>\n",
 61 |       "literal<<can be ruled out>>; category<<definite_negated_existence>>; re<<can be ruled out>>; rule<<backward>>\n",
 62 |       "literal<<cannot be excluded>>; category<<ambivalent_existence>>; re<<cannot be excluded>>; rule<<backward>>\n",
 63 |       "literal<<cannot totally be excluded>>; category<<probable_negated_existence>>; re<<cannot totally be excluded>>; rule<<backward>>\n",
 64 |       "literal<<could be ruled out>>; category<<definite_negated_existence>>; re<<could be ruled out>>; rule<<backward>>\n",
 65 |       "literal<<examination>>; category<<indication>>; re<<\\b(examination|exam|study)\\b>>; rule<<backward>>\n",
 66 |       "literal<<free>>; category<<definite_negated_existence>>; re<<free>>; rule<<backward>>\n",
 67 |       "literal<<has been ruled out>>; category<<definite_negated_existence>>; re<<has been ruled out>>; rule<<backward>>\n"
 68 |      ]
 69 |     }
 70 |    ],
 71 |    "source": [
 72 |     "kb = [\"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/lexical_kb_04292013.tsv\", \n",
 73 |     "      \"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/criticalfinder_generalized_modifiers.tsv\"]\n",
 74 |     "items = []\n",
 75 |     "for k in kb:\n",
 76 |     "    items.extend(CI.readConTextItems(k)[0])\n",
 77 |     "for i in items[0:10]:\n",
 78 |     "    print(i)"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "markdown",
 83 |    "metadata": {},
 84 |    "source": [
 85 |     "### ConTextItems can also be read from local files"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": 5,
 91 |    "metadata": {},
 92 |    "outputs": [
 93 |     {
 94 |      "name": "stdout",
 95 |      "output_type": "stream",
 96 |      "text": [
 97 |       "literal<<are ruled out>>; category<<definite_negated_existence>>; re<<are ruled out>>; rule<<backward>>\n",
 98 |       "literal<<be ruled out>>; category<<indication>>; re<<be ruled out>>; rule<<backward>>\n",
 99 |       "literal<<being ruled out>>; category<<indication>>; re<<being ruled out>>; rule<<backward>>\n",
100 |       "literal<<can be ruled out>>; category<<definite_negated_existence>>; re<<can be ruled out>>; rule<<backward>>\n",
101 |       "literal<<cannot be excluded>>; category<<ambivalent_existence>>; re<<cannot be excluded>>; rule<<backward>>\n",
102 |       "literal<<cannot totally be excluded>>; category<<probable_negated_existence>>; re<<cannot totally be excluded>>; rule<<backward>>\n",
103 |       "literal<<could be ruled out>>; category<<definite_negated_existence>>; re<<could be ruled out>>; rule<<backward>>\n",
104 |       "literal<<examination>>; category<<indication>>; re<<\\b(examination|exam|study)\\b>>; rule<<backward>>\n",
105 |       "literal<<free>>; category<<definite_negated_existence>>; re<<free>>; rule<<backward>>\n",
106 |       "literal<<has been ruled out>>; category<<definite_negated_existence>>; re<<has been ruled out>>; rule<<backward>>\n"
107 |      ]
108 |     }
109 |    ],
110 |    "source": [
111 |     "PCDIR = os.path.join(os.path.expanduser(\"~\"),\n",
112 |     "                        \"Documents\",\"NLP\",\"pyConTextNLP\")\n",
113 |     "kb_local = [\"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/lexical_kb_04292013.tsv\",\n",
114 |     "            os.path.join(PCDIR,\"KB\",\"quality_artifacts.tsv\")]\n",
115 |     "items_local = []\n",
116 |     "for k in kb_local:\n",
117 |     "    items_local.extend(CI.readConTextItems(k)[0])\n",
118 |     "for i in items_local[0:10]:\n",
119 |     "    print(i)"
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "code",
124 |    "execution_count": null,
125 |    "metadata": {
126 |     "collapsed": true
127 |    },
128 |    "outputs": [],
129 |    "source": []
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": null,
134 |    "metadata": {
135 |     "collapsed": true
136 |    },
137 |    "outputs": [],
138 |    "source": []
139 |   }
140 |  ],
141 |  "metadata": {
142 |   "kernelspec": {
143 |    "display_name": "Python 3",
144 |    "language": "python",
145 |    "name": "python3"
146 |   },
147 |   "language_info": {
148 |    "codemirror_mode": {
149 |     "name": "ipython",
150 |     "version": 3
151 |    },
152 |    "file_extension": ".py",
153 |    "mimetype": "text/x-python",
154 |    "name": "python",
155 |    "nbconvert_exporter": "python",
156 |    "pygments_lexer": "ipython3",
157 |    "version": "3.5.4"
158 |   }
159 |  },
160 |  "nbformat": 4,
161 |  "nbformat_minor": 1
162 | }
163 | 


--------------------------------------------------------------------------------
/notebooks/functional/3/Reading_ConTextItems.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "### This notebook generates how to generate ConTextItems"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {
 14 |     "collapsed": true
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import os\n",
 19 |     "import pyConTextNLP.functional.conTextItem as CI\n"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "markdown",
 24 |    "metadata": {},
 25 |    "source": [
 26 |     "### ConTextItems can be read from the web"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 2,
 32 |    "metadata": {},
 33 |    "outputs": [
 34 |     {
 35 |      "name": "stdout",
 36 |      "output_type": "stream",
 37 |      "text": [
 38 |       "conTextItem(literal='are ruled out', category=('definite_negated_existence',), re='are ruled out', rule='backward')\n",
 39 |       "conTextItem(literal='be ruled out', category=('indication',), re='be ruled out', rule='backward')\n",
 40 |       "conTextItem(literal='being ruled out', category=('indication',), re='being ruled out', rule='backward')\n",
 41 |       "conTextItem(literal='can be ruled out', category=('definite_negated_existence',), re='can be ruled out', rule='backward')\n",
 42 |       "conTextItem(literal='cannot be excluded', category=('ambivalent_existence',), re='cannot be excluded', rule='backward')\n",
 43 |       "conTextItem(literal='cannot totally be excluded', category=('probable_negated_existence',), re='cannot totally be excluded', rule='backward')\n",
 44 |       "conTextItem(literal='could be ruled out', category=('definite_negated_existence',), re='could be ruled out', rule='backward')\n",
 45 |       "conTextItem(literal='examination', category=('indication',), re='\\\\b(examination|exam|study)\\\\b', rule='backward')\n",
 46 |       "conTextItem(literal='free', category=('definite_negated_existence',), re='free', rule='backward')\n",
 47 |       "conTextItem(literal='has been ruled out', category=('definite_negated_existence',), re='has been ruled out', rule='backward')\n"
 48 |      ]
 49 |     }
 50 |    ],
 51 |    "source": [
 52 |     "kb = [\"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/lexical_kb_04292013.tsv\", \n",
 53 |     "      \"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/criticalfinder_generalized_modifiers.tsv\"]\n",
 54 |     "items = []\n",
 55 |     "for k in kb:\n",
 56 |     "    items.extend(CI.readConTextItems(k)[0])\n",
 57 |     "for i in items[0:10]:\n",
 58 |     "    print(i)"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "markdown",
 63 |    "metadata": {},
 64 |    "source": [
 65 |     "### ConTextItems can also be read from local files"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": 3,
 71 |    "metadata": {},
 72 |    "outputs": [
 73 |     {
 74 |      "name": "stdout",
 75 |      "output_type": "stream",
 76 |      "text": [
 77 |       "conTextItem(literal='are ruled out', category=('definite_negated_existence',), re='are ruled out', rule='backward')\n",
 78 |       "conTextItem(literal='be ruled out', category=('indication',), re='be ruled out', rule='backward')\n",
 79 |       "conTextItem(literal='being ruled out', category=('indication',), re='being ruled out', rule='backward')\n",
 80 |       "conTextItem(literal='can be ruled out', category=('definite_negated_existence',), re='can be ruled out', rule='backward')\n",
 81 |       "conTextItem(literal='cannot be excluded', category=('ambivalent_existence',), re='cannot be excluded', rule='backward')\n",
 82 |       "conTextItem(literal='cannot totally be excluded', category=('probable_negated_existence',), re='cannot totally be excluded', rule='backward')\n",
 83 |       "conTextItem(literal='could be ruled out', category=('definite_negated_existence',), re='could be ruled out', rule='backward')\n",
 84 |       "conTextItem(literal='examination', category=('indication',), re='\\\\b(examination|exam|study)\\\\b', rule='backward')\n",
 85 |       "conTextItem(literal='free', category=('definite_negated_existence',), re='free', rule='backward')\n",
 86 |       "conTextItem(literal='has been ruled out', category=('definite_negated_existence',), re='has been ruled out', rule='backward')\n"
 87 |      ]
 88 |     }
 89 |    ],
 90 |    "source": [
 91 |     "PCDIR = os.path.join(os.path.expanduser(\"~\"),\n",
 92 |     "                        \"Documents\",\"NLP\",\"pyConTextNLP\")\n",
 93 |     "kb_local = [\"https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/lexical_kb_04292013.tsv\",\n",
 94 |     "            os.path.join(PCDIR,\"KB\",\"quality_artifacts.tsv\")]\n",
 95 |     "items_local = []\n",
 96 |     "for k in kb_local:\n",
 97 |     "    items_local.extend(CI.readConTextItems(k)[0])\n",
 98 |     "for i in items_local[0:10]:\n",
 99 |     "    print(i)"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "code",
104 |    "execution_count": null,
105 |    "metadata": {
106 |     "collapsed": true
107 |    },
108 |    "outputs": [],
109 |    "source": []
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": null,
114 |    "metadata": {
115 |     "collapsed": true
116 |    },
117 |    "outputs": [],
118 |    "source": []
119 |   }
120 |  ],
121 |  "metadata": {
122 |   "kernelspec": {
123 |    "display_name": "Python 3",
124 |    "language": "python",
125 |    "name": "python3"
126 |   },
127 |   "language_info": {
128 |    "codemirror_mode": {
129 |     "name": "ipython",
130 |     "version": 3
131 |    },
132 |    "file_extension": ".py",
133 |    "mimetype": "text/x-python",
134 |    "name": "python",
135 |    "nbconvert_exporter": "python",
136 |    "pygments_lexer": "ipython3",
137 |    "version": "3.5.4"
138 |   }
139 |  },
140 |  "nbformat": 4,
141 |  "nbformat_minor": 1
142 | }
143 | 


--------------------------------------------------------------------------------
/pyConTextNLP/ConTextMarkup.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Module defining ConTextMarkup class
  3 | """
  4 | import re
  5 | import uuid
  6 | from . io.xml import xmlScrub
  7 | from . tagObject import tagObject
  8 | 
  9 | import networkx as nx
 10 | 
 11 | REG_CLEAN1 = re.compile(r"""\W""", re.UNICODE)
 12 | REG_CLEAN2 = re.compile(r"""\s+""", re.UNICODE)
 13 | REG_CLEAN3 = re.compile(r"""\d""", re.UNICODE)
 14 | 
 15 | COMPILED_REGEXPRS = {}
 16 | 
 17 | NODE_XML_SKEL = \
 18 | """
 19 | <node>
 20 |     {0}
 21 | </node>
 22 | """
 23 | 
 24 | EDGE_XML_SKEL = \
 25 | """
 26 | <edge>
 27 |     <startNode> {0} </startNode>
 28 |     <endNode> {1} </endNode>
 29 |     {2}
 30 | </edge>
 31 | """
 32 | 
 33 | CONTEXT_MARKUP_XML_SKEL = \
 34 | """
 35 | <ConTextMarkup>
 36 | <rawText> {0} </rawText>
 37 | <cleanText> {1} </cleanText>
 38 | <nodes>
 39 | {2}
 40 | </nodes>
 41 | <edges>
 42 | {3}
 43 | </edges>
 44 | </ConTextMarkup>
 45 | """
 46 | 
 47 | 
 48 | def create_tag_id():
 49 |     """
 50 |     get a unique identifier
 51 |     """
 52 |     return uuid.uuid1().int
 53 | 
 54 | 
 55 | 
 56 | class ConTextMarkup(nx.DiGraph):
 57 |     """
 58 |     base class for context document.
 59 |     build around markedTargets a list of termObjects representing desired terms
 60 |     found in text and markedModifiers, tagObjects found in the text
 61 |     """
 62 | 
 63 | 
 64 |     def __init__(self, txt='', unicodeEncoding='utf-8'):
 65 |         """txt is the string to parse"""
 66 |         # __document capture the document level structure
 67 |         # for each sentence and then put in the archives when the next sentence
 68 |         # is processed
 69 |         super(ConTextMarkup, self).__init__(__txt=None,
 70 |                                             __rawtxt=txt,
 71 |                                             __scope=None,
 72 |                                             __SCOPEUPDATED=False)
 73 |         self.__document = nx.DiGraph()
 74 |         self.__document.add_node("top", category="document")
 75 |         self.__VERBOSE = False
 76 |         self.__tagID = 0
 77 |         self.__unicodeEncoding = unicodeEncoding
 78 | 
 79 | 
 80 |     def getUnicodeEncoding(self):
 81 |         """
 82 |         return the unicode encoding used for the class
 83 |         """
 84 |         return self.__unicodeEncoding
 85 | 
 86 | 
 87 |     def toggleVerbose(self):
 88 |         """toggles the boolean value for verbose mode"""
 89 |         self.__VERBOSE = not self.__VERBOSE
 90 | 
 91 | 
 92 |     def getVerbose(self):
 93 |         """
 94 |         return the verbose setting
 95 |         """
 96 |         return self.__VERBOSE
 97 | 
 98 | 
 99 |     def setRawText(self, txt=''):
100 |         """
101 |         sets the current txt to txt and resets the current attributes to empty
102 |         values, but does not modify the object archive
103 |         """
104 |         if self.getVerbose():
105 |             print("Setting text to", txt)
106 |         self.graph["__rawTxt"] = txt
107 |         self.graph["__txt"] = None
108 |         self.graph["__scope"] = None
109 |         self.graph["__SCOPEUPDATED"] = False
110 | 
111 | 
112 |     def getText(self):
113 |         """
114 |         return the cleaned text values
115 |         """
116 |         return self.graph.get("__txt", '')
117 | 
118 | 
119 |     def getScope(self):
120 |         """
121 |         return the scope of the markup
122 |         """
123 |         return self.graph.get("__scope", '')
124 | 
125 | 
126 |     def getScopeUpdated(self):
127 |         """
128 |         return boolean whether the scope has been updated
129 |         """
130 |         return self.graph.get("__SCOPEUPDATED")
131 | 
132 | 
133 |     def getRawText(self):
134 |         """
135 |         get the original (uncleaned) text
136 |         """
137 |         return self.graph.get("__rawTxt", '')
138 | 
139 | 
140 |     def getNumod_byerSentences(self): # !!! Need to rewrite this to match graph
141 |         """
142 |         get the numod_byer o sentences in the context
143 |         """
144 |         return len(self.__document)
145 | 
146 | 
147 |     def cleanText(self, stripNonAlphaNumeric=False, stripNumod_byers=False):
148 |         """Need to rename. applies the regular expression scrubbers to rawTxt"""
149 |         if stripNonAlphaNumeric:
150 |             txt = REG_CLEAN1.sub(" ", self.getRawText())
151 |         else:
152 |             txt = self.getRawText()
153 | 
154 |         # clean up white spaces
155 |         txt = REG_CLEAN2.sub(" ", txt)
156 |         if stripNumod_byers:
157 |             txt = REG_CLEAN3.sub("", txt)
158 | 
159 |         self.graph["__scope"] = (0, len(txt))
160 |         self.graph["__txt"] = txt
161 |         if self.getVerbose():
162 |             print("cleaned text is now", self.getText())
163 | 
164 | 
165 |     def getXML(self):
166 |         """
167 |         return an XML representation of the markup
168 |         """
169 |         nodes = list(self.nodes(data=True))
170 |         nodes.sort()
171 |         node_string = ''
172 |         for n in nodes:
173 |             attribute_string = ''
174 |             keys = list(n[1].keys())
175 |             keys.sort()
176 |             for k in keys:
177 |                 attribute_string += """<{0}> {1} </{2}>\n""".format(k, n[1][k], k)
178 |             modification_string = ''
179 |             modified_by = self.predecessors(n[0])
180 |             if modified_by:
181 |                 for mod in modified_by:
182 |                     modification_string += """<modified_by>\n"""
183 |                     modification_string += \
184 |                         """<modifyingNode> %s </modifyingNode>\n"""%mod.getTagID()
185 |                     modification_string += \
186 |                         """<modifyingCategory> %s </modifyingCategory>\n"""%mod.getCategory()
187 |                     modification_string += """</modified_by>\n"""
188 |             modifies = self.successors(n[0])
189 |             if modifies:
190 |                 for modified in modifies:
191 |                     modification_string += """<modifies>\n"""
192 |                     modification_string += \
193 |                         """<modifiedNode> {0} </modifiedNode>\n""".format(modified.getTagID())
194 |                     modification_string += \
195 |                         """</modifies>\n"""
196 |             node_string += \
197 |                     NODE_XML_SKEL.format(attribute_string+"{0}".format(n[0].getXML()) +\
198 |                         modification_string)
199 |         edges = list(self.edges(data=True))
200 |         edges.sort()
201 |         edge_string = ''
202 |         for edge in edges:
203 |             keys = list(edge[2].keys())
204 |             keys.sort()
205 |             attribute_string = ''
206 |             for key in keys:
207 |                 attribute_string += """<{0}> {1} </{2}>\n""".format(key, edge[2][key], key)
208 |             edge_string += "{0}".format(EDGE_XML_SKEL.format(edge[0].getTagID(),
209 |                                                              edge[1].getTagID(),
210 |                                                              attribute_string))
211 | 
212 |         return CONTEXT_MARKUP_XML_SKEL.format(xmlScrub(self.getRawText()),
213 |                                               xmlScrub(self.getText()),
214 |                                               node_string,
215 |                                               edge_string)
216 | 
217 | 
218 |     def __unicode__(self):
219 |         txt = '_'*42+"\n"
220 |         txt += 'rawText: {0}\n'.format(self.getRawText())
221 |         txt += 'cleanedText: {0}\n'.format(self.getText())
222 |         nodes = [n for n in self.nodes(data=True) if n[1].get('category', '') == 'target']
223 |         nodes.sort()
224 |         for n in nodes:
225 |             txt += "*"*32+"\n"
226 |             txt += "TARGET: {0}\n".format(n[0].__unicode__())
227 |             modifiers = list(self.predecessors(n[0]))
228 |             modifiers.sort()
229 |             for mod in modifiers:
230 |                 txt += "-"*4+"MODIFIED BY: {0}\n".format(mod.__unicode__())
231 |                 modifiers = self.predecessors(mod)
232 |                 if modifiers:
233 |                     for modifier in modifiers:
234 |                         txt += "-"*8+"MODIFIED BY: %s\n"%modifier.__unicode__()
235 | 
236 |         txt += "_"*42+"\n"
237 |         return txt
238 | 
239 | 
240 |     def __str__(self):
241 |         return self.__unicode__()
242 |     def __repr__(self):
243 |         return self.__unicode__()
244 | 
245 | 
246 |     def getConTextModeNodes(self, mode):
247 |         """
248 |         get the numod_byer of nodes of type mode
249 |         """
250 |         nodes = [n[0] for n in self.nodes(data=True) if n[1]['category'] == mode]
251 |         nodes.sort()
252 |         return nodes
253 | 
254 | 
255 |     def updateScopes(self):
256 |         """
257 |         update the scopes of all the marked modifiers in the txt. The scope
258 |         of a modifier is limited by its own span, the span of modifiers in the
259 |         same category marked in the text, and modifiers with rule 'terminate'.
260 |         """
261 |         if self.getVerbose():
262 |             print("updating scopes")
263 |         self.__SCOPEUPDATED = True
264 |         # make sure each tag has its own self-limited scope
265 |         modifiers = self.getConTextModeNodes("modifier")
266 |         for modifier in modifiers:
267 |             if self.getVerbose():
268 |                 print("old scope for {0} is {1}".format(modifier.__str__(), modifier.getScope()))
269 |             modifier.setScope()
270 |             if self.getVerbose():
271 |                 print("new scope for {0} is {1}".format(modifier.__str__(), modifier.getScope()))
272 | 
273 | 
274 |         # Now limit scope based on the domains of the spans of the other
275 |         # modifier
276 |         for i in range(len(modifiers)-1):
277 |             modifier = modifiers[i]
278 |             for j in range(i+1, len(modifiers)):
279 |                 modifier2 = modifiers[j]
280 |                 if modifier.limitScope(modifier2) and \
281 |                    modifier2.getRule().lower() == 'terminate':
282 |                     self.add_edge(modifier2, modifier)
283 |                 if modifier2.limitScope(modifier) and \
284 |                    modifier.getRule().lower() == 'terminate':
285 |                     self.add_edge(modifier, modifier2)
286 | 
287 | 
288 |     def markItems(self, items, mode="target"):
289 |         """tags the sentence for a list of items
290 |         items: a list of contextItems"""
291 |         if not items:
292 |             return
293 |         for item in items:
294 |             self.add_nodes_from(self.markItem(item, ConTextMode=mode), category=mode)
295 | 
296 | 
297 |     def markItem(self, item, ConTextMode="target", ignoreCase=True):
298 |         """
299 |         markup the current text with the current item.
300 |         If ignoreCase is True (default), the regular expression is compiled with
301 |         IGNORECASE."""
302 | 
303 |         if not self.getText():
304 |             self.cleanText()
305 | 
306 |         # See if we have already created a regular expression
307 | 
308 |         if not item.getLiteral() in COMPILED_REGEXPRS:
309 |             if not item.getRE():
310 |                 reg_exp = r"\b{}\b".format(item.getLiteral())
311 |                 if self.getVerbose():
312 |                     print("generating regular expression", reg_exp)
313 |             else:
314 |                 reg_exp = item.getRE()
315 |                 if self.getVerbose():
316 |                     print("using provided regular expression", reg_exp)
317 |             if ignoreCase:
318 |                 regex = re.compile(reg_exp, re.IGNORECASE|re.UNICODE)
319 |             else:
320 |                 regex = re.compile(reg_exp, re.UNICODE)
321 |             COMPILED_REGEXPRS[item.getLiteral()] = regex
322 |         else:
323 |             regex = COMPILED_REGEXPRS[item.getLiteral()]
324 |         _iter = regex.finditer(self.getText())
325 |         terms = []
326 |         for i in _iter:
327 |             tag_0 = tagObject(item,
328 |                               ConTextMode,
329 |                               tagid=create_tag_id(),
330 |                               scope=self.getScope())
331 | 
332 |             tag_0.setSpan(i.span())
333 |             tag_0.setPhrase(i.group())
334 |             tag_0.setMatchedGroupDictionary(i.groupdict())
335 |             if self.getVerbose():
336 |                 print("marked item", tag_0)
337 |             terms.append(tag_0)
338 |         return terms
339 | 
340 | 
341 |     def pruneMarks(self):
342 |         """
343 |         prune Marked objects by deleting any objects that lie within the span of
344 |         another object. Currently modifiers and targets are treated separately
345 |         """
346 |         self.__prune_marks(self.nodes(data=True))
347 | 
348 | 
349 |     def dropInactiveModifiers(self):
350 |         """
351 |         drop modifiers that are not modifying any targets
352 |         """
353 |         if self.getNumMarkedTargets() == 0:
354 |             if self.getVerbose():
355 |                 print("No targets in this sentence; dropping ALL modifiers.")
356 |             mnodes = self.getConTextModeNodes("modifier")
357 |         else:
358 |             mnodes = [n for n in self.getConTextModeNodes("modifier") if self.degree(n) == 0]
359 | 
360 |         if self.getVerbose() and mnodes:
361 |             print("dropping the following inactive modifiers")
362 |             for node in mnodes:
363 |                 print(node)
364 |         self.remove_nodes_from(mnodes)
365 | 
366 | 
367 |     def pruneModifierRelationships(self):
368 |         """Initially modifiers may be applied to multiple targets. This function
369 |         computes the text difference between the modifier and each modified
370 |         target and keeps only the minimum distance relationship
371 | 
372 |         Finally, we make sure that there are no self modifying modifiers present (e.g. "free" in
373 |         the phrase "free air" modifying the target "free air").
374 |         """
375 |         modifiers = self.getConTextModeNodes("modifier")
376 |         for modifier in modifiers:
377 |             modified_by = self.successors(modifier)
378 |             if modified_by and len(modified_by) > 1:
379 |                 minm = min([(modifier.dist(mod_by), mod_by) for mod_by in modified_by])
380 |                 edgs = self.edges(modifier)
381 |                 edgs.remove((modifier, minm[1]))
382 |                 if self.getVerbose():
383 |                     print("deleting relationship(s)", edgs)
384 | 
385 |                 self.remove_edges_from(edgs)
386 | 
387 | 
388 |     def pruneSelfModifyingRelationships(self):
389 |         """
390 |         We make sure that there are no self modifying modifiers present (e.g. "free" in
391 |         the phrase "free air" modifying the target "free air").
392 |         modifiers = self.getConTextModeNodes("modifier")
393 |         """
394 |         modifiers = self.getConTextModeNodes("modifier")
395 |         nodes_to_remove = []
396 |         for modifier in modifiers:
397 |             modified_by = self.successors(modifier)
398 |             if modified_by:
399 |                 for mod_by in modified_by:
400 |                     if self.getVerbose():
401 |                         print(mod_by, modifier, mod_by.encompasses(modifier))
402 |                     if mod_by.encompasses(modifier):
403 |                         nodes_to_remove.append(modifier)
404 |         if self.getVerbose():
405 |             print("removing the following self modifying nodes", nodes_to_remove)
406 |         self.remove_nodes_from(nodes_to_remove)
407 | 
408 | 
409 |     def __prune_marks(self, _marks):
410 |         if len(_marks) < 2:
411 |             return
412 |         # this can surely be done faster
413 |         marks = list(_marks)
414 |         marks.sort()
415 |         nodes_to_remove = []
416 |         for i in range(len(marks)-1):
417 |             mark1 = marks[i]
418 |             if mark1[0] not in nodes_to_remove:
419 |                 for j in range(i+1, len(marks)):
420 |                     mark2 = marks[j]
421 |                     if mark1[0].encompasses(mark2[0]) and \
422 |                        mark1[1]['category'] == mark2[1]['category']:
423 |                         nodes_to_remove.append(mark2[0])
424 |                     elif mark2[0].encompasses(mark1[0]) and \
425 |                          mark2[1]['category'] == mark1[1]['category']:
426 |                         nodes_to_remove.append(mark1[0])
427 |                         break
428 |         if self.getVerbose():
429 |             print("pruning the following nodes")
430 |             for node in nodes_to_remove:
431 |                 print(node)
432 |         self.remove_nodes_from(nodes_to_remove)
433 | 
434 | 
435 |     def dropMarks(self, category="exclusion"):
436 |         """Drop any targets that have the category equal to category"""
437 |         if self.getVerbose():
438 |             print("in dropMarks")
439 |             for n in self.nodes():
440 |                 print(n.getCategory(), n.isA(category.lower()))
441 |         dnodes = [n for n in self.nodes() if n.isA(category)]
442 |         if self.getVerbose() and dnodes:
443 |             print("droping the following markedItems")
444 |             for n in dnodes:
445 |                 print(n)
446 |         self.remove_nodes_from(dnodes)
447 | 
448 | 
449 |     def applyModifiers(self):
450 |         """
451 |         If the scope has not yet been updated, do this first.
452 | 
453 |         Loop through the marked targets and for each target apply the modifiers
454 |         """
455 |         if not self.getScopeUpdated():
456 |             self.updateScopes()
457 |         targets = self.getConTextModeNodes("target")
458 |         modifiers = self.getConTextModeNodes("modifier")
459 |         for target in targets:
460 |             for modifier in modifiers:
461 |                 if modifier.applyRule(target):
462 |                     if self.getVerbose():
463 |                         print("applying relationship between", modifier, target)
464 | 
465 |                     self.add_edge(modifier, target)
466 | 
467 | 
468 |     def getMarkedTargets(self):
469 |         """
470 |         Return the list of marked targets in the current sentence. List is sorted by span
471 |         """
472 |         targets = self.getConTextModeNodes("target")
473 |         targets.sort()
474 |         return targets
475 | 
476 | 
477 |     def getNumMarkedTargets(self):
478 |         """
479 |         Return the numod_byer of marked targets in the current sentence
480 |         """
481 |         return len(self.getConTextModeNodes("target"))
482 | 
483 | 
484 |     def getModifiers(self, node):
485 |         """
486 |         return immediate predecessorts of node. The returned list is sorted by node span.
487 |         """
488 |         modifiers = self.predecessors(node)
489 |         modifiers.sort()
490 |         return modifiers
491 | 
492 | 
493 |     def isModifiedByCategory(self, node, queryCategory):
494 |         """
495 |         tests whether node in markUp is modified by a tagObject
496 |         with category equal to queryCategory.  Return modifier if True
497 |         """
498 |         predecessors = self.getModifiers(node)
499 |         for predecessor in predecessors:
500 |             if predecessor.isA(queryCategory):
501 |                 return True
502 | 
503 |         return False
504 | 
505 | 
506 |     def getTokenDistance(self, node1, node2):
507 |         """returns the numod_byer of tokens (word) between node1 and node2"""
508 |         txt = self.getText()
509 |         if node1 < node2:
510 |             start = node1.getSpan()[1]+1
511 |             end = node2.getSpan()[0]
512 |             direction = 1
513 |         else:
514 |             start = node2.getSpan()[1]+1
515 |             end = node1.getSpan()[0]
516 |             direction = -1
517 | 
518 |         sub_txt = txt[start:end]
519 |         tokens = sub_txt.split()
520 |         return len(tokens)*direction
521 | 


--------------------------------------------------------------------------------
/pyConTextNLP/__init__.py:
--------------------------------------------------------------------------------
 1 | #Copyright 2010 Brian E. Chapman
 2 | #
 3 | #Licensed under the Apache License, Version 2.0 (the "License");
 4 | #you may not use this file except in compliance with the License.
 5 | #You may obtain a copy of the License at
 6 | #
 7 | #    http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 | """This is an alternative implementation of the pyConText package where I make
15 | use of graphs to indicate relationships between targets and modifiers. Nodes of
16 | thegraphs are the targets and modifiers identified in the text; edges of the
17 | graphs are relationships between the targets. This provides for much simpler
18 | code than what exists in the other version of pyConText where each object has a
19 | dictionary of __modifies and __modifiedby that must be kept in sync with each
20 | other.
21 | 
22 | Also it is hoped that the use of a directional graph could ultimately simplify
23 | our itemData structures as we could chain together items"""
24 | 
25 | import os
26 | version = {}
27 | with open(os.path.join(os.path.dirname(__file__),"version.py")) as f0:
28 |     exec(f0.read(), version)
29 | 
30 | __version__ = version['__version__']
31 | 


--------------------------------------------------------------------------------
/pyConTextNLP/display/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/74e5b9bc4de092dd8bef8e04b297369a7f3ea6d9/pyConTextNLP/display/__init__.py


--------------------------------------------------------------------------------
/pyConTextNLP/display/_bokeh.py:
--------------------------------------------------------------------------------
 1 | """This module provides pyConText visualizaiton tools using Bokeh"""
 2 | 
 3 | from bokeh.plotting import ColumnDataSource
 4 | import bokeh.plotting as bp
 5 | from bokeh.models import HoverTool
 6 | from collections import OrderedDict
 7 | import networkx as nx
 8 | 
 9 | def graph2DataSource(g):
10 | 
11 |     tmp = [(n.getSpan(),
12 |             n.getCategory(),
13 |             n.getPhrase(),
14 |             n.getTagID(),
15 |             n.getLiteral(),
16 |             n.getScope()) for n in g.nodes()]
17 |     span, category, text, ids, literals, scopes = zip(*tmp)
18 | 
19 |     return ColumnDataSource(data=dict(text=text,
20 |                                       literal=literals,
21 |                                       id=ids,
22 |                                       span=span,
23 |                                       scope=scopes,
24 |                                       category=category))
25 | 
26 | 
27 | def graphDocumentBokeh(g, width=600, height=300, title=""):
28 |     """
29 |     Returns a bokeh plotting figure of the pyConTextNLP graph g
30 |     """
31 |     colors = {'target':'blue', 'modifier':'red'}
32 |     TOOLS = "pan, box_zoom, reset, hover, previewsave"
33 | 
34 |     try:
35 |         pos = nx.graphviz_layout(g)
36 |     except:
37 |         pos = nx.spring_layout(g)
38 |     try:
39 |         xs = [p[0] for p in pos.values()]
40 |         ys = [p[1] for p in pos.values()]
41 |         delta = 75
42 |         minx, maxx = int(min(xs)-delta), int(max(xs)+delta)
43 |         miny, maxy = int(min(ys)-delta), int(max(ys)+delta)
44 |         radius = 0.1*(maxx-maxy)
45 |         p = bp.figure(plot_width=width, plot_height=height, title="",
46 |                       x_axis_type=None, y_axis_type=None,
47 |                       x_range=[minx, maxx],
48 |                       y_range=[miny, maxy],
49 |                       min_border=0, outline_line_color=None,
50 |                       tools=TOOLS)
51 |         xpos = [pos[n][0] for n in g.nodes()]
52 |         ypos = [pos[n][1] for n in g.nodes()]
53 |         tcolors = [colors[g.node[n]['category']] for n in g.nodes()]
54 |         text = [n.getPhrase() for n in g.nodes()]
55 |         source = graph2DataSource(g)
56 |         for e in g.edges():
57 |             p.line([pos[e[0]][0], pos[e[1]][0]],
58 |                    [pos[e[0]][1], pos[e[1]][1]],
59 |                    line_cap="round",
60 |                    line_width=3,
61 |                    line_alpha=0.4)
62 |             p.diamond([pos[e[1]][0]], [pos[e[1]][1]],
63 |                        alpha=0.4, size=[10])
64 |         p.text(xpos, ypos,
65 |                text=text, text_color=tcolors,
66 |                angle=0, text_font_size="12pt",
67 |                text_align='center', text_baseline='middle')
68 |         p.circle(xpos, ypos,
69 |                  radius=radius, source=source,
70 |                  fill_color=None, fill_alpha=0.1, line_color=None)
71 |         hover = p.select(dict(type=HoverTool))
72 |         hover.tooltips = OrderedDict([
73 |             ("index", "$index"),
74 |             ("id", "@id"),
75 |             ("phrase", "@text"),
76 |             ("literal", "@literal"),
77 |             ("span", "@span"),
78 |             ("scope", "@scope"),
79 |             ("category", "@category"),
80 |             ])
81 |         bp.show(p)
82 |     except Exception as error:
83 |         print(error, ": Cannot render graph with %d nodes and %d edges"%
84 |               (g.number_of_nodes(), g.number_of_edges()))
85 | 


--------------------------------------------------------------------------------
/pyConTextNLP/display/_mpld3.py:
--------------------------------------------------------------------------------
 1 | """This module provides pyConTextNLP visualization using matplotlib and mpld3"""
 2 | 
 3 | import networkx as nx
 4 | 
 5 | def graphDocument(g):
 6 |     """ """
 7 |     try:
 8 |         pos=nx.graphviz_layout(g)
 9 |     except:
10 |         pos=nx.spring_layout(g,iterations=20)
11 | 
12 |     nx.draw_networkx_edges(g,pos,alpha=0.3, edge_color='r')
13 |     xpos = [pos[n][0] for n in g.nodes()]
14 |     ypos = [pos[n][1] for n in g.nodes()]
15 |     text = [n.getPhrase() for n in g.nodes()]
16 | 


--------------------------------------------------------------------------------
/pyConTextNLP/display/html.py:
--------------------------------------------------------------------------------
 1 | """Module containing functions for generating various display options for pyConTextNLP"""
 2 | import copy
 3 | from ..utils import get_document_markups
 4 | from ..utils import get_section_markups
 5 | 
 6 | def __sort_by_span(_nodes):
 7 |     n = list(copy.copy(_nodes))
 8 |     n.sort(key=lambda x: x.getSpan())
 9 |     return n
10 | def __insert_color(txt,s,c):
11 |     """insert HTML span style into txt. The span will change the color of the
12 |     text located between s[0] and s[1]:
13 |     txt: txt to be modified
14 |     s: span of where to insert tag
15 |     c: color to set the span to"""
16 |     return txt[:s[0]]+'<span style="color: {0};">'.format(c)+\
17 |            txt[s[0]:s[1]]+'</span>'+txt[s[1]:]
18 | 
19 | def mark_text(txt,nodes,colors = {"name":"red","pet":"blue"},default_color="black"):
20 |     if not nodes:
21 |         return txt
22 |     else:
23 |         n = nodes.pop(-1)
24 |         return mark_text(__insert_color(txt,
25 |                                         n.getSpan(),
26 |                                         colors.get(n.getCategory()[0],default_color)),
27 |                          nodes,
28 |                          colors=colors)
29 | 
30 | def mark_document_with_html(doc,colors = {"name":"red","pet":"blue"}, default_color="black"):
31 |     """takes a ConTextDocument object and returns an HTML paragraph with marked phrases in the
32 |     object highlighted with the colors coded in colors
33 | 
34 |     doc: ConTextDocument
35 |     colors: dictionary keyed by ConText category with values valid HTML colors
36 | 
37 |     """
38 |     return """<p> {0} </p>""".format(" ".join([mark_text(m.graph['__txt'],
39 |                                                  __sort_by_span(m.nodes()),
40 |                                                  colors=colors,
41 |                                                  default_color=default_color) for m in get_document_markups(doc)]))
42 | 
43 | 
44 | 
45 | def mark_document_with_html_sections(doc,colors = {"name":"red","pet":"blue"}, default_color="black"):
46 |     """takes a ConTextDocument object and returns
47 |     a series of sections marked in HTML header tags followed by
48 |     HTML paragraphs with marked phrases in the
49 |     object highlighted with the colors coded in colors
50 | 
51 |     doc: ConTextDocument
52 |     colors: dictionary keyed by ConText category with values valid HTML colors
53 |     """
54 |     h = """"""
55 | 
56 |     for hierarchy in doc.getDocumentSections():
57 |         if hierarchy == 'document':
58 |             continue
59 |         for section in hierarchy:
60 |             h += """<h2> {0} </h2>""".format(section)
61 |             # print("## h2: ", section)
62 |         # print("#### Nodes:", context.getSectionNodes(section))
63 |         # print("#### Marked up sentences:", context.getSectionMarkups(section))
64 |     #     h += html.mark_document_with_html(context.getSectionMarkups(section),colors = colors, default_color="black")
65 |     #     print(h)
66 |     #
67 |             h += """<p> {0} </p>""".format(" ".join([mark_text(
68 |                         m.graph['__txt'], __sort_by_span(m.nodes()), colors=colors, default_color=default_color
69 |                     ) for m in get_section_markups(doc,section)
70 |                 ]))
71 | 
72 |     return h
73 | 


--------------------------------------------------------------------------------
/pyConTextNLP/helpers.py:
--------------------------------------------------------------------------------
  1 | #Copyright 2010 Brian E. Chapman
  2 | #
  3 | #Licensed under the Apache License, Version 2.0 (the "License");
  4 | #you may not use this file except in compliance with the License.
  5 | #You may obtain a copy of the License at
  6 | #
  7 | #    http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | #Unless required by applicable law or agreed to in writing, software
 10 | #distributed under the License is distributed on an "AS IS" BASIS,
 11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | #See the License for the specific language governing permissions and
 13 | #limitations under the License.
 14 | 
 15 | """
 16 | The module defines a class sentenceSplitter that defines how sentence splitting is to be done including
 17 | exception terms that include sentence termination terms but do not indicate a termination (e.g. Mrs.).
 18 | The exception terms are contained in the attribute exceptionTerms. Terms can be added or deleted
 19 | through the class methods addExceptionTerms and deleteExceptionTerms. A short list of default terms
 20 | common in English texts are included in the attribute defaultExceptions. By default these are
 21 | used when a sentenceSplitter instance is created.
 22 | """
 23 | class sentenceSplitter(object):
 24 |     """Class for splitting sentences"""
 25 | 
 26 |     def __init__(self, useDefaults=True, useCaseVariants=True):
 27 |         """
 28 |         useDefaults: Populate the exceptionTerms with default values
 29 |         useCaseVariants: add upper and lower case variants of terms also
 30 |         """
 31 | 
 32 |         self.defaultExceptions = ['.','Dr.','Mr.','Mrs.','Ms.','M.D.', 'D.O.',
 33 |                                'Ph.D.','D.M.D.','R.N.','B.A.','A.B.',
 34 |                                'B.S.','M.S.','q.','viz.','e.g.']
 35 |         self.exceptionTerms = set(())
 36 | 
 37 |         self.digits = set('0123456789')
 38 |         if useDefaults:
 39 |             for term in self.defaultExceptions:
 40 |                 try:
 41 |                     self.exceptionTerms.add(term)
 42 |                     if useCaseVariants:
 43 |                         self.exceptionTerms.add(term.lower())
 44 |                         self.exceptionTerms.add(term.upper())
 45 |                 except TypeError:
 46 |                     print("Terms must be of type string. You provided {0} which is a {1}".format(term,type(term)))
 47 | 
 48 |     def addExceptionTerms(self, *terms, **kwargs): #addCaseVariants=True):
 49 |         """add exception terms to list of terms not to terminate sentence at.
 50 |         If keyword argument addCaseVariants = True is provided, then also add the lower and upper case variants to the list """
 51 |         addCaseVariants = kwargs.pop('addCaseVariants', False)
 52 |         for t in terms:
 53 |             self.exceptionTerms.add(t)
 54 |             if addCaseVariants:
 55 |                 self.exceptionTerms.add(t.lower())
 56 |                 self.exceptionTerms.add(t.upper())
 57 | 
 58 |     def getExceptionTerms(self):
 59 |         return self.exceptionTerms
 60 |     def deleteExceptionTerms(self,*terms, **kwargs): #deleteCaseVariants=True):
 61 |         """delete exception terms from list of terms not to terminate sentence at.
 62 |         If keyword argument deleteCaseVariants = True is provided, then also delete the lower and upper case variants from the list"""
 63 |         deleteCaseVariants = kwargs.pop('deleteCaseVariants', False)
 64 |         for t in terms:
 65 |             self.exceptionTerms.discard(t)
 66 |             if deleteCaseVariants:
 67 |                 self.exceptionTerms.discard(t.lower())
 68 |                 self.exceptionTerms.discard(t.upper())
 69 | 
 70 |     def splitSentences(self,txt):
 71 |         """
 72 |         Splt txt into sentences a list of sentences is returned
 73 |         """
 74 |         txt = txt.split()
 75 |         sentences = []
 76 |         wordLoc = 0
 77 | 
 78 |         while wordLoc < len(txt):
 79 |             currentWord = txt[wordLoc]
 80 |             if currentWord[-1] in '.?!':
 81 |                 if currentWord in  self.exceptionTerms:
 82 |                     wordLoc += 1
 83 |                 # per discussion with A.G. dropped this exception, since assuming numbers only use decimal points if there
 84 |                 # are actual decimal point digits expressed and thus the period would not be the last character of the word.
 85 |                 #elif( self.digits.intersection(currentWord) and
 86 |                         #not set('()').intersection(currentWord)): # word doesn't include parentheses. Is this necessary?
 87 |                     #wordLoc += 1
 88 |                 else:
 89 |                     sentences.append(' '.join(txt[:wordLoc+1]))
 90 |                     txt = txt[wordLoc+1:]
 91 |                     wordLoc = 0
 92 |             else:
 93 |                 wordLoc += 1
 94 | 
 95 |         # if any texts remains (due to failure to identify a final sentence termination,
 96 |         # then take all remaining text and put into a sentence
 97 |         if txt:
 98 |             sentences.append(' '.join(txt) )
 99 | 
100 |         return sentences
101 | 


--------------------------------------------------------------------------------
/pyConTextNLP/io/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/74e5b9bc4de092dd8bef8e04b297369a7f3ea6d9/pyConTextNLP/io/__init__.py


--------------------------------------------------------------------------------
/pyConTextNLP/io/xml.py:
--------------------------------------------------------------------------------
 1 | """
 2 | module for creating XML files
 3 | """
 4 | import re
 5 | 
 6 | rlt = re.compile(r"""<""", re.UNICODE)
 7 | ramp = re.compile(r"""&""", re.UNICODE)
 8 | 
 9 | 
10 | def xmlScrub(tmp):
11 |     return rlt.sub(r"&lt;",ramp.sub(r"&amp;",u"{0}".format(tmp)))
12 | 
13 | 


--------------------------------------------------------------------------------
/pyConTextNLP/itemData.py:
--------------------------------------------------------------------------------
 1 | #Copyright 2010 Brian E. Chapman
 2 | #
 3 | #Licensed under the Apache License, Version 2.0 (the "License");
 4 | #you may not use this file except in compliance with the License.
 5 | #You may obtain a copy of the License at
 6 | #
 7 | #    http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | #Unless required by applicable law or agreed to in writing, software
10 | #distributed under the License is distributed on an "AS IS" BASIS,
11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #See the License for the specific language governing permissions and
13 | #limitations under the License.
14 | 
15 | """
16 | A module defining the contextItem class.
17 | """
18 | import yaml
19 | import urllib.request, urllib.error, urllib.parse
20 | 
21 | 
22 | def _get_fileobj(_file):
23 |     if not urllib.parse.urlparse(_file).scheme:
24 |         _file = "file://"+_file
25 |     return urllib.request.urlopen(_file, data=None)
26 | 
27 | def get_items(_file):
28 |     f0 = _get_fileobj(_file)
29 |     context_items =  [contextItem((d["Lex"],
30 |                                    d["Type"],
31 |                                    r"%s"%d["Regex"],
32 |                                    d["Direction"])) for d in yaml.load_all(f0)]
33 |     f0.close()
34 |     return context_items
35 | 
36 | 
37 | class contextItem(object):
38 | 
39 | 
40 |     def __init__(self, args):
41 |         self.__literal = args[0]
42 |         cs = args[1].split(",")
43 |         self.__category = []
44 |         for c in cs:
45 |             self.__category.append(c.lower().strip())
46 |         self.__re = r"%s"%args[2] # I need to figure out how to read this raw string in properly
47 |         self.__rule = args[3].lower()
48 | 
49 |         # generate regex from literal if no regex provided
50 |         if not self.__re:
51 |             self.__re = r"\b{}\b".format(self.__literal)
52 | 
53 |     def getLiteral(self):
54 |         """return the literal associated with this item"""
55 |         return self.__literal
56 |     def getCategory(self):
57 |         """return the list of categories associated with this item"""
58 |         return self.__category[:]
59 |     def categoryString(self):
60 |         """return the categories as a string delimited by '_'"""
61 |         return '_'.join(self.__category)
62 | 
63 | 
64 |     def isA(self,testCategory):
65 |         """test whether testCategory is one of the categories associated with self"""
66 |         try:
67 |             return testCategory.lower().strip() in self.__category
68 |         except:
69 |             for tc in testCategory:
70 |                 if( tc.lower().strip() in self.__category ):
71 |                     return True
72 |             return False
73 | 
74 |     def getRE(self):
75 |         return self.__re
76 |     def getRule(self):
77 |         return self.__rule
78 |     def __str__(self):
79 |         txt = """literal<<{0}>>; category<<{1}>>; re<<{2}>>; rule<<{3}>>""".format(
80 |             self.__literal,self.__category,self.__re, self.__rule)
81 |         return txt
82 |     def __repr__(self):
83 |         return self.__str__()
84 | 
85 | 


--------------------------------------------------------------------------------
/pyConTextNLP/pyConText.py:
--------------------------------------------------------------------------------
  1 | #Copyright 2010 Brian E. Chapman
  2 | #
  3 | #Licensed under the Apache License, Version 2.0 (the "License");
  4 | #you may not use this file except in compliance with the License.
  5 | #You may obtain a copy of the License at
  6 | #
  7 | #    http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | #Unless required by applicable law or agreed to in writing, software
 10 | #distributed under the License is distributed on an "AS IS" BASIS,
 11 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | #See the License for the specific language governing permissions and
 13 | #limitations under the License.
 14 | """
 15 | This module contains three class definitions that are used in the pyConText
 16 | algorithm. The pyConText algorithm relies on regular expressions to identify
 17 | sub-texts of interest
 18 | 
 19 | 1) termObject: a class that describes terms of interest within the text
 20 | 2) tagObject: a class inherited from termObject that describes modifiers
 21 | 3) pyConText: a class that implements the context algorithm
 22 | 
 23 | """
 24 | import re
 25 | from .ConTextMarkup import ConTextMarkup
 26 | from .io.xml import xmlScrub
 27 | import networkx as nx
 28 | 
 29 | 
 30 | ConTextDocumentXMLSkel=\
 31 | """
 32 | <ConTextDocument>
 33 | {0}
 34 | </ConTextDocument>
 35 | """
 36 | 
 37 | 
 38 | class ConTextDocument(object):
 39 |     """
 40 |     base class for context.
 41 |     build around markedTargets a list of termObjects representing desired terms
 42 |     found in text and markedModifiers, tagObjects found in the text
 43 |     """
 44 |     rb = re.compile(r"""\b""",re.UNICODE)
 45 |     def __init__(self,unicodeEncoding='utf-8'):
 46 |         """txt is the string to parse"""
 47 |         # __document capture the document level structure
 48 |         # for each sentence and then put in the archives when the next sentence
 49 |         # is processed
 50 |         self.__unicodeEncoding = unicodeEncoding
 51 |         self.__document = nx.DiGraph()
 52 |         self.__currentSentenceNum = 0
 53 |         self.__currentSectionNum = 0
 54 |         self.__document.add_node("document", category="section", __sectionNumber = self.__currentSectionNum)
 55 |         self.__currentSectionNum += 1
 56 |         self.__currentParent = "document"
 57 |         self.__root = "document"
 58 |         self.__documentGraph = None
 59 | 
 60 |     def insertSection(self,sectionLabel,setToParent=False):
 61 |         self.__document.add_edge(self.__currentParent,sectionLabel,category="section",__sectionNumber=self.__currentSectionNum)
 62 |         self.__currentSectionNum += 1
 63 |         if setToParent:
 64 |             self.__currentParent = sectionLabel
 65 | 
 66 |     def getDocument(self):
 67 |         return self.__document
 68 |     def getCurrentSentenceNumber(self):
 69 |         return self.__currentSentenceNum
 70 |     def getCurrentSectionNumber(self):
 71 |         return self.__currentSectionNum
 72 |     def setParent(self, label=None):
 73 |         self.__currentParent = label
 74 |     def getCurrentparent(self):
 75 |         return self.__currentParent
 76 |     def addSectionattributes(self,**kwargs):
 77 |         for key in kwargs.keys():
 78 |             self.__document.node[self.__currentParent][key] = kwargs[key]
 79 |     def getUnicodeEncoding(self):
 80 |         return self.__unicodeEncoding
 81 | 
 82 |     def addMarkup(self, markup):
 83 |         """
 84 |         add the markup as a node in the document attached to the current parent.
 85 |         """
 86 |         # I'm not sure if I want to be using copy here
 87 |         self.__document.add_edge(self.__currentParent,markup,
 88 |                 category="markup",
 89 |                 sentenceNumber=self.__currentSentenceNum)
 90 | 
 91 |         self.__currentSentenceNum += 1
 92 |     def retrieveMarkup(self,sentenceNumber):
 93 |         """
 94 |         retrieve the markup corresponding to sentenceNumber
 95 |         """
 96 |         edge = [e for e in self.__document.edges(data=True) if e[2]['category'] == "markup" and e[2]['sentenceNumber'] == sentenceNumber]
 97 |         if edge:
 98 |             return edge[0]
 99 | 
100 |     def getSectionNodes(self,sectionLabel = None, category="markup"):
101 |         if not sectionLabel:
102 |             sectionLabel = self.__currentParent
103 |         successors = [(e[2]['__sectionNumber'],e[1]) for e in self.__document.out_edges(sectionLabel, data=True)
104 |                                                             if e[2].get("category") == category]
105 |         successors.sort()
106 |         tmp = list(zip(*successors))
107 |         return tmp[1]
108 | 
109 |     def getSectionMarkups(self, sectionLabel = None, returnSentenceNumbers=True ):
110 |         """return the markup graphs for the section ordered by sentence number"""
111 |         if not sectionLabel:
112 |             sectionLabel = self.__currentParent
113 |         successors = [(e[2]['sentenceNumber'],e[1]) for e in self.__document.out_edges(sectionLabel, data=True)
114 |                                                             if e[2].get("category") == "markup"]
115 |         successors.sort()
116 |         if returnSentenceNumbers:
117 |             return successors
118 |         else:
119 |             tmp = list(zip(*successors))
120 |             return tmp[1]
121 | 
122 |     def getDocumentSections(self):
123 |         edges = [ (e[2]['__sectionNumber'],e[1]) for e in self.__document.edges(data=True) if e[2].get("category") == "section"]
124 |         edges.sort()
125 |         tmp = list(zip(*edges))
126 |         if len(tmp) > 1:
127 |             tmp = [self.__root, tmp[1]]
128 |         else:
129 |             tmp = [self.__root]
130 |         return tmp
131 | 
132 |     def getSectionText(self,sectionLabel = None ):
133 |         """
134 |         """
135 |         markups = self.getSectionMarkups(sectionLabel,returnSentenceNumbers = False)
136 |         txt = " ".join([ m.getText() for m in markups])
137 |         return txt
138 | 
139 |     def getDocumentGraph(self):
140 |         if not self.__documentGraph:
141 |             self.computeDocumentGraph()
142 |         return self.__documentGraph
143 | 
144 |     def getXML(self):
145 |         txt = ""
146 | # first generate string for all the sentences from the document in order to compute document level offsets
147 |         documentString = ""
148 |         sentenceOffsets = {}
149 |         sections = self.getDocumentSections()
150 |         for s in sections:
151 |             markups = self.getSectionMarkups(s)
152 |             for m in markups:
153 |                 sentenceOffsets[m[0]] = len(documentString)
154 |                 documentString = documentString + m[1].getText()+" "
155 | 
156 |         txt += xmlScrub(documentString)
157 |         # get children sections of root
158 | 
159 | 
160 |         for s in sections:
161 |             txt += """<section>\n<sectionLabel> {0} </sectionLabel>\n""".format(s)
162 |             markups = self.getSectionMarkups(s)
163 |             for m in markups:
164 |                 txt += "<sentence>\n<sentenceNumber> %d </sentenceNumber>\n<sentenceOffset> %d </sentenceOffset></sentence>\n%s"%(
165 |                     (m[0],sentenceOffsets[m[0]],m[1].getXML()))
166 |             txt += """</section>\n"""
167 | 
168 |         return ConTextDocumentXMLSkel.format(txt)
169 |     def __unicode__(self):
170 |         txt = '_'*42+"\n"
171 |         return txt
172 |     def __str__(self):
173 |         return self.__unicode__()
174 |     def __repr__(self):
175 |         return self.__unicode__()#.encode('utf-8')
176 | 
177 |     def computeDocumentGraph(self, verbose=False):
178 |         """Create a single document graph from the union of the graphs created
179 |            for each sentence in the archive. Note that the algorithm in NetworkX
180 |            is different based on whether the Python version is greater than or
181 |            equal to 2.6"""
182 |         # Note that this as written does not include the currentGraph in the DocumentGraph
183 |         # Maybe this should be changed
184 |         self.__documentGraph = ConTextMarkup()
185 |         if verbose:
186 |             print("Document markup has {0d} edges".format(self.__document.number_of_edges()))
187 |         markups = [e[1] for e in self.__document.edges(data=True) if e[2].get('category') == 'markup']
188 |         if verbose:
189 |             print("Document markup has {0d} conTextMarkup objects".format(len(markups)))
190 |         for i in range(len(markups)):
191 |         #for m in markups:
192 |             m = markups[i]
193 |             if verbose:
194 |                 print("markup {0d} has {1d} total items including {2d} targets".format(i,m.number_of_nodes(),m.getNumMarkedTargets()))
195 | 
196 |             self.__documentGraph = nx.union(m,self.__documentGraph)
197 |             if verbose:
198 |                 print("documentGraph now has {0d} nodes".format(self.__documentGraph.number_of_nodes()))
199 | 
200 | 
201 | 
202 | 
203 | 


--------------------------------------------------------------------------------
/pyConTextNLP/tagObject.py:
--------------------------------------------------------------------------------
  1 | """
  2 | tabObject module
  3 | """
  4 | 
  5 | import uuid
  6 | import copy
  7 | from .io.xml import xmlScrub
  8 | 
  9 | tagObjectXMLSkel=\
 10 | """
 11 | <tagObject>
 12 | <id> {0} </id>
 13 | <phrase> {1} </phrase>
 14 | <literal> {2} </literal>
 15 | <category> {3} </category>
 16 | <spanStart> {4:d} </spanStart>
 17 | <spanStop> {5:d} </spanStop>
 18 | <scopeStart> {6:d} </scopeStart>
 19 | <scopeStop> {7:d} </scopeStop>
 20 | </tagObject>
 21 | """
 22 | 
 23 | 
 24 | class tagObject(object):
 25 |     """
 26 |     A class that describes terms of interest in the text.
 27 |     tagObject is characterized by the following attributes
 28 |     1) The contextItem defining the tag
 29 |     3) The location of the tag within the text being parsed
 30 | 
 31 |     """
 32 |     def __init__(self, item, ConTextCategory, scope=None, tagid=None, **kwargs):
 33 |         """
 34 |         item: contextItem used to generate term
 35 |         ConTextCategory: category this term is being used for in pyConText
 36 | 
 37 |         variants
 38 |         """
 39 |         self.__item = item
 40 |         self.__category = self.__item.getCategory()
 41 |         self.__spanStart = 0
 42 |         self.__spanEnd = 0
 43 |         self.__foundPhrase = ''
 44 |         self.__foundDict = {}
 45 |         self.__ConTextCategory = ConTextCategory
 46 |         if not tagid:
 47 |             tagid = uuid.uid1().int
 48 |         self.__tagID = tagid
 49 |         if scope == None:
 50 |             self.__scope = []
 51 |         else:
 52 |             self.__scope = list(scope)
 53 |         self.__SCOPEUPDATED = False
 54 | 
 55 | 
 56 |     def setScope(self):
 57 |         """
 58 |         applies the objects own rule and span to modify the object's scope
 59 |         Currently only "forward" and "backward" rules are implemented
 60 |         """
 61 | 
 62 |         if 'forward' in self.__item.getRule().lower():
 63 |             self.__scope[0] = self.getSpan()[1]
 64 |         elif 'backward' in self.__item.getRule().lower():
 65 |             self.__scope[1] = self.getSpan()[0]
 66 | 
 67 | 
 68 |     def getTagID(self):
 69 |         return self.__tagID
 70 | 
 71 | 
 72 |     def parseRule(self):
 73 |         """parse the rule for the associated"""
 74 |         pass
 75 | 
 76 | 
 77 |     def getScope(self):
 78 |         return self.__scope
 79 | 
 80 | 
 81 |     def getRule(self):
 82 |         return self.__item.getRule()
 83 | 
 84 | 
 85 |     def limitScope(self, obj):
 86 |         """If self and obj are of the same category or if obj has a rule of
 87 |         'terminate', use the span of obj to
 88 |         update the scope of self
 89 |         returns True if a obj modified the scope of self"""
 90 |         if not self.getRule() or self.getRule()== 'terminate' or \
 91 |              (not self.isA(obj.getCategory()) and obj.getRule() != 'terminate'):
 92 |             return False
 93 |         originalScope = copy.copy((self.getScope()))
 94 |         if 'forward' in self.getRule().lower() or \
 95 |             'bidirectional' in self.getRule().lower():
 96 |             if obj > self:
 97 |                 self.__scope[1] = min(self.__scope[1],obj.getSpan()[0])
 98 |         elif 'backward' in self.getRule().lower() or \
 99 |               'bidirectional' in self.getRule().lower():
100 |             if obj < self:
101 |                 self.__scope[0] = max(self.__scope[0],obj.getSpan()[1])
102 |         if originalScope != self.__scope:
103 |             return True
104 |         else:
105 |             return False
106 | 
107 | 
108 |     def applyRule(self, term):
109 |         """applies self's rule to term. If the start of term lines within
110 |         the span of self, then term may be modified by self"""
111 |         if not self.getRule() or self.getRule() == 'terminate':
112 |             return False
113 |         if self.__scope[0] <= term.getSpan()[0] <= self.__scope[1]:
114 |             return True 
115 | 
116 | 
117 |     def getConTextCategory(self):
118 |         return self.__ConTextCategory
119 | 
120 | 
121 |     def getXML(self):
122 |         return   tagObjectXMLSkel.format(self.getTagID(),xmlScrub(self.getPhrase()),
123 |                                    xmlScrub(self.getLiteral()),xmlScrub(self.getCategory()),
124 |                                    self.getSpan()[0],self.getSpan()[1],
125 |                                    self.getScope()[0],self.getScope()[1])
126 | 
127 | 
128 |     def getBriefDescription(self):
129 |         description = u"""<id> {0} </id> """.format(self.getTagID())
130 |         description+= u"""<phrase> {0} </phrase> """.format(self.getPhrase())
131 |         description+= u"""<category> {0} </category> """.format(self.getCategory())
132 |         return description
133 | 
134 | 
135 |     def getLiteral(self):
136 |         """returns the term defining this object"""
137 |         return self.__item.getLiteral()
138 | 
139 | 
140 |     def getCategory(self):
141 |         """returns the category (e.g. CONJUNCTION) for this object"""
142 |         return self.__category[:]
143 | 
144 | 
145 |     def categoryString(self):
146 |         return u'_'.join(self.__category)
147 | 
148 | 
149 |     def isA(self,category):
150 |         return self.__item.isA(category)
151 | 
152 | 
153 |     def setCategory(self,category):
154 |         self.__category = category
155 | 
156 | 
157 |     def replaceCategory(self,oldCategory, newCategory):
158 |         for index, item in enumerate(self.__category):
159 |             if item == oldCategory.lower().strip():
160 |                 try:
161 |                     self.__category[index] = newCategory.lower().strip()
162 |                 except:
163 |                     del self.__category[index]
164 |                     self.__category.extend([nc.lower().strip() for nc in newCategory])
165 | 
166 | 
167 |     def setSpan(self, span):
168 |         """set the span within the associated text for this object"""
169 |         self.__spanStart = span[0]
170 |         self.__spanEnd = span[1]
171 | 
172 | 
173 |     def getSpan(self):
174 |         """return the span within the associated text for this object"""
175 |         return self.__spanStart,self.__spanEnd
176 | 
177 |     def setPhrase(self, phrase):
178 |         """set the actual matched phrase used to generate this object"""
179 |         self.__foundPhrase = phrase
180 | 
181 | 
182 |     def getPhrase(self):
183 |         """return the actual matched phrase used to generate this object"""
184 |         return self.__foundPhrase
185 | 
186 | 
187 |     def setMatchedGroupDictionary(self, mdict):
188 |         """set the foundDict variable to mdict. This gets the name/value pair for each NAMED group within the regular expression"""
189 |         self.__foundDict = mdict.copy()
190 | 
191 | 
192 |     def getMatchedGroupDictionary(self):
193 |         """return a copy of the matched group dictionary"""
194 |         return self.__foundDict.copy()
195 | 
196 | 
197 |     def dist(self, obj):
198 |         """returns the minimum distance from the current object and obj.
199 |         Distance is measured as current start to object end or current end to object start"""
200 |         return min(abs(self.__spanEnd-obj.__spanStart), abs(self.__spanStart-obj.__spanEnd))
201 | 
202 |     def __lt__(self, other): return self.__spanStart < other.__spanStart
203 |     def __le__(self, other): return self.__spanStart <= other.__spanStart
204 |     def __eq__(self, other):
205 |         return (self.__spanStart == other.__spanStart and
206 |                 self.__spanEnd == other.__spanEnd)
207 |     def __ne__(self, other): return self.__spanStart != other.__spanStart
208 |     def __gt__(self, other): return self.__spanStart > other.__spanStart
209 |     def __ge__(self, other): return self.__spanStart >= other.__spanStart
210 | 
211 |     def __hash__(self):
212 |         return hash(repr(self))
213 | 
214 | 
215 |     def encompasses(self, other):
216 |         """tests whether other is completely encompassed with the current object
217 |            ??? should we not prune identical span tagObjects???"""
218 |         if self.__spanStart <= other.__spanStart and \
219 |            self.__spanEnd >= other.__spanEnd:
220 |             return True
221 |         else:
222 |              return False
223 | 
224 | 
225 |     def overlap(self, other):
226 |         """
227 |         tests whether other overlaps with self
228 |         """
229 |         if (other.__spanStart >= self.__spanStart and other.__spanStart <= self.__spanEnd ) or \
230 |            (other.__spanEnd >= self.__spanStart and other.__spanEnd <= self.__spanEnd):
231 |             return True
232 |         else:
233 |             return False
234 | 
235 | 
236 |     def leftOverlap(self, other):
237 |         """
238 |         tests whether other has partial overlap to the left with self.
239 |         """
240 |         if self.encompasses(other):
241 |             return False
242 |         if self.overlap(other) and self.__gt__(other):
243 |             return True
244 |         else:
245 |             return False
246 | 
247 | 
248 |     def rightOverlap(self, other):
249 |         """
250 |         tests whether other has partial overlap to the right with self
251 |         """
252 |         if self.encompasses(other):
253 |             return False
254 |         if self.overlap(other) and self.__lt__(other):
255 |             return True
256 |         else:
257 |             return False
258 | 
259 | 
260 |     def __unicode__(self):
261 |         txt = self.getBriefDescription()
262 |         return txt
263 | 
264 | 
265 |     def __str__(self):
266 |         return self.__unicode__()
267 |     def __repr__(self):
268 |         return self.__unicode__()
269 | 
270 | 
271 | 
272 | 


--------------------------------------------------------------------------------
/pyConTextNLP/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/74e5b9bc4de092dd8bef8e04b297369a7f3ea6d9/pyConTextNLP/tests/__init__.py


--------------------------------------------------------------------------------
/pyConTextNLP/tests/test_base.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 
 3 | """
 4 | """Modifying the testing structure to include a class setup and teardown"""
 5 | from unittest import TestCase
 6 | import warnings
 7 | 
 8 | 
 9 | class TestClass(TestCase):
10 |     def setUp(self):
11 |         """setUp is called before each test is run, tearDown is called after"""
12 |         pass
13 |     def tearDown(self):
14 |         pass
15 | 


--------------------------------------------------------------------------------
/pyConTextNLP/tests/test_contextitem.py:
--------------------------------------------------------------------------------
 1 | import pyConTextNLP.itemData as itemData
 2 | import pytest
 3 | 
 4 | @pytest.fixture(scope="module")
 5 | def items():
 6 | 
 7 |     return [ ["pulmonary embolism",
 8 |               "PULMONARY_EMBOLISM",
 9 |              r"""pulmonary\s(artery )?(embol[a-z]+)""",
10 |               ""],
11 |              ["no gross evidence of",
12 |               "PROBABLE_NEGATED_EXISTENCE",
13 |               "",
14 |               "forward"]]
15 | 
16 | def test_instantiate_contextItem0(items):
17 |     for item in items:
18 |         assert itemData.contextItem(item)
19 | 
20 | 
21 | def test_contextItem_rule(items):
22 |     cti = itemData.contextItem(items[1])
23 | 
24 |     assert cti.getRule() == "forward"
25 | 
26 | 
27 | def test_contextItem_literal(items):
28 |     cti = itemData.contextItem(items[0])
29 | 
30 |     assert cti.getLiteral() == "pulmonary embolism"
31 | 
32 | 
33 | def test_contextItem_category(items):
34 |     cti = itemData.contextItem(items[1])
35 |     assert cti.getCategory() == ["probable_negated_existence"]
36 | 
37 | def test_contextItem_isa(items):
38 |     cti = itemData.contextItem(items[0])
39 |     assert cti.isA("pulmonary_embolism")
40 | 
41 | 
42 | def test_contextItem_isa1(items):
43 |     cti = itemData.contextItem(items[0])
44 |     assert cti.isA("PULMONARY_EMBOLISM")
45 | 
46 | 
47 | def test_contextItem_isa2(items):
48 |     cti = itemData.contextItem(items[1])
49 |     assert cti.isA("PROBABLE_NEGATED_EXISTENCE")
50 | 
51 | 
52 | def test_contextItem_getRE(items):
53 |     cti = itemData.contextItem(items[1])
54 |     assert cti.getRE() == r'\b%s\b'%items[1][0]
55 | 
56 | 
57 | def test_contextItem_getRE1(items):
58 |     cti = itemData.contextItem(items[0])
59 |     assert cti.getRE() == r"""pulmonary\s(artery )?(embol[a-z]+)"""
60 | 
61 | 


--------------------------------------------------------------------------------
/pyConTextNLP/tests/test_contextmarkup.py:
--------------------------------------------------------------------------------
 1 | from pyConTextNLP.ConTextMarkup import ConTextMarkup
 2 | import pytest
 3 | 
 4 | @pytest.fixture(scope="module")
 5 | def sent1():
 6 |     return 'kanso <Diagnosis>**diabetes**</Diagnosis> utesl\xf6t eller diabetes men inte s\xe4kert. Vi siktar p\xe5 en r\xf6ntgenkontroll. kan det vara nej panik\xe5ngesten\n?'
 7 | 
 8 | @pytest.fixture(scope="module")
 9 | def sent2():
10 |     return 'IMPRESSION: 1. LIMITED STUDY DEMONSTRATING NO GROSS EVIDENCE OF SIGNIFICANT PULMONARY EMBOLISM.'
11 | @pytest.fixture(scope="module")
12 | def sent3():
13 |     return 'This is a sentence that does not end with a number. But this sentence ends with 1. So this should be recognized as a third sentence.'
14 | 
15 | @pytest.fixture(scope="module")
16 | def sent4():
17 |     return 'This is a sentence with a numeric value equal to 1.43 and should not be split into two parts.'
18 | 
19 | @pytest.fixture(scope="module")
20 | def items():
21 |     return [ ["pulmonary embolism",
22 |                     "PULMONARY_EMBOLISM",
23 |                     r"""pulmonary\s(artery )?(embol[a-z]+)""",
24 |                     ""],
25 |                     ["no gross evidence of",
26 |                     "PROBABLE_NEGATED_EXISTENCE",
27 |                     "",
28 |                     "forward"]]
29 | 
30 | def test_setRawText1(sent1):
31 |     context = ConTextMarkup()
32 |     context.setRawText(sent1)
33 |     assert context.getRawText() == sent1
34 | 
35 | def test_scrub_preserve_unicode(sent1):
36 |     context = ConTextMarkup()
37 |     context.setRawText(sent1)
38 |     context.cleanText(stripNonAlphaNumeric=True)
39 |     assert context.getText().index(u'\xf6') == 40
40 | 
41 | def test_scrub_text(sent2):
42 |     context = ConTextMarkup()
43 |     context.setRawText(sent2)
44 |     context.cleanText(stripNonAlphaNumeric=True)
45 |     assert context.getText().rfind(u'.') == -1
46 | 


--------------------------------------------------------------------------------
/pyConTextNLP/tests/test_env.py:
--------------------------------------------------------------------------------
 1 | def test_yaml():
 2 |     import yaml
 3 |     assert yaml
 4 | 
 5 | def test_networkx():
 6 |     import networkx as nx
 7 |     assert nx
 8 | 
 9 | def test_networkx_v2x():
10 |     import networkx as nx
11 |     assert nx.__version__[0] == '2'
12 | 


--------------------------------------------------------------------------------
/pyConTextNLP/tests/test_helpers.py:
--------------------------------------------------------------------------------
 1 | import pyConTextNLP.helpers as helpers
 2 | import pytest
 3 | 
 4 | @pytest.fixture(scope="module")
 5 | def splitter():
 6 |     return helpers.sentenceSplitter()
 7 | 
 8 | 
 9 | def test_createSentenceSplitter():
10 |         assert helpers.sentenceSplitter()
11 | 
12 | 
13 | def test_getExceptionTerms(splitter):
14 |     assert splitter.getExceptionTerms()
15 | 
16 | 
17 | def test_addExceptionTermsWithoutCaseVariants(splitter):
18 |     splitter.addExceptionTerms("D.D.S.", "D.O.")
19 |     assert ("D.O." in splitter.getExceptionTerms())
20 |     #assert ("d.o." in splitter.getExceptionTerms())
21 | 
22 | 
23 | def test_addExceptionTermsWithCaseVariants(splitter):
24 |     splitter.addExceptionTerms("D.D.S.", "D.O.",addCaseVariants=True)
25 |     assert ("d.o." in splitter.getExceptionTerms())
26 |    
27 | 
28 | def test_deleteExceptionTermsWithoutCaseVariants(splitter):
29 |     splitter.deleteExceptionTerms("M.D.")
30 |     assert ("M.D." not in splitter.getExceptionTerms())
31 |     assert ("m.d." in splitter.getExceptionTerms())
32 | 


--------------------------------------------------------------------------------
/pyConTextNLP/tests/test_itemData.py:
--------------------------------------------------------------------------------
 1 | import pyConTextNLP.itemData as itemData
 2 | from pathlib import PurePath
 3 | import os
 4 | import pytest
 5 | 
 6 | 
 7 | @pytest.fixture(scope="session")
 8 | def get_tmp_dirs():
 9 |     pass
10 | 
11 | def test_get_fileobj_1():
12 |     fobj = PurePath(PurePath(os.path.abspath(__file__)).parent, "..", "..", "KB", "test.yml")
13 |     yaml_fo = itemData.get_fileobj(str(fobj))
14 |     assert yaml_fo
15 | 
16 | def test_get_fileobj_2():
17 |     wdir = PurePath(os.path.abspath(__file__))#, "..", "..", "KB")
18 |     fobj = PurePath(wdir.parent, "..", "..", "KB", "test.yml")
19 |     yfo = itemData.get_fileobj("file://"+str(fobj))
20 |     assert yfo
21 | 
22 | def test_get_fileobj_3():
23 |     yfo = itemData.get_fileobj(
24 |             "https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/test.yml")
25 |     assert yfo
26 | 


--------------------------------------------------------------------------------
/pyConTextNLP/utils.py:
--------------------------------------------------------------------------------
 1 | """ """
 2 | 
 3 | def get_document_markups(document):
 4 |     """ Given a ConTextDocument return an ordered list of the ConTextmarkup objects consistituting the document"""
 5 |     tmp = [(e[1],e[2]['sentenceNumber']) for e in document.getDocument().edges(data=True) if
 6 |            e[2].get('category') == 'markup']
 7 |     tmp.sort(key=lambda x:x[1])
 8 |     return [t[0] for t in tmp]
 9 | 
10 | def get_section_markups(document, sectionLabel):
11 |     """ Given a ConTextDocument and sectionLabel, return an ordered list of the ConTextmarkup objects in that section"""
12 |     tmp = [(e[1],e[2]['sentenceNumber']) for e in document.getDocument().out_edges(sectionLabel, data=True) if
13 |            e[2].get('category') == 'markup']
14 |     tmp.sort(key=lambda x:x[1])
15 |     return [t[0] for t in tmp]
16 | 
17 | def conceptInDocument(document, concept):
18 |     """tests whether concept is in any nodes of document"""
19 |     pass
20 | 


--------------------------------------------------------------------------------
/pyConTextNLP/version.py:
--------------------------------------------------------------------------------
1 | __version__="0.7.0.0"
2 | 


--------------------------------------------------------------------------------
/requirements-py2.txt:
--------------------------------------------------------------------------------
1 | unicodecsv
2 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [wheel]
2 | universal = 1
3 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
  1 | from setuptools import setup, find_packages  # Always prefer setuptools over distutils
  2 | from codecs import open  # To use a consistent encoding
  3 | from os import path
  4 | 
  5 | version = {}
  6 | here = path.abspath(path.dirname(__file__))
  7 | 
  8 | 
  9 | with open(path.join("pyConTextNLP","version.py")) as f0:
 10 |     exec(f0.read(), version)
 11 | print(version)
 12 | 
 13 | # Get the long description from the relevant file
 14 | with open(path.join(here, 'README.rst'), encoding='utf-8') as f:
 15 |     long_description = f.read()
 16 | 
 17 | setup(
 18 |     name='pyConTextNLP',
 19 | 
 20 |     # Versions should comply with PEP440.  For a discussion on single-sourcing
 21 |     # the version across setup.py and the project code, see
 22 |     # https://packaging.python.org/en/latest/single_source_version.html
 23 |     version=version["__version__"],
 24 | 
 25 |     description='A Python implementation of the ConText algorithm',
 26 |     long_description=long_description,
 27 | 
 28 |     # The project's main homepage.
 29 |     url='https://github.com/chapmanbe/pyConTextNLP',
 30 | 
 31 |     # Author details
 32 |     author='Brian Chapman',
 33 | 
 34 |     author_email='brian.chapman@utah.edu',
 35 | 
 36 |     # Choose your license
 37 |     license='http://www.apache.org/licenses/LICENSE-2.0',
 38 | 
 39 |     # See https://pypi.python.org/pypi?%3Aaction=list_classifiers
 40 |     classifiers=[
 41 |         # How mature is this project? Common values are
 42 |         #   3 - Alpha
 43 |         #   4 - Beta
 44 |         #   5 - Production/Stable
 45 |         'Development Status :: 4 - Beta',
 46 | 
 47 |         # Indicate who your project is intended for
 48 |         #'Intended Audience :: Students',
 49 |         #'Topic :: Software Development :: Build Tools',
 50 | 
 51 |         # Pick your license as you wish (should match "license" above)
 52 |         #'License :: OSI Approved :: Apache2',
 53 | 
 54 |         # Specify the Python versions you support here. In particular, ensure
 55 |         # that you indicate whether you support Python 2, Python 3 or both.
 56 |         #'Programming Language :: Python :: 2',
 57 |         #'Programming Language :: Python :: 2.6',
 58 |         'Programming Language :: Python :: 3',
 59 |     ],
 60 | 
 61 |     # What does your project relate to?
 62 |     keywords='ConText NLP',
 63 | 
 64 |     # You can just specify the packages manually here if your project is
 65 |     # simple. Or you can use find_packages().
 66 |     packages=find_packages(exclude=['contrib', 
 67 |                                     'docs', 
 68 |                                     'pyConText',
 69 |                                     'tests*']),
 70 | 
 71 |     # List run-time dependencies here.  These will be installed by pip when your
 72 |     # project is installed. For an analysis of "install_requires" vs pip's
 73 |     # requirements files see:
 74 |     # https://packaging.python.org/en/latest/requirements.html
 75 |     install_requires=['networkx', 'pyyaml'],
 76 | 
 77 |     # List additional groups of dependencies here (e.g. development dependencies).
 78 |     # You can install these using the following syntax, for example:
 79 |     # $ pip install -e .[dev,test]
 80 |     extras_require = {
 81 |         'dev': ['check-manifest'],
 82 |         'test': ['coverage'],
 83 |     },
 84 | 
 85 |     # If there are data files included in your packages that need to be
 86 |     # installed, specify them here.  If using Python 2.6 or less, then these
 87 |     # have to be included in MANIFEST.in as well.
 88 |     package_data={
 89 |     },
 90 | 
 91 |     # Although 'package_data' is the preferred approach, in some case you may
 92 |     # need to place data files outside of your packages.
 93 |     # see http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files
 94 |     # In this case, 'data_file' will be installed into '<sys.prefix>/my_data'
 95 |     #data_files=[('my_data', ['data/data_file'])],
 96 | 
 97 |     # To provide executable scripts, use entry points in preference to the
 98 |     # "scripts" keyword. Entry points provide cross-platform support and allow
 99 |     # pip to create the appropriate form of executable for the target platform.
100 |     #entry_points={
101 |     #    'console_scripts': [
102 |     #        'sample=sample:main',
103 |     #    ],
104 |     #},
105 | )
106 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/74e5b9bc4de092dd8bef8e04b297369a7f3ea6d9/tests/__init__.py


--------------------------------------------------------------------------------
/tests/pyConTextNLP/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/74e5b9bc4de092dd8bef8e04b297369a7f3ea6d9/tests/pyConTextNLP/__init__.py


--------------------------------------------------------------------------------
/tests/pyConTextNLP/display/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/74e5b9bc4de092dd8bef8e04b297369a7f3ea6d9/tests/pyConTextNLP/display/__init__.py


--------------------------------------------------------------------------------
/tests/pyConTextNLP/tests2.py:
--------------------------------------------------------------------------------
 1 | from pyConTextNLP import pyConTextGraph as pyConText
 2 | import networkx as nx
 3 | 
 4 | def test_ConTextMarkup():
 5 |     assert isinstance(pyConText.ConTextMarkup(), nx.DiGraph)
 6 | 
 7 | 
 8 | def markup_sentence(s, modifiers, targets, prune_inactive=True):
 9 |     """
10 |     """
11 |     markup = pyConText.ConTextMarkup()
12 |     markup.setRawText(s)
13 |     markup.cleanText()
14 |     markup.markItems(modifiers, mode="modifier")
15 |     markup.markItems(targets, mode="target")
16 |     markup.pruneMarks()
17 |     markup.dropMarks('Exclusion')
18 |     # apply modifiers to any targets within the modifiers scope
19 |     markup.applyModifiers()
20 |     markup.pruneSelfModifyingRelationships()
21 |     if prune_inactive:
22 |         markup.dropInactiveModifiers()
23 |     return markup
24 | 


--------------------------------------------------------------------------------