├── LICENSE ├── README.md ├── __init__.py ├── clinical_sectionizer ├── __init__.py ├── _version.py ├── sectionizer.py ├── text_sectionizer.py └── util.py ├── img └── viz_ent.png ├── notebooks ├── 00-clinical_sectionizer.ipynb ├── 01-adding_sections.ipynb ├── 02-text_sectionizer.ipynb ├── 03-subsections.ipynb ├── example_discharge_summary.txt └── with_compile_flags.ipynb ├── resources ├── patrick_section_patterns.json ├── spacy_section_patterns.jsonl └── text_section_patterns.jsonl ├── setup.py └── tests ├── test_sectionizer.py └── test_textsectionizer.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 medspacy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Clinical Sectionizer 2 | This package offers a component for tagging clinical section titles in docs. 3 | 4 | # This package is deprecated! 5 | Development for `clinical_sectionizer` has been moved to [medSpaCy](https://github.com/medspacy/medspacy) and should now be installed as: 6 | 7 | ```bash 8 | pip install medspacy 9 | ``` 10 | 11 | ```python 12 | # Option 1: Load with a medspacy pipeline 13 | import medspacy 14 | nlp = medspacy.load(enable=["sectionizer"]) 15 | print(nlp.pipe_names) 16 | 17 | # Option 2: Manually add to a spaCy model 18 | import spacy 19 | from medspacy.section_detection import Sectionizer 20 | nlp = spacy.load("en_core_web_sm") 21 | nlp.add_pipe(Sectionizer(nlp)) 22 | ``` 23 | [

](https://github.com/medspacy/medspacy) 24 | 25 | Please see the [medSpaCy](https://github.com/medspacy/medspacy) GitHub page for additional information and documentation. 26 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/medspacy/sectionizer/83e1e484a3f067b105c72d838bf439ce7695ad55/__init__.py -------------------------------------------------------------------------------- /clinical_sectionizer/__init__.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.simplefilter('once', DeprecationWarning) 3 | warnings.warn("clinical_sectionizer is now *deprecated*. Please use medspacy.section_detection instead: `pip install medspacy`", RuntimeWarning) 4 | 5 | from .sectionizer import Sectionizer 6 | from .text_sectionizer import TextSectionizer 7 | 8 | from ._version import __version__ 9 | 10 | __all__ = ["Sectionizer", "TextSectionizer"] 11 | -------------------------------------------------------------------------------- /clinical_sectionizer/_version.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.0.0.1' 2 | -------------------------------------------------------------------------------- /clinical_sectionizer/sectionizer.py: -------------------------------------------------------------------------------- 1 | from spacy.tokens import Doc, Token, Span 2 | from spacy.matcher import Matcher, PhraseMatcher 3 | 4 | # Filepath to default rules which are included in package 5 | from os import path 6 | from pathlib import Path 7 | import re 8 | import warnings 9 | 10 | from . 
import util  # completes the "from . import util" statement begun on the previous line

# Register the custom attributes populated by the Sectionizer.
# force=True lets the module be re-imported without spaCy raising on duplicates.
Doc.set_extension("sections", default=list(), force=True)
Doc.set_extension("section_titles", getter=util.get_section_titles, force=True)
Doc.set_extension(
    "section_headers", getter=util.get_section_headers, force=True
)
Doc.set_extension("section_spans", getter=util.get_section_spans, force=True)
Doc.set_extension(
    "section_parents", getter=util.get_section_parents, force=True
)

Token.set_extension("section_span", default=None, force=True)
Token.set_extension("section_title", default=None, force=True)
Token.set_extension("section_header", default=None, force=True)
Token.set_extension("section_parent", default=None, force=True)

# Set span attributes to the attribute of the first token
# in case there is some overlap between a span and a new section header
Span.set_extension(
    "section_span", getter=lambda x: x[0]._.section_span, force=True
)
Span.set_extension(
    "section_title", getter=lambda x: x[0]._.section_title, force=True
)
Span.set_extension(
    "section_header", getter=lambda x: x[0]._.section_header, force=True
)
# Fixed: this getter previously returned the first token's section_header.
Span.set_extension(
    "section_parent", getter=lambda x: x[0]._.section_parent, force=True
)

DEFAULT_RULES_FILEPATH = path.join(
    Path(__file__).resolve().parents[1],
    "resources",
    "spacy_section_patterns.jsonl",
)

# Section title -> Span attributes set on entities found in that section
# (used when the Sectionizer is created with add_attrs=True).
# NOTE(review): these keys must match the section titles used by the loaded
# patterns; confirm against resources/spacy_section_patterns.jsonl.
DEFAULT_ATTRS = {
    "past_medical_history": {"is_historical": True},
    "sexual_and_social_history": {"is_historical": True},
    "family_history": {"is_family": True},
    "patient_instructions": {"is_hypothetical": True},
    "education": {"is_hypothetical": True},
    "allergy": {"is_hypothetical": True},
}
from collections import namedtuple
Section = namedtuple("Section", field_names=["section_title",
                                             "section_header",
                                             "section_parent",
                                             "section_span"])


class Sectionizer:
    name = "sectionizer"

    def __init__(
        self,
        nlp,
        patterns="default",
        add_attrs=False,
        max_scope=None,
        phrase_matcher_attr="LOWER",
        require_start_line=False,
        require_end_line=False,
        newline_pattern=r"[\n\r]+[\s]*$",
    ):
        """Create a new Sectionizer component. The sectionizer will search for spans in the text which
        match section header patterns, such as 'Past Medical History:'. Sections will be represented
        in custom attributes as:
            section_title (str): A normalized title of the section. Example: 'past_medical_history'
            section_header (Span): The Span of the doc which was matched as a section header.
                Example: 'Past Medical History:'
            section (Span): The entire section of the note, starting with section_header and up until the end
                of the section, which will be either the start of the next section header or some pre-specified
                scope. Example: 'Past Medical History: Type II DM'

        Section attributes will be registered for each Doc, Span, and Token in the following attributes:
            Doc._.sections: A list of namedtuples of type Section with 4 elements:
                - section_title
                - section_header
                - section_parent
                - section_span.
            A Doc will also have attributes corresponding to lists of each
                (ie., Doc._.section_titles, Doc._.section_headers, Doc._.section_parents, Doc._.section_spans)
            (Span|Token)._.section_title
            (Span|Token)._.section_header
            (Span|Token)._.section_parent
            (Span|Token)._.section_span

        Args:
            nlp: A SpaCy language model object
            patterns (str, list, or None): Where to read patterns from. Default is "default", which will
                load the default patterns provided by medSpaCy, which are derived from MIMIC-II.
                If a list, should be a list of pattern dicts following these conventional spaCy formats:
                    [
                        {"section_title": "past_medical_history", "pattern": "Past Medical History:"},
                        {"section_title": "problem_list", "pattern": [{"TEXT": "PROBLEM"}, {"TEXT": "LIST"}, {"TEXT": ":"}]}
                    ]
                If a string other than "default", should be a path to a jsonl file containing patterns.
                If None, no patterns are loaded.
            add_attrs (bool or dict): Whether to set Span attributes on the entities of a doc based on the
                section they occur in. False (the default) sets nothing. True uses DEFAULT_ATTRS and
                registers the default Span extensions. A dict maps a section title to a dict of
                {attribute name: value}; every attribute name must already be a registered Span extension.
            max_scope (None or int): Optional argument specifying the maximum number of tokens following a section header
                which can be included in a section. This can be useful if you think your section patterns are incomplete
                and want to prevent sections from running too long in the note. Default is None, meaning that the scope
                of a section will be until either the next section header or the end of the document.
            phrase_matcher_attr (str): The name of the token attribute which will be used by the PhraseMatcher
                for any patterns with a "pattern" value of a string.
            require_start_line (bool): Optionally require a section header to start on a new line. Default False.
            require_end_line (bool): Optionally require a section header to end with a new line. Default False.
            newline_pattern (str): Regular expression to match the new line either preceding or following a header
                if either require_start_line or require_end_line are True.

        Raises:
            FileNotFoundError: If patterns is "default" and the default rules file does not exist.
            ValueError: If add_attrs is a dict naming an unregistered Span extension, or is neither
                a bool nor a dict.
        """
        self.nlp = nlp
        self.add_attrs = add_attrs
        self.matcher = Matcher(nlp.vocab)
        self.max_scope = max_scope
        self.phrase_matcher = PhraseMatcher(
            nlp.vocab, attr=phrase_matcher_attr
        )
        self.require_start_line = require_start_line
        self.require_end_line = require_end_line
        self.newline_pattern = re.compile(newline_pattern)
        self.assertion_attributes_mapping = None
        self._patterns = []
        self._section_titles = set()
        # section title -> set of titles allowed to be its parent
        self._parent_sections = {}
        # section title -> whether a match is dropped when no parent is found
        self._parent_required = {}

        if patterns is not None:
            if patterns == "default":
                if not path.exists(DEFAULT_RULES_FILEPATH):
                    # A single message string: OSError subclasses interpret two
                    # positional arguments as (errno, strerror), which garbled
                    # the message.
                    raise FileNotFoundError(
                        "The expected location of the default patterns file cannot be found. Please either "
                        "add patterns manually or add a jsonl file to the following location: "
                        "{0}".format(DEFAULT_RULES_FILEPATH)
                    )
                self.add(self.load_patterns_from_jsonl(DEFAULT_RULES_FILEPATH))
            # If a list, add each of the patterns in the list
            elif isinstance(patterns, list):
                self.add(patterns)
            elif isinstance(patterns, str):
                assert path.exists(patterns)
                self.add(self.load_patterns_from_jsonl(patterns))

        if add_attrs is False:
            self.add_attrs = False
        elif add_attrs is True:
            self.assertion_attributes_mapping = DEFAULT_ATTRS
            self.register_default_attributes()
        elif isinstance(add_attrs, dict):
            # Check that each of the attributes being added has been set
            for modifier in add_attrs.keys():
                attr_dict = add_attrs[modifier]
                for attr_name, attr_value in attr_dict.items():
                    if not Span.has_extension(attr_name):
                        # Fixed: the placeholder was never filled in.
                        raise ValueError(
                            "Custom extension {0} has not been set. Call Span.set_extension.".format(
                                attr_name
                            )
                        )

            self.add_attrs = True
            self.assertion_attributes_mapping = add_attrs

        else:
            raise ValueError(
                "add_attrs must be either True (default), False, or a dictionary, not {0}".format(
                    add_attrs
                )
            )

    @property
    def patterns(self):
        """The list of pattern dicts added so far, in insertion order."""
        return self._patterns

    @property
    def section_titles(self):
        """The set of normalized section titles known to this sectionizer."""
        return self._section_titles

    @classmethod
    def load_patterns_from_jsonl(cls, filepath):
        """Read pattern dicts from a jsonl file, one JSON object per line.

        Lines starting with "//" are treated as comments and skipped, as are
        blank lines (which would otherwise make json.loads raise).
        """
        import json

        patterns = []
        with open(filepath) as f:
            for line in f:
                if line.startswith("//") or not line.strip():
                    continue
                patterns.append(json.loads(line))

        return patterns

    def register_default_attributes(self):
        """Register the default values for the Span attributes defined in DEFAULT_ATTRS."""
        for attr_name in [
            "is_negated",
            "is_uncertain",
            "is_historical",
            "is_hypothetical",
            "is_family",
        ]:
            try:
                Span.set_extension(attr_name, default=False)
            except ValueError:  # Extension already set
                pass

    def add(self, patterns):
        """Add a list of patterns to the clinical_sectionizer. Each pattern should be a dictionary with
        the keys:
            'section_title': The normalized section name of the section, such as 'pmh'.
            'pattern': The spaCy pattern matching a span of text.
                Either a string, which is matched with the PhraseMatcher,
                or a list of token dicts, which is matched with the Matcher.
            'parents' (optional): A list of section titles which may be the parent of this section.
            'parent_required' (optional): If True, a match is discarded when none of its
                parents is found. Requires a non-empty 'parents'.

        Example:
            >>> patterns = [
            ...     {"section_title": "past_medical_history", "pattern": "pmh"},
            ...     {"section_title": "past_medical_history", "pattern": [{"LOWER": "past", "OP": "?"},
            ...                                                          {"LOWER": "medical"},
            ...                                                          {"LOWER": "history"}]},
            ...     {"section_title": "assessment_and_plan", "pattern": "a/p:"},
            ... ]
            >>> clinical_sectionizer.add(patterns)

        Raises:
            ValueError: If 'parent_required' is given without any 'parents'.
        """
        for pattern_dict in patterns:
            name = pattern_dict["section_title"]
            pattern = pattern_dict["pattern"]
            parents = []
            parent_required = False
            if "parents" in pattern_dict.keys():
                parents = pattern_dict["parents"]

            if "parent_required" in pattern_dict.keys():
                if not parents:
                    raise ValueError(
                        "Jsonl file incorrectly formatted for pattern name {0}. If parents are required, then at least one parent must be specified.".format(
                            name
                        )
                    )
                parent_required = pattern_dict["parent_required"]

            # NOTE(review): the two matchers are called with different add()
            # signatures (callback-style vs. list-style); confirm both are
            # accepted by the pinned spaCy version.
            if isinstance(pattern, str):
                self.phrase_matcher.add(name, None, self.nlp.make_doc(pattern))
            else:
                self.matcher.add(name, [pattern])
            self._patterns.append(pattern_dict)
            self._section_titles.add(name)

            if name in self._parent_sections.keys() and parents != []:
                warnings.warn(
                    "Duplicate section title {0}. Merging parents. If this is not indended, please specify distinc titles.".format(
                        name
                    ),
                    RuntimeWarning,
                )
                self._parent_sections[name].update(parents)
            else:
                self._parent_sections[name] = set(parents)

            if (
                name in self._parent_required.keys()
                and self._parent_required[name] != parent_required
            ):
                warnings.warn(
                    "Duplicate section title {0} has different parent_required option. Setting parent_required to False.".format(
                        name
                    ),
                    RuntimeWarning,
                )
                self._parent_required[name] = False
            else:
                self._parent_required[name] = parent_required

    def set_parent_sections(self, sections):
        """Determine the legal parent-child section relationships from the list
        of in-order sections of a document and the possible parents of each
        section as specified during rule creation.

        Args:
            sections: a list of spacy match tuples found in the doc

        Returns:
            A list of (match_id, start, end, parent) tuples, where parent is the
            title of the identified parent section or None. Matches whose title
            requires a parent but for which none was found are left out.
        """
        sections_final = []
        removed_sections = 0
        for i, (match_id, start, end) in enumerate(sections):
            name = self.nlp.vocab.strings[match_id]
            required = self._parent_required[name]
            i_a = i - removed_sections  # adjusted index for removed values
            # The first kept section has nothing before it to be its parent
            if required and i_a == 0:
                removed_sections += 1
                continue
            elif i_a == 0 or name not in self._parent_sections.keys():
                sections_final.append((match_id, start, end, None))
            else:
                parents = self._parent_sections[name]
                identified_parent = None
                for parent in parents:
                    # go backwards through the section "tree" until you hit a root or the start of the list
                    candidate = self.nlp.vocab.strings[
                        sections_final[i_a - 1][0]
                    ]
                    candidates_parent = sections_final[i_a - 1][3]
                    candidate_i = i_a - 1
                    while candidate:
                        if candidate == parent:
                            identified_parent = parent
                            candidate = None
                        else:
                            # if you are at the end of the list... no parent
                            if candidate_i < 1:
                                candidate = None
                                continue
                            # if the current candidate has no parent... no parent exists
                            if not candidates_parent:
                                candidate = None
                                continue
                            # otherwise get the previous item in the list
                            temp = self.nlp.vocab.strings[
                                sections_final[candidate_i - 1][0]
                            ]
                            temp_parent = sections_final[candidate_i - 1][3]
                            # if the previous item is the parent of the current item
                            # OR if the previous item is a sibling of the current item
                            # continue to search
                            if (
                                temp == candidates_parent
                                or temp_parent == candidates_parent
                            ):
                                candidate = temp
                                candidates_parent = temp_parent
                                candidate_i -= 1
                            # otherwise, there is no further tree traversal
                            else:
                                candidate = None

                # if a parent is required, then add
                if identified_parent or not required:
                    # if the parent is identified, add section
                    # if the parent is not required, add section
                    # if parent is not identified and required, do not add the section
                    sections_final.append(
                        (match_id, start, end, identified_parent)
                    )
                else:
                    removed_sections += 1
        return sections_final

    def set_assertion_attributes(self, ents):
        """Add Span-level attributes to entities based on which section they occur in.

        Args:
            ents: the entity spans to modify. Each entity whose section title is a key
                of self.assertion_attributes_mapping gets the mapped attributes set.
        """
        for ent in ents:
            if ent._.section_title in self.assertion_attributes_mapping:
                attr_dict = self.assertion_attributes_mapping[
                    ent._.section_title
                ]
                for (attr_name, attr_value) in attr_dict.items():
                    setattr(ent._, attr_name, attr_value)

    def __call__(self, doc):
        """Find section headers in doc and populate the section attributes.

        Sets doc._.sections to a list of Section namedtuples covering the doc and
        sets the section_* attributes on every token inside a section span.
        Returns the same doc.
        """
        matches = self.matcher(doc)
        matches += self.phrase_matcher(doc)
        if self.require_start_line:
            matches = self.filter_start_lines(doc, matches)
        if self.require_end_line:
            matches = self.filter_end_lines(doc, matches)
        matches = prune_overlapping_matches(matches)
        matches = self.set_parent_sections(matches)
        # If this has already been processed by the sectionizer, reset the sections
        doc._.sections = []
        if len(matches) == 0:
            # One untitled section over the whole doc; a Section (still a 4-tuple)
            # for consistency with the other entries.
            doc._.sections.append(Section(None, None, None, doc[0:]))
            return doc

        first_match = matches[0]
        section_spans = []
        # Text before the first header becomes an untitled section
        if first_match[1] != 0:
            section_spans.append(Section(None, None, None, doc[0 : first_match[1]]))
        for i, match in enumerate(matches):
            (match_id, start, end, parent) = match
            section_header = doc[start:end]
            name = self.nlp.vocab.strings[match_id]
            # If this is the last match, it should include the rest of the doc
            if i == len(matches) - 1:
                if self.max_scope is None:
                    section_spans.append(
                        Section(name, section_header, parent, doc[start:])
                    )
                else:
                    # Span ends are exclusive, so the upper bound is len(doc).
                    # (Previously doc[-1].i, which always dropped the last token.)
                    scope_end = min(end + self.max_scope, len(doc))
                    section_spans.append(
                        Section(name, section_header, parent, doc[start:scope_end])
                    )
            # Otherwise, go until the next section header
            else:
                next_match = matches[i + 1]
                _, next_start, _, _ = next_match
                if self.max_scope is None:
                    section_spans.append(
                        Section(name, section_header, parent, doc[start:next_start])
                    )
                else:
                    scope_end = min(end + self.max_scope, next_start)
                    section_spans.append(
                        Section(name, section_header, parent, doc[start:scope_end])
                    )

        for section_tuple in section_spans:
            name, header, parent, section = section_tuple
            doc._.sections.append(section_tuple)
            for token in section:
                token._.section_span = section
                token._.section_title = name
                token._.section_header = header
                token._.section_parent = parent

        # If it is specified to add assertion attributes,
        # iterate through the entities in doc and add them
        if self.add_attrs is True:
            self.set_assertion_attributes(doc.ents)
        return doc

    def filter_start_lines(self, doc, matches):
        "Filter a list of matches to only contain spans where the start token is the beginning of a new line."
        return [
            m
            for m in matches
            if util.is_start_line(m[1], doc, self.newline_pattern)
        ]

    def filter_end_lines(self, doc, matches):
        "Filter a list of matches to only contain spans where the last token is followed by a new line."
        return [
            m
            for m in matches
            if util.is_end_line(m[2] - 1, doc, self.newline_pattern)
        ]


def prune_overlapping_matches(matches, strategy="longest"):
    """Remove overlapping (match_id, start, end) tuples, keeping the longer one.

    The matches are sorted by (start, end) and compared pairwise; when two
    neighbours overlap, only the longer is kept (the earlier one on a tie).
    The pass is repeated recursively until it removes nothing.

    Raises:
        NotImplementedError: If strategy is anything other than "longest".
    """
    if strategy != "longest":
        raise NotImplementedError()

    # Make a copy and sort
    unpruned = sorted(matches, key=lambda x: (x[1], x[2]))
    pruned = []
    num_matches = len(matches)
    if num_matches == 0:
        return matches
    curr_match = unpruned.pop(0)

    while True:
        if len(unpruned) == 0:
            pruned.append(curr_match)
            break
        next_match = unpruned.pop(0)

        # Check if they overlap
        if overlaps(curr_match, next_match):
            # Choose the larger span
            longer_span = max(
                curr_match, next_match, key=lambda x: (x[2] - x[1])
            )
            pruned.append(longer_span)
            if len(unpruned) == 0:
                break
            curr_match = unpruned.pop(0)
        else:
            pruned.append(curr_match)
            curr_match = next_match
    # Recursive base point
    if len(pruned) == num_matches:
        return pruned
    # Recursive function call
    else:
        return prune_overlapping_matches(pruned)


def overlaps(a, b):
    """Return True if the two (match_id, start, end) tuples share any token."""
    if _span_overlaps(a, b) or _span_overlaps(b, a):
        return True
    return False


def _span_overlaps(a, b):
    """Return True if either boundary of a falls inside the half-open range of b."""
    _, a_start, a_end = a
    _, b_start, b_end = b
    if a_start >= b_start and a_start < b_end:
        return True
    if a_end > b_start and a_end <= b_end:
        return True
    return False


def matches_to_spans(doc, matches, set_label=True):
    """Convert (rule_id, start, end) match tuples into Spans of doc.

    If set_label is True, each span is labelled with the string for its rule_id
    looked up in doc.vocab.strings; otherwise the label is None.
    """
    spans = []
    for (rule_id, start, end) in matches:
        if set_label:
            label = doc.vocab.strings[rule_id]
        else:
            label = None
        spans.append(Span(doc, start=start, end=end, label=label))
    return spans

# --------------------------------------------------------------------------------
# /clinical_sectionizer/text_sectionizer.py:
# --------------------------------------------------------------------------------
import re

# Filepath to default rules which are included in package
from os import path
from pathlib import Path

DEFAULT_RULES_FILEPATH = path.join(
    Path(__file__).resolve().parents[1],
    "resources",
    "text_section_patterns.jsonl",
)


class TextSectionizer:
    """Split raw text into sections using regular-expression header patterns."""

    name = "text_sectionizer"

    def __init__(self, patterns="default"):
        """Create a TextSectionizer.

        Args:
            patterns (str, list, or None): "default" loads DEFAULT_RULES_FILEPATH;
                a list is added directly as pattern dicts; any other string is
                treated as a path to a jsonl file; None loads nothing.

        Raises:
            FileNotFoundError: If patterns is "default" and the default rules
                file does not exist.
        """
        self._patterns = []
        # section title -> list of compiled regular expressions
        self._compiled_patterns = dict()
        self._section_titles = set()

        if patterns is not None:
            if patterns == "default":
                if not path.exists(DEFAULT_RULES_FILEPATH):
                    # A single message string: OSError subclasses interpret two
                    # positional arguments as (errno, strerror), which garbled
                    # the message.
                    raise FileNotFoundError(
                        "The expected location of the default patterns file cannot be found. Please either "
                        "add patterns manually or add a jsonl file to the following location: "
                        "{0}".format(DEFAULT_RULES_FILEPATH)
                    )
                self.add(self.load_patterns_from_jsonl(DEFAULT_RULES_FILEPATH))
            # If a list, add each of the patterns in the list
            elif isinstance(patterns, list):
                self.add(patterns)
            elif isinstance(patterns, str):
                assert path.exists(patterns)
                self.add(self.load_patterns_from_jsonl(patterns))

    def add(self, patterns, cflags=None):
        """
        Add compiled regular expressions defined in patterns

        Positional arguments:
        - patterns -- a list of dicts with the keys "section_title" (str) and
                      "pattern" (a regular expression string)

        Keyword arguments:
        - cflags -- a list of regular expression compile flags
                    If cflags==None then cflags is set to [re.I]

        Raises ValueError if a "pattern" value is not a string.
        """

        def _mycomp(rex, flags=None):
            # OR the flags together; list items which are not re.RegexFlag are ignored
            if flags is None:
                flags = [re.I]
            combined = 0
            for f in flags:
                if isinstance(f, re.RegexFlag):
                    combined = combined | f
            return re.compile(rex, flags=combined)

        for pattern_dict in patterns:
            name = pattern_dict["section_title"]
            pattern = pattern_dict["pattern"]
            if isinstance(pattern, str):
                self._compiled_patterns.setdefault(name, [])
                self._compiled_patterns[name].append(
                    _mycomp(pattern, flags=cflags)
                )
            else:
                # TODO: Change the default patterns
                raise ValueError(
                    "Patterns added to the TextSectionizer must be strings",
                    pattern_dict,
                )
            self._patterns.append(pattern_dict)
            self._section_titles.add(name)

    @property
    def patterns(self):
        """The list of pattern dicts added so far, in insertion order."""
        return self._patterns

    @property
    def section_titles(self):
        """The set of section titles known to this sectionizer."""
        return self._section_titles

    @classmethod
    def load_patterns_from_jsonl(cls, filepath):
        """Read pattern dicts from a jsonl file, one JSON object per line.

        Lines starting with "//" and blank lines are skipped; either would
        otherwise make json.loads raise.
        """
        import json

        patterns = []
        with open(filepath) as f:
            for line in f:
                if line.startswith("//") or not line.strip():
                    continue
                patterns.append(json.loads(line))

        return patterns

    def _find_matches(self, text):
        """Return the non-overlapping (title, match) pairs found in text, in text order."""
        matches = []
        for (name, patterns) in self._compiled_patterns.items():
            for pattern in patterns:
                for match in pattern.finditer(text):
                    matches.append((name, match))
        if not matches:
            return matches
        # Earliest start first; among equal starts the longest match first, so
        # that _dedup_matches keeps the longest header at each position.
        matches = sorted(matches, key=lambda x: (x[1].start(), 0 - x[1].end()))
        return self._dedup_matches(matches)

    def __call__(self, text):
        """Split text into a list of (section_title, section_header, section_text).

        Text before the first header is returned with a title and header of
        None; if no header matches at all, the result is [(None, None, text)].
        """
        matches = self._find_matches(text)
        if len(matches) == 0:
            return [(None, None, text)]

        sections = []
        # If the first section doesn't start at the very beginning,
        # add an unknown section at the beginning
        if matches[0][1].start() != 0:
            sections.append((None, None, text[: matches[0][1].start()]))

        for i, (section_title, match) in enumerate(matches):
            section_header = match.group()
            # If this is the final section, it should include the rest of the text
            if i == len(matches) - 1:
                section_text = text[match.start() :]
            # Otherwise, it will include all of the text up until the next section header
            else:
                next_match = matches[i + 1][1]
                section_text = text[match.start() : next_match.start()]
            sections.append((section_title, section_header, section_text))
        return sections

    def extract_sections(self, text):
        """Split text into a list of (section_title, section_text).

        Text before the first header is titled "UNK"; if no header matches at
        all, the result is [(None, text)].
        """
        # Fixed: this previously iterated self.patterns.items(), but patterns is
        # a list of dicts, so every call raised AttributeError.
        matches = self._find_matches(text)
        if len(matches) == 0:
            return [(None, text)]

        sections = []
        if matches[0][1].start() != 0:
            sections.append(("UNK", text[: matches[0][1].start()]))
        for i, (name, match) in enumerate(matches):
            if i == len(matches) - 1:
                sections.append((name, text[match.start() :]))
            else:
                next_match = matches[i + 1][1]
                sections.append(
                    (name, text[match.start() : next_match.start()])
                )

        return sections

    def _dedup_matches(self, matches):
        """Drop every match which overlaps the most recently kept match.

        Expects a non-empty list already sorted in text order.
        """
        # TODO: Make this smarter
        deduped = [matches[0]]
        for match in matches[1:]:
            if not self._overlaps(deduped[-1], match):
                deduped.append(match)
        return deduped

    def _overlaps(self, a, b):
        """Return True if the match of either (title, match) pair starts inside the other's."""
        (_, a) = a
        (_, b) = b
        if a.start() <= b.start() < a.end():
            return True
        if b.start() <= a.start() < b.end():
            return True
        return False


# --------------------------------------------------------------------------------
# /clinical_sectionizer/util.py:
# --------------------------------------------------------------------------------
NEWLINE_PATTERN = r"[\n\r]+[\s]*$"


def get_section_titles(doc):
    """Return the title of every entry in doc._.sections, in order."""
    return [title for (title, _, _, _) in doc._.sections]


def get_section_headers(doc):
    """Return the header of every entry in doc._.sections, in order."""
    return [header for (_, header, _, _) in doc._.sections]


def get_section_parents(doc):
    """Return the parent of every entry in doc._.sections, in order."""
    return [parent for (_, _, parent, _,) in doc._.sections]


def get_section_spans(doc):
    """Return the span of every entry in doc._.sections, in order."""
    return [span for (_, _, _, span) in doc._.sections]


def is_start_line(idx, doc, pattern):
    """Return True if the token at idx is the first token of a line.

    pattern is a compiled regular expression searched in the text (including
    trailing whitespace) of the preceding token.
    """
    # If it's the start of the doc, return True
    if idx == 0:
        return True
    # Otherwise, check if the preceding token ends with newlines
    preceding_text = doc[idx - 1].text_with_ws
    return pattern.search(preceding_text) is not None


def is_end_line(idx, doc, pattern):
    """Return True if the token at idx is the last token of a line.

    pattern is a compiled regular expression searched in the text (including
    trailing whitespace) of the token itself and of the following token.
    """
    # If it's the end of the doc, return True
    if idx == len(doc) - 1:
        return True

    # Check if either the token has trailing newlines,
    # or if the next token is a newline
    text = doc[idx].text_with_ws
    if pattern.search(text) is not None:
        return True
    following_text = doc[idx + 1].text_with_ws
    return pattern.search(following_text) is not None

# --------------------------------------------------------------------------------
# /img/viz_ent.png:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/medspacy/sectionizer/83e1e484a3f067b105c72d838bf439ce7695ad55/img/viz_ent.png
# --------------------------------------------------------------------------------
# /notebooks/01-adding_sections.ipynb:
# --------------------------------------------------------------------------------
# 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Adding to the Sectionizer\n", 8 | "\n", 9 | "By default, `clinical_sectionizer` comes with a number of built-in patterns. However, this is a non-exhaustive list and your data will almost certainly contain a number of sections which aren't captured by the default patterns. \n", 10 | "\n", 11 | "In this notebook, we'll see how to add custom section patterns to our clinical sectionizer to recognize section headers which are not contained in the default knowledge base."
12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "%load_ext autoreload\n", 21 | "%autoreload 2" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import sys" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "sys.path.insert(0, \"..\")" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 4, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "import spacy\n", 49 | "from clinical_sectionizer import Sectionizer\n", 50 | "from medspacy.visualization import visualize_ent \n" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 5, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "name": "stderr", 60 | "output_type": "stream", 61 | "text": [ 62 | "/Users/alecchapman/opt/anaconda3/envs/medspacy-37/lib/python3.7/site-packages/spacy/util.py:275: UserWarning: [W031] Model 'en_info_3700_i2b2_2012' (0.1.0) requires spaCy v2.2 and is incompatible with the current spaCy version (2.3.2). This may lead to unexpected results or runtime errors. To resolve this, download a newer compatible model or retrain your custom model with the current spaCy version. 
For more details and available updates, run: python -m spacy validate\n", 63 | " warnings.warn(warn_msg)\n" 64 | ] 65 | } 66 | ], 67 | "source": [ 68 | "nlp = spacy.load(\"en_info_3700_i2b2_2012\")" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 6, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "sectionizer = Sectionizer(nlp)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 7, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "nlp.add_pipe(sectionizer)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 8, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/plain": [ 97 | "['tagger', 'parser', 'ner', 'sectionizer']" 98 | ] 99 | }, 100 | "execution_count": 8, 101 | "metadata": {}, 102 | "output_type": "execute_result" 103 | } 104 | ], 105 | "source": [ 106 | "nlp.pipe_names" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "## Available default sections\n", 114 | "You can see which section titles are available in the `sectionizer` by the `sectionizer.section_titles` property:" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 9, 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "text/plain": [ 125 | "{'addendum',\n", 126 | " 'allergies',\n", 127 | " 'chief_complaint',\n", 128 | " 'comments',\n", 129 | " 'diagnoses',\n", 130 | " 'family_history',\n", 131 | " 'history_of_present_illness',\n", 132 | " 'hospital_course',\n", 133 | " 'imaging',\n", 134 | " 'labs_and_studies',\n", 135 | " 'medications',\n", 136 | " 'neurological',\n", 137 | " 'observation_and_plan',\n", 138 | " 'other',\n", 139 | " 'past_medical_history',\n", 140 | " 'patient_education',\n", 141 | " 'patient_instructions',\n", 142 | " 'physical_exam',\n", 143 | " 'problem_list',\n", 144 | " 'reason_for_examination',\n", 145 | " 'signature',\n", 146 | " 'social_history'}" 147 | ] 148 | }, 
149 | "execution_count": 9, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | } 153 | ], 154 | "source": [ 155 | "sectionizer.section_titles" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "You can also view the patterns in `sectionizer.patterns`. This will be explained more below." 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 10, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "data": { 172 | "text/plain": [ 173 | "[{'section_title': 'addendum', 'pattern': 'ADDENDUM:'},\n", 174 | " {'section_title': 'addendum', 'pattern': 'Addendum:'},\n", 175 | " {'section_title': 'allergies', 'pattern': 'ALLERGIC REACTIONS:'},\n", 176 | " {'section_title': 'allergies', 'pattern': 'ALLERGIES:'},\n", 177 | " {'section_title': 'chief_complaint', 'pattern': 'CC:'}]" 178 | ] 179 | }, 180 | "execution_count": 10, 181 | "metadata": {}, 182 | "output_type": "execute_result" 183 | } 184 | ], 185 | "source": [ 186 | "sectionizer.patterns[:5]" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "In this example, we'll use a smaller section of the note before:" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 11, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "text = \"\"\"\n", 203 | "Admission Date: [**2573-5-30**] Discharge Date: [**2573-7-1**]\n", 204 | " \n", 205 | "Date of Birth: [**2498-8-19**] Sex: F\n", 206 | " \n", 207 | "Service: SURGERY\n", 208 | " \n", 209 | "Allergies: \n", 210 | "Hydrochlorothiazide\n", 211 | " \n", 212 | "Attending:[**First Name3 (LF) 1893**] \n", 213 | "Chief Complaint:\n", 214 | "Abdominal pain\n", 215 | "\n", 216 | "\n", 217 | "Pertinent Results:\n", 218 | "[**2573-5-30**] 09:10PM BLOOD WBC-19.2*# RBC-4.81 Hgb-15.5 Hct-44.0 \n", 219 | "MCV-92 MCH-32.3* MCHC-35.2* RDW-13.3 Plt Ct-230\n", 220 | "[**2573-5-30**] 09:10PM BLOOD Neuts-87* Bands-10* Lymphs-3* 
Monos-0 \n", 221 | "\"\"\"" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 12, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "doc = nlp(text)" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 13, 236 | "metadata": {}, 237 | "outputs": [ 238 | { 239 | "data": { 240 | "text/html": [ 241 | "

Admission Date: [**2573-5-30**] Discharge Date: [**2573-7-1**]

Date of Birth: [**2498-8-19**] Sex: F

\n", 242 | "\n", 243 | " Service:\n", 244 | " << OTHER >>\n", 245 | "\n", 246 | " SURGERY

\n", 247 | "\n", 248 | " Allergies:\n", 249 | " << ALLERGIES >>\n", 250 | "\n", 251 | "
\n", 252 | "\n", 253 | " Hydrochlorothiazide\n", 254 | " TREATMENT\n", 255 | "\n", 256 | "

Attending:[**First Name3 (LF) 1893**]
\n", 257 | "\n", 258 | " Chief Complaint:\n", 259 | " << CHIEF_COMPLAINT >>\n", 260 | "\n", 261 | "
\n", 262 | "\n", 263 | " Abdominal pain\n", 264 | " PROBLEM\n", 265 | "\n", 266 | "

\n", 267 | "\n", 268 | " Pertinent Results:\n", 269 | " << LABS_AND_STUDIES >>\n", 270 | "\n", 271 | "\n", 272 | "[**2573-5-30**] 09:10PM BLOOD WBC-19.2*# RBC-4.81 Hgb-15.5 Hct-44.0 \n", 273 | "MCV-92 MCH-32.3* MCHC-35.2* RDW-13.3 Plt Ct-230\n", 274 | "[**2573-5-30**] 09:10PM BLOOD Neuts-87* Bands-10* Lymphs-3* Monos-0 \n", 275 | "

" 276 | ], 277 | "text/plain": [ 278 | "" 279 | ] 280 | }, 281 | "metadata": {}, 282 | "output_type": "display_data" 283 | } 284 | ], 285 | "source": [ 286 | "visualize_ent(doc)" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 14, 292 | "metadata": {}, 293 | "outputs": [ 294 | { 295 | "data": { 296 | "text/plain": [ 297 | "[None, 'other', 'allergies', 'chief_complaint', 'labs_and_studies']" 298 | ] 299 | }, 300 | "execution_count": 14, 301 | "metadata": {}, 302 | "output_type": "execute_result" 303 | } 304 | ], 305 | "source": [ 306 | "doc._.section_titles" 307 | ] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": {}, 312 | "source": [ 313 | "The sectionizer correctly recognizes **\"Allergies\"** and **\"Chief Complaint\"** as section headers. However, some other titles which might be useful to extract are:\n", 314 | "- **\"Admission Date\"**: Many MIMIC notes start this way and you could consider this first section to be **visit_information**\n", 315 | "- **\"Pertinent Results\"**: This is a section of **\"lab\"** results" 316 | ] 317 | }, 318 | { 319 | "cell_type": "markdown", 320 | "metadata": {}, 321 | "source": [ 322 | "## Add patterns\n", 323 | "To recognize these sections, we can add **patterns** to the sectionizer. These patterns resemble spaCy's [rule-based matching API](https://spacy.io/usage/rule-based-matching). 
Each pattern is a dictionary with two keys:\n", 324 | "- `section_title`: The normalized name of the section which will be available in `ent._.section_title`\n", 325 | "- `pattern`: Either a string (for exact match, case insensitive) or a list of dictionaries (for matching on additional token attributes) which define the text to match" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 15, 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [ 334 | "new_patterns = [\n", 335 | " {\"section_title\": \"visit_information\", \"pattern\": [{\"LOWER\": {\"REGEX\": \"admi(t|ssion)\"}}, {\"LOWER\": \"date\"}, {\"LOWER\": \":\"}]},\n", 336 | " {\"section_title\": \"labs_and_studies\", \"pattern\": \"Pertinent Results:\"}\n", 337 | "]" 338 | ] 339 | }, 340 | { 341 | "cell_type": "markdown", 342 | "metadata": {}, 343 | "source": [ 344 | "We add this list of patterns through the `sectionizer.add` method:" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": 16, 350 | "metadata": {}, 351 | "outputs": [], 352 | "source": [ 353 | "sectionizer.add(new_patterns)" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": {}, 359 | "source": [ 360 | "Now if we reprocess and visualize our doc, we can see that the new headers have been extracted:" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 17, 366 | "metadata": {}, 367 | "outputs": [], 368 | "source": [ 369 | "doc = nlp(text)" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": 18, 375 | "metadata": {}, 376 | "outputs": [ 377 | { 378 | "data": { 379 | "text/html": [ 380 | "

\n", 381 | "\n", 382 | " Admission Date:\n", 383 | " << VISIT_INFORMATION >>\n", 384 | "\n", 385 | " [**2573-5-30**] Discharge Date: [**2573-7-1**]

Date of Birth: [**2498-8-19**] Sex: F

\n", 386 | "\n", 387 | " Service:\n", 388 | " << OTHER >>\n", 389 | "\n", 390 | " SURGERY

\n", 391 | "\n", 392 | " Allergies:\n", 393 | " << ALLERGIES >>\n", 394 | "\n", 395 | "
\n", 396 | "\n", 397 | " Hydrochlorothiazide\n", 398 | " TREATMENT\n", 399 | "\n", 400 | "

Attending:[**First Name3 (LF) 1893**]
\n", 401 | "\n", 402 | " Chief Complaint:\n", 403 | " << CHIEF_COMPLAINT >>\n", 404 | "\n", 405 | "
\n", 406 | "\n", 407 | " Abdominal pain\n", 408 | " PROBLEM\n", 409 | "\n", 410 | "

\n", 411 | "\n", 412 | " Pertinent Results:\n", 413 | " << LABS_AND_STUDIES >>\n", 414 | "\n", 415 | "\n", 416 | "[**2573-5-30**] 09:10PM BLOOD WBC-19.2*# RBC-4.81 Hgb-15.5 Hct-44.0 \n", 417 | "MCV-92 MCH-32.3* MCHC-35.2* RDW-13.3 Plt Ct-230\n", 418 | "[**2573-5-30**] 09:10PM BLOOD Neuts-87* Bands-10* Lymphs-3* Monos-0 \n", 419 | "

" 420 | ], 421 | "text/plain": [ 422 | "" 423 | ] 424 | }, 425 | "metadata": {}, 426 | "output_type": "display_data" 427 | } 428 | ], 429 | "source": [ 430 | "visualize_ent(doc)" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 19, 436 | "metadata": {}, 437 | "outputs": [ 438 | { 439 | "data": { 440 | "text/plain": [ 441 | "[None,\n", 442 | " 'visit_information',\n", 443 | " 'other',\n", 444 | " 'allergies',\n", 445 | " 'chief_complaint',\n", 446 | " 'labs_and_studies']" 447 | ] 448 | }, 449 | "execution_count": 19, 450 | "metadata": {}, 451 | "output_type": "execute_result" 452 | } 453 | ], 454 | "source": [ 455 | "doc._.section_titles" 456 | ] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": null, 461 | "metadata": {}, 462 | "outputs": [], 463 | "source": [] 464 | }, 465 | { 466 | "cell_type": "markdown", 467 | "metadata": {}, 468 | "source": [ 469 | "# Loading a blank sectionizer\n", 470 | "You can load the `sectionizer` without the default patterns and only custom patterns:" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": 20, 476 | "metadata": {}, 477 | "outputs": [], 478 | "source": [ 479 | "blank_sectionizer = Sectionizer(nlp, patterns=None)" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": 21, 485 | "metadata": {}, 486 | "outputs": [ 487 | { 488 | "data": { 489 | "text/plain": [ 490 | "[]" 491 | ] 492 | }, 493 | "execution_count": 21, 494 | "metadata": {}, 495 | "output_type": "execute_result" 496 | } 497 | ], 498 | "source": [ 499 | "blank_sectionizer._patterns" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": 22, 505 | "metadata": {}, 506 | "outputs": [ 507 | { 508 | "data": { 509 | "text/plain": [ 510 | "set()" 511 | ] 512 | }, 513 | "execution_count": 22, 514 | "metadata": {}, 515 | "output_type": "execute_result" 516 | } 517 | ], 518 | "source": [ 519 | "blank_sectionizer._section_titles" 520 | ] 521 | }, 522 | { 523 | 
"cell_type": "code", 524 | "execution_count": null, 525 | "metadata": {}, 526 | "outputs": [], 527 | "source": [] 528 | } 529 | ], 530 | "metadata": { 531 | "kernelspec": { 532 | "display_name": "Python 3", 533 | "language": "python", 534 | "name": "python3" 535 | }, 536 | "language_info": { 537 | "codemirror_mode": { 538 | "name": "ipython", 539 | "version": 3 540 | }, 541 | "file_extension": ".py", 542 | "mimetype": "text/x-python", 543 | "name": "python", 544 | "nbconvert_exporter": "python", 545 | "pygments_lexer": "ipython3", 546 | "version": "3.7.9" 547 | } 548 | }, 549 | "nbformat": 4, 550 | "nbformat_minor": 4 551 | } 552 | -------------------------------------------------------------------------------- /notebooks/02-text_sectionizer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# TextSectionizer\n", 8 | "Sometimes, you may not want to process an entire document with spaCy. You may instead want to extract specific sections and then process them independently. To do this, you can use the `TextSectionizer` and process a text. Just like the `Sectionizer`, this class comes with default patterns which can be modified or added to." 
9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "with open(\"./example_discharge_summary.txt\") as f:\n", 18 | " text = f.read()" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "from clinical_sectionizer import TextSectionizer" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "sectionizer = TextSectionizer()" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 4, 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "data": { 46 | "text/plain": [ 47 | "{'allergy',\n", 48 | " 'chief_complaint',\n", 49 | " 'ed_course',\n", 50 | " 'education',\n", 51 | " 'family_history',\n", 52 | " 'hiv_screening',\n", 53 | " 'imaging',\n", 54 | " 'labs_and_studies',\n", 55 | " 'medication',\n", 56 | " 'observation_and_plan',\n", 57 | " 'other',\n", 58 | " 'past_medical_history',\n", 59 | " 'patient_instructions',\n", 60 | " 'physical_exam',\n", 61 | " 'present_illness',\n", 62 | " 'problem_list',\n", 63 | " 'sexual_and_social_history',\n", 64 | " 'signature'}" 65 | ] 66 | }, 67 | "execution_count": 4, 68 | "metadata": {}, 69 | "output_type": "execute_result" 70 | } 71 | ], 72 | "source": [ 73 | "sectionizer.section_titles" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "Unlike the `Sectionizer` patterns, the `pattern` value can only be a string, which will be interpreted as a case-insensitive regular expression. 
You can add to the `TextSectionizer` with the same `.add()` method:" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 5, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "data": { 90 | "text/plain": [ 91 | "[{'section_title': 'past_medical_history',\n", 92 | " 'pattern': '(past )?medical (history|hx)'},\n", 93 | " {'section_title': 'past_medical_history', 'pattern': 'mhx?'},\n", 94 | " {'section_title': 'past_medical_history', 'pattern': 'mh:'},\n", 95 | " {'section_title': 'past_medical_history', 'pattern': 'pmh:'},\n", 96 | " {'section_title': 'past_medical_history', 'pattern': 'pohx:'}]" 97 | ] 98 | }, 99 | "execution_count": 5, 100 | "metadata": {}, 101 | "output_type": "execute_result" 102 | } 103 | ], 104 | "source": [ 105 | "sectionizer.patterns[:5]" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 6, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "new_patterns = [\n", 115 | " {\"section_title\": \"visit_information\", \"pattern\": \"admi(t|ssion) date:\"},\n", 116 | " {\"section_title\": \"labs_and_studies\", \"pattern\": \"pertinent results:\"}\n", 117 | "]" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 7, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "sectionizer.add(new_patterns)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "# Using Text Sectionizer\n", 134 | "We can get the split up document by calling `sectionizer(text)`. 
This returns a list of 3-tuples which contain:\n", 135 | "- `section_title`: The string of the section title\n", 136 | "- `section_header`: The span of text matched by the pattern\n", 137 | "- `section_text`: The span of text contained in the entire section" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 8, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "sections = sectionizer(text)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 9, 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "name": "stdout", 156 | "output_type": "stream", 157 | "text": [ 158 | "('visit_information', 'Admission Date:', 'Admission Date: [**2573-5-30**] Discharge Date: [**2573-7-1**]\\n \\nDate of Birth: [**2498-8-19**] Sex: F\\n \\nService: SURGERY\\n \\n')\n" 159 | ] 160 | } 161 | ], 162 | "source": [ 163 | "print(sections[1])" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 10, 169 | "metadata": { 170 | "scrolled": true 171 | }, 172 | "outputs": [ 173 | { 174 | "name": "stdout", 175 | "output_type": "stream", 176 | "text": [ 177 | "None\n", 178 | "None\n", 179 | "\n", 180 | "\n", 181 | "\n", 182 | "---------------\n", 183 | "visit_information\n", 184 | "Admission Date:\n", 185 | "\n", 186 | "Admission Date: [**2573-5-30**] Discharge Date: [**2573-7-1**]\n", 187 | " \n", 188 | "Date of Birth: [**2498-8-19**] Sex: F\n", 189 | " \n", 190 | "Service: SURGERY\n", 191 | " \n", 192 | "\n", 193 | "---------------\n", 194 | "allergy\n", 195 | "Allergies:\n", 196 | "\n", 197 | "Allergies: \n", 198 | "Hydrochlorothiazide\n", 199 | " \n", 200 | "Attending:[**First Name3 (LF) 1893**] \n", 201 | "Chief Complaint:\n", 202 | "Abdominal pain\n", 203 | " \n", 204 | "Major Surgical or Invasive \n", 205 | "---------------\n" 206 | ] 207 | } 208 | ], 209 | "source": [ 210 | "for (section_title, section_header, section_text) in sections[:3]:\n", 211 | " print(section_title)\n", 212 | " 
print(section_header)\n", 213 | " print()\n", 214 | " print(section_text)\n", 215 | " print(\"---\"*5)" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "You can unpack these tuples by using the Python `zip(*tuples)` function:" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 11, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "section_titles, section_headers, section_texts = zip(*sections)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 12, 237 | "metadata": {}, 238 | "outputs": [ 239 | { 240 | "data": { 241 | "text/plain": [ 242 | "(None,\n", 243 | " 'visit_information',\n", 244 | " 'allergy',\n", 245 | " 'other',\n", 246 | " 'present_illness',\n", 247 | " 'past_medical_history',\n", 248 | " 'sexual_and_social_history',\n", 249 | " 'family_history',\n", 250 | " 'physical_exam',\n", 251 | " 'labs_and_studies',\n", 252 | " 'observation_and_plan',\n", 253 | " 'medication',\n", 254 | " 'medication',\n", 255 | " 'observation_and_plan',\n", 256 | " 'patient_instructions',\n", 257 | " 'patient_instructions',\n", 258 | " 'signature')" 259 | ] 260 | }, 261 | "execution_count": 12, 262 | "metadata": {}, 263 | "output_type": "execute_result" 264 | } 265 | ], 266 | "source": [ 267 | "section_titles" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 13, 273 | "metadata": {}, 274 | "outputs": [ 275 | { 276 | "data": { 277 | "text/plain": [ 278 | "(None,\n", 279 | " 'Admission Date:',\n", 280 | " 'Allergies:',\n", 281 | " 'Procedure:',\n", 282 | " 'History of Present Illness:',\n", 283 | " 'Past Medical History:',\n", 284 | " 'Social History:',\n", 285 | " 'Family History:',\n", 286 | " 'Physical Exam:',\n", 287 | " 'Pertinent Results:',\n", 288 | " 'IMPRESSION:',\n", 289 | " 'Medications on Admission:',\n", 290 | " 'Discharge Medications:',\n", 291 | " 'Discharge Diagnosis:',\n", 292 | " 'Discharge Instructions:',\n", 293 
| " 'Followup Instructions:',\n", 294 | " 'Signed electronically by:')" 295 | ] 296 | }, 297 | "execution_count": 13, 298 | "metadata": {}, 299 | "output_type": "execute_result" 300 | } 301 | ], 302 | "source": [ 303 | "section_headers" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 14, 309 | "metadata": {}, 310 | "outputs": [ 311 | { 312 | "data": { 313 | "text/plain": [ 314 | "('\\n',\n", 315 | " 'Admission Date: [**2573-5-30**] Discharge Date: [**2573-7-1**]\\n \\nDate of Birth: [**2498-8-19**] Sex: F\\n \\nService: SURGERY\\n \\n',\n", 316 | " 'Allergies: \\nHydrochlorothiazide\\n \\nAttending:[**First Name3 (LF) 1893**] \\nChief Complaint:\\nAbdominal pain\\n \\nMajor Surgical or Invasive ',\n", 317 | " 'Procedure:\\nPICC line [**6-25**]\\nERCP w/ sphincterotomy [**5-31**]\\nTEE [**6-22**]\\nTracheostomy [**6-24**]\\n\\n \\n',\n", 318 | " 'History of Present Illness:\\n74y female with hypertension and a recent stroke affecting her \\nspeech, who presents with 2 days of abdominal pain. She states \\nit is constant, and radiates to her back. It started after \\neating a double cheese pizza and hard lemonade. There is no \\nprior history of such an episode. She had multiple bouts of \\nnausea and vomiting, with chills and decreased flatus.\\n \\n',\n", 319 | " \"Past Medical History:\\n1. Colon cancer dx'd in [**2554**], tx'd with hemicolectomy, XRT, \\nchemo. Last colonoscopy showed: Last CEA was in the 8 range \\n(down from 9) \\n2. Lymphedema from XRT, takes a diuretic \\n3. Cataracts \\n4. Hypertension \\n5. heart murmur - TTE in [**2567**] showed LA mod dilated, LV mildly \\nhypertrophied, aortic sclerosis, mild AI, mild MR. \\n6. Anxiety \\n7. CAD \\n8. Left corona radiata stroke with right facial droop and \\ndysathria [**1-/2573**]\\n9. gallstones\\n10. scoliosis\\n11. rectus sheath hematoma\\n12. history of sacral ulcer status post z-plasty\\n13. 
ectopic pregnancy x2\\n\\n \\n\",\n", 320 | " 'Social History:\\nMarried, former secretary, waitress. + tobacco x 40 years at \\n4ppd, quit 30 yrs ago. No alcohol or drug use.\\n \\n',\n", 321 | " 'Family History:\\nMother with stroke at age 82. no early deaths. \\n2 daughters- healthy\\n \\n',\n", 322 | " 'Physical Exam:\\nVS: temp 101.5, HR 114, BP 213/98, RR 20, 97%RA\\nIll appearing, no distress\\nSclera mildly icteric, mucous membranes dry\\nLungs clear to auscultation bilaterally\\nAbdomen distended, soft, diffusely tender, especially in the \\nepigastrum and right upper quandrant\\nRectal tone normal with no masses, guaiac negative\\nExtremities warm, well perfused, 3+ edema\\n \\n',\n", 323 | " 'Pertinent Results:\\n[**2573-5-30**] 09:10PM BLOOD WBC-19.2*# RBC-4.81 Hgb-15.5 Hct-44.0 \\nMCV-92 MCH-32.3* MCHC-35.2* RDW-13.3 Plt Ct-230\\n[**2573-5-30**] 09:10PM BLOOD Neuts-87* Bands-10* Lymphs-3* Monos-0 \\nEos-0 Baso-0 Atyps-0 Metas-0 Myelos-0\\n[**2573-5-30**] 09:10PM BLOOD PT-13.1 PTT-23.2 INR(PT)-1.1\\n[**2573-5-30**] 09:10PM BLOOD Glucose-189* UreaN-29* Creat-1.2* Na-143 \\nK-3.5 Cl-104 HCO3-24 AnGap-19\\n[**2573-5-30**] 09:10PM BLOOD ALT-345* AST-388* AlkPhos-246* \\nAmylase-1235* TotBili-8.4*\\n[**2573-5-30**] 09:10PM BLOOD Lipase-2443*\\n[**2573-5-30**] 09:10PM BLOOD Albumin-4.2 Calcium-9.3 Mg-1.2*\\n\\n[**2573-7-1**] 03:01AM BLOOD WBC-7.5 RBC-2.95* Hgb-8.8* Hct-27.7* \\nMCV-94 MCH-29.9 MCHC-31.8 RDW-17.1* Plt Ct-213\\n[**2573-7-1**] 03:01AM BLOOD Plt Ct-213\\n[**2573-7-1**] 03:01AM BLOOD Glucose-100 UreaN-25* Creat-1.2* Na-141 \\nK-3.5 Cl-102 HCO3-31* AnGap-12\\n[**2573-6-29**] 12:45AM BLOOD ALT-22 AST-14 AlkPhos-159* Amylase-37 \\nTotBili-0.7\\n[**2573-6-29**] 12:45AM BLOOD Lipase-37\\n[**2573-7-1**] 03:01AM BLOOD Calcium-8.4 Phos-3.0 Mg-1.8\\n[**2573-7-1**] 05:25AM BLOOD Vanco-17.8*\\n\\nUltrasound [**5-30**]: ',\n", 324 | " \"IMPRESSION: 1. 
Dilated common bile duct with \\nmild intrahepatic biliary ductal dilatation and dilataion of the \\npancreatic duct. 2. Edematous gallbladder wall.\\n\\nERCP [**5-31**]: There was bulging of the major pailla suggestive of an \\nimpacted stone. A stone causing partial obstruction was seen in \\nthe distal CBD. \\nThere was dilation of the CBD above the stone however accurate \\nradiographic evaluation could not be obtained due to the use of \\nthe C-arm in the ICU. A sphincterotomy was performed in the 12 \\no'clock position using a sphincterotome over an existing \\nguidewire. A 15mm balloon was used to sweep the duct multiple \\ntimes wich successfully extracted stones, sludge and a large \\namount of purulent material. \\n\\n \\nBrief Hospital Course:\\nMs. [**Known patient lastname 2004**] was admitted on [**2573-5-30**]. Ultrasound at the time of \\nadmission demonstrated pancreatic duct dilitation and an \\nedematous gallbladder. She was admitted to the ICU. [**5-31**] she \\nunderwent ERCP w/ sphincterotomy, an impacted stone was removed. \\nShe has had a prolonged ICU course. Review of hospital course by \\nsystem includes.\\nNeuro: Neurology was consutled on [**6-11**] for mental status \\nchanges. The team believed the patient's MS [**First Name (Titles) 1935**] [**Last Name (Titles) 2005**] from \\nher overall metabolic and infectious conditions. A head CT was \\nperformed which was negative. A lumbar puncture was also done \\nwhich too was negative. \\n\\nCardiovascular: The cardiology team was consulted on [**6-1**] and a \\nTTE was obtained showing decreased biventricular systolic \\ndysfunction representing a diffuse process. On [**6-14**] a repeat TTE \\nfollowed by a TEE was done to rule out endocarditis; no \\nvegatation or abscess was seen. A TEE was again performed on \\n[**6-22**] there was no significant change from the prior study. The \\npatient was treated with amiodarone to control her atrial \\nfibrillation. 
\\n\\nPulmonary: Patient was intubated on admission and transferred to \\nthe ICU. Patient was initially extubated on hospital day six. \\nThe patient was re-intubated on [**6-12**] for hypercarbic respiratory \\nfailure and airway protection. The patient was extubated again \\non [**6-18**] and re-intubated on [**6-19**] for respiratory decompensation. \\nThe patient ultimately underwent a tracheostomy on [**6-24**]. The \\npatient tolerated a trach mask on [**6-25**]. Bronchoscopy on [**6-26**] \\nwith suctioning of bronchial plugs. CT chest on [**6-27**] showed \\ncollapse of the left lung with left sided pleural effusion. Left \\nsided thoracentesis was done on [**6-27**] to remove fluid with hope \\nof re-expanding left lung.\\n\\nGI: Patient was admitted with a diagnosis of gallstone \\npancreatitis, she underwent ERCP w/ sphincterotomy on [**5-31**]. Her \\ntube feeds were started due anticipation of a prolonged period \\nwithout orally based enteral nutrition. [**6-9**] patient had a CT \\nscan of the abdomen,it showed no gallstones or abscess. A \\npost-pyloric dobhoff was placed on [**6-14**]. The patient's caloric \\nintake was maintained by a combination of TPN and tube feeds. \\nEach nutritional replacment was employed at different times \\nindependently of the other based on the patient's tolerance for \\ntube feeds or TPN.\\n\\nFEN: Patient was dehydrated, with hypovolemia and treated with \\naggressive fluid hydration upon admission to the hospital. Tube \\nfeeds were held on [**6-30**] due to high residuals. Currently the \\npatient is not on TPN, while nutrition more recently has been \\nmaintained with tube feeds.\\n\\nRenal: Foley in place to monitor urine output.\\n\\nHeme: Blood loss and anemia in the unit requiring multiple \\ntransfusions. Currently, hematocrit is stable.\\n\\nID: Consult was obtained on [**6-12**] the ID team continued to follow \\nthe patient throughout her entire hospital stay. 
The patient had \\nmultiple episodes of fever and cultures which were positive for \\nthe following organisms. [**6-26**] Blood: MRSA // [**6-23**] Sputum: MRSA, \\nKlebsiella // [**6-22**] Sputum: MRSA, Klebsiella // 23: Bld/Tip- pend \\n// [**6-19**] Blood: [**Female First Name (un) **] // [**6-19**]: urine - neg // [**6-18**] Blood: \\n[**Female First Name (un) **] // [**6-16**] Blood: [**Female First Name (un) **] // [**6-14**] Blood: [**Female First Name (un) **] // [**6-13**] \\nBlood: [**Female First Name (un) **] // [**6-12**] Bladder swab: Enterococcus, [**Female First Name (un) **], \\nStaph coag Pos, GNR, Staph Coag Neg // [**6-12**] Blood: [**Female First Name (un) **] // \\n[**6-11**] Cath tip: [**Female First Name (un) **] // [**6-11**] Blood: [**Female First Name (un) **] // [**6-11**] Urine: \\nEnterococcus, Yeast // [**6-9**] Cath tip: [**Female First Name (un) **] // Urine [**5-30**] \\nKLEBSIELLA PNEUMONIAE, Viridans // Blood 5/01 KLEBSIELLA \\nPNEUMONIAE, Corynybacterium \\nAdditionally the patient had multiple line changes secondary to \\nspiking temperatures and positive cultures.The patient was \\ntreated with multiple antibiotics during her hospital course and \\nat the time of discharge he was being treated with Ambisome, \\nCaspofungin and Vancomycin. Please continue Ambisome and \\ncaspofungin until [**7-6**], Vancomycin should be continued until \\n[**7-12**].\\n\\nEndo: The patient has been maintained on an insulin sliding \\nscale through the duration of her hospital course.\\n\\nConsults: The team orderd a pysch consult on the patient on [**6-7**] \\nbecuse the patient appeared to be confused. An opthomology \\nconsult was ordered to rule out fungally related eye infection. 
\\nThe patient was seen and evaluated by optho; they deemed that \\nthere was no eye infection.\\n\\nHospital Procedures while in the SICU\\nPICC line [**6-25**]\\nMulitple bronchoscopies\\nLeft thoracocentesis [**6-27**]\\nTransesophageal echo [**6-22**]: normal\\nERCP with sphincterotomy [**5-31**]\\nTracheostomy [**6-24**]\\n\\n \\n\",\n", 325 | " 'Medications on Admission:\\nASA 325mg daily\\nbuspirone 5mg TID\\ncolace 100mg BID\\nlasix 20mg daily\\nlipitor 10mg daily\\nlisinopril 20mg daily\\nneurontin 100mg BID\\nomeprazole 20mg daily\\nroxicet prn\\nzinc 220mg daily\\nvit C\\n \\n',\n", 326 | " 'Discharge Medications:\\n1. Miconazole Nitrate 2 % Powder Sig: One (1) Appl Topical BID \\n(2 times a day) as needed. \\n2. Heparin Sodium (Porcine) 5,000 unit/mL Solution Sig: One (1) \\nInjection TID (3 times a day). \\n3. Acetaminophen 160 mg/5 mL Elixir Sig: One (1) PO Q4-6H \\n(every 4 to 6 hours) as needed. \\n4. Terbinafine HCl 1 % Cream Sig: One (1) Appl Topical BID (2 \\ntimes a day). \\n5. Insulin Regular Human 100 unit/mL Solution Sig: One (1) \\nInjection ASDIR (AS DIRECTED). \\n6. Albuterol-Ipratropium 103-18 mcg/Actuation Aerosol Sig: [**1-31**] \\nPuffs Inhalation Q6H (every 6 hours) as needed. \\n7. Lansoprazole 30 mg Capsule, Delayed Release(E.C.) Sig: One \\n(1) Capsule, Delayed Release(E.C.) PO DAILY (Daily). \\n8. Metoprolol Tartrate 50 mg Tablet Sig: 1.5 Tablets PO TID (3 \\ntimes a day). \\n9. Sertraline HCl 50 mg Tablet Sig: One (1) Tablet PO DAILY \\n(Daily). \\n10. Amiodarone HCl 200 mg Tablet Sig: Two (2) Tablet PO BID (2 \\ntimes a day). \\n11. Hydralazine HCl 10 mg IV Q6 PRN \\n12. Fentanyl Citrate 25-50 mcg IV Q2H:PRN \\n13. Caspofungin 50 mg IV Q24H \\n14. Ambisome 300 mg IV Q24H \\n15. Furosemide 40 mg IV BID \\n16. Vancomycin HCl 1000 mg IV Q24H \\nvanco level 17 \\n17. 
Dolasetron Mesylate 12.5 mg IV Q4H:PRN \\n\\n \\nDischarge Disposition:\\nExtended Care\\n \\nFacility:\\n[**Hospital3 1446**] & Rehab Center - [**Hospital1 370**]\\n \\n',\n", 327 | " 'Discharge Diagnosis:\\nAtrial Fibrillation\\nPancreatitis\\nHTN\\nhyperlipidemia\\nh/o aspiration respiratory distress\\nbacteremia ([**Female First Name (un) 929**])\\nUTI (klebsiella)\\n\\n \\nDischarge Condition:\\nGood\\n \\n',\n", 328 | " 'Discharge Instructions:\\nPatient may shower. Please call your surgeon or return to the \\nemergency room if [**Doctor First Name **] experience fever >101.5, nausea, vomiting, \\nabdominal pain, shortness of breath, abdominal pain or any \\nsignificant change in your medical condition. Ambisome and \\ncaspofungin should be continued til [**7-6**] while vanco should be \\ncontinued til [**7-12**].\\n \\n',\n", 329 | " 'Followup Instructions:\\nPlease follow up with Dr. [**Last Name (STitle) **] in 2 weeks. Upon discharge \\nplease call Dr.[**Initials (NamePattern4) 1895**] [**Last Name (NamePattern4) 2006**] in order to schedule your \\nfollow up appointment.([**Telephone/Fax (1) 2007**]\\n \\nProvider: [**Name10 (NameIs) 296**] [**Last Name (NamePattern4) 340**], M.D. [**MD Number 60**]: [**Hospital6 64**] \\n[**Hospital3 2008**] CENTER Phone:[**Telephone/Fax (1) 341**] Date/Time:[**2573-11-24**] 9:15\\n \\nProvider: [**First Name11 (Name Pattern1) **] [**Last Name (NamePattern1) 2009**], MD [**MD Number 60**]: [**Hospital6 64**] \\nHEMATOLOGY/ONCOLOGY Phone:[**Telephone/Fax (1) 936**] Date/Time:[**2574-5-11**] 10:00\\n \\n\\n \\n\\n \\n [**First Name11 (Name Pattern1) **] [**Last Name (NamePattern4) **] MD [**MD Number 1896**]\\n \\nCompleted by: [**First Name11 (Name Pattern1) 2010**] [**Last Name (NamePattern1) 2011**] MD [**MD Number 2012**] [**2573-7-1**] @ 1404\\n',\n", 330 | " 'Signed electronically by: DR. 
[**First Name8 (NamePattern2) **] [**Last Name (NamePattern1) **]\\n on: FRI [**2573-7-2**] 8:03 AM\\n(End of Report)\\n')" 331 | ] 332 | }, 333 | "execution_count": 14, 334 | "metadata": {}, 335 | "output_type": "execute_result" 336 | } 337 | ], 338 | "source": [ 339 | "section_texts" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": null, 345 | "metadata": {}, 346 | "outputs": [], 347 | "source": [] 348 | }, 349 | { 350 | "cell_type": "markdown", 351 | "metadata": {}, 352 | "source": [ 353 | "## Limiting sections\n", 354 | "Once you identify the sections in a document, you can then exclude any other sections which aren't relevant. You can then process each section separately or combine them into a smaller, more selective document." 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": 15, 360 | "metadata": {}, 361 | "outputs": [], 362 | "source": [ 363 | "relevant_section_titles = [\"present_illness\", \"medication\"]\n", 364 | "relevant_sections = [section for (section_title, section_header, section) in sections \n", 365 | " if section_title in relevant_section_titles]" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 16, 371 | "metadata": {}, 372 | "outputs": [ 373 | { 374 | "data": { 375 | "text/plain": [ 376 | "['present_illness', 'medication']" 377 | ] 378 | }, 379 | "execution_count": 16, 380 | "metadata": {}, 381 | "output_type": "execute_result" 382 | } 383 | ], 384 | "source": [ 385 | "relevant_section_titles" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": 17, 391 | "metadata": {}, 392 | "outputs": [], 393 | "source": [ 394 | "relevant_text = \"\\n\\n\".join(relevant_sections)" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": 18, 400 | "metadata": {}, 401 | "outputs": [], 402 | "source": [ 403 | "import spacy\n", 404 | "from cycontext.viz import visualize_ent " 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | 
"execution_count": 19, 410 | "metadata": {}, 411 | "outputs": [ 412 | { 413 | "name": "stderr", 414 | "output_type": "stream", 415 | "text": [ 416 | "/Users/alecchapman/opt/anaconda3/envs/medspacy-37/lib/python3.7/site-packages/spacy/util.py:275: UserWarning: [W031] Model 'en_info_3700_i2b2_2012' (0.1.0) requires spaCy v2.2 and is incompatible with the current spaCy version (2.3.2). This may lead to unexpected results or runtime errors. To resolve this, download a newer compatible model or retrain your custom model with the current spaCy version. For more details and available updates, run: python -m spacy validate\n", 417 | " warnings.warn(warn_msg)\n" 418 | ] 419 | } 420 | ], 421 | "source": [ 422 | "nlp = spacy.load(\"en_info_3700_i2b2_2012\")" 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": 20, 428 | "metadata": {}, 429 | "outputs": [ 430 | { 431 | "data": { 432 | "text/plain": [ 433 | "" 434 | ] 435 | }, 436 | "execution_count": 20, 437 | "metadata": {}, 438 | "output_type": "execute_result" 439 | } 440 | ], 441 | "source": [ 442 | "nlp" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 21, 448 | "metadata": {}, 449 | "outputs": [], 450 | "source": [ 451 | "doc = nlp(relevant_text)" 452 | ] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "execution_count": 22, 457 | "metadata": {}, 458 | "outputs": [ 459 | { 460 | "data": { 461 | "text/html": [ 462 | "

History of Present Illness:
74y female with \n", 463 | "\n", 464 | " hypertension\n", 465 | " PROBLEM\n", 466 | "\n", 467 | " and \n", 468 | "\n", 469 | " a recent stroke\n", 470 | " PROBLEM\n", 471 | "\n", 472 | " affecting her
speech, who presents with 2 days of \n", 473 | "\n", 474 | " abdominal pain\n", 475 | " PROBLEM\n", 476 | "\n", 477 | ". She states
it is constant, and \n", 478 | "\n", 479 | " radiates to her back\n", 480 | " PROBLEM\n", 481 | "\n", 482 | ". It started after
eating a double cheese pizza and \n", 483 | "\n", 484 | " hard lemonade\n", 485 | " PROBLEM\n", 486 | "\n", 487 | ". There is no
prior history of such \n", 488 | "\n", 489 | " an episode\n", 490 | " PROBLEM\n", 491 | "\n", 492 | ". She had \n", 493 | "\n", 494 | " multiple bouts\n", 495 | " PROBLEM\n", 496 | "\n", 497 | " of
\n", 498 | "\n", 499 | " nausea\n", 500 | " PROBLEM\n", 501 | "\n", 502 | " and \n", 503 | "\n", 504 | " vomiting\n", 505 | " PROBLEM\n", 506 | "\n", 507 | ", with \n", 508 | "\n", 509 | " chills\n", 510 | " PROBLEM\n", 511 | "\n", 512 | " and \n", 513 | "\n", 514 | " decreased flatus\n", 515 | " PROBLEM\n", 516 | "\n", 517 | ".

\n", 518 | "\n", 519 | " Medications\n", 520 | " TREATMENT\n", 521 | "\n", 522 | " on Admission:
\n", 523 | "\n", 524 | " ASA\n", 525 | " TREATMENT\n", 526 | "\n", 527 | " 325mg daily
\n", 528 | "\n", 529 | " buspirone\n", 530 | " TREATMENT\n", 531 | "\n", 532 | " 5mg TID
\n", 533 | "\n", 534 | " colace\n", 535 | " TREATMENT\n", 536 | "\n", 537 | " 100mg BID
\n", 538 | "\n", 539 | " lasix\n", 540 | " TREATMENT\n", 541 | "\n", 542 | " 20mg daily
\n", 543 | "\n", 544 | " lipitor\n", 545 | " TREATMENT\n", 546 | "\n", 547 | " 10mg daily
\n", 548 | "\n", 549 | " lisinopril\n", 550 | " TREATMENT\n", 551 | "\n", 552 | " 20mg daily
\n", 553 | "\n", 554 | " neurontin\n", 555 | " TREATMENT\n", 556 | "\n", 557 | " 100mg BID
\n", 558 | "\n", 559 | " omeprazole\n", 560 | " TREATMENT\n", 561 | "\n", 562 | " 20mg daily
\n", 563 | "\n", 564 | " roxicet\n", 565 | " TREATMENT\n", 566 | "\n", 567 | " prn
\n", 568 | "\n", 569 | " zinc\n", 570 | " TEST\n", 571 | "\n", 572 | " 220mg daily
vit C

Discharge Medications:
1. \n", 573 | "\n", 574 | " Miconazole\n", 575 | " TREATMENT\n", 576 | "\n", 577 | " Nitrate 2 % Powder Sig: One (1) Appl Topical BID
(2 times a day) as needed.
2. \n", 578 | "\n", 579 | " Heparin Sodium\n", 580 | " TREATMENT\n", 581 | "\n", 582 | " (\n", 583 | "\n", 584 | " Porcine\n", 585 | " TREATMENT\n", 586 | "\n", 587 | ") 5,000 unit/mL Solution Sig: One (1)
Injection TID (3 times a day).
3. Acetaminophen 160 mg/5 mL Elixir Sig: One (1) PO Q4-6H
(every 4 to 6 hours) as needed.
4. \n", 588 | "\n", 589 | " Terbinafine HCl\n", 590 | " TREATMENT\n", 591 | "\n", 592 | " 1 % Cream Sig: One (1) Appl Topical BID (2
times a day).
5. Insulin Regular Human 100 unit/mL Solution Sig: One (1)
Injection ASDIR (AS DIRECTED).
6. \n", 593 | "\n", 594 | " Albuterol-Ipratropium\n", 595 | " TREATMENT\n", 596 | "\n", 597 | " 103-18 mcg/Actuation Aerosol Sig: [**1-31**]
Puffs Inhalation Q6H (every 6 hours) as needed.
7. \n", 598 | "\n", 599 | " Lansoprazole\n", 600 | " TREATMENT\n", 601 | "\n", 602 | " 30 mg Capsule, Delayed Release(E.C.) Sig: One
(1) Capsule, Delayed Release(E.C.) PO DAILY (Daily).
8. Metoprolol Tartrate 50 mg Tablet Sig: 1.5 Tablets PO TID (3
times a day).
9. \n", 603 | "\n", 604 | " Sertraline HCl\n", 605 | " TREATMENT\n", 606 | "\n", 607 | " 50 mg Tablet Sig: One (1) Tablet PO DAILY
(Daily).
10. \n", 608 | "\n", 609 | " Amiodarone HCl\n", 610 | " TREATMENT\n", 611 | "\n", 612 | " 200 mg Tablet Sig: Two (2) Tablet PO BID (2
times a day).
11. \n", 613 | "\n", 614 | " Hydralazine HCl\n", 615 | " TREATMENT\n", 616 | "\n", 617 | " 10 mg IV Q6 PRN
12. \n", 618 | "\n", 619 | " Fentanyl Citrate\n", 620 | " TREATMENT\n", 621 | "\n", 622 | " 25-50 mcg \n", 623 | "\n", 624 | " IV Q2H\n", 625 | " PROBLEM\n", 626 | "\n", 627 | ":PRN
13. Caspofungin 50 mg IV Q24H
14. \n", 628 | "\n", 629 | " Ambisome\n", 630 | " TREATMENT\n", 631 | "\n", 632 | " 300 mg IV Q24H
15. \n", 633 | "\n", 634 | " Furosemide\n", 635 | " TREATMENT\n", 636 | "\n", 637 | " 40 mg IV BID
16. \n", 638 | "\n", 639 | " Vancomycin HCl\n", 640 | " TREATMENT\n", 641 | "\n", 642 | " 1000 mg IV Q24H
vanco level 17
17. \n", 643 | "\n", 644 | " Dolasetron Mesylate\n", 645 | " TREATMENT\n", 646 | "\n", 647 | " 12.5 mg IV Q4H:PRN \n", 648 | "\n", 649 | " \n", 650 | "Discharge Disposition:\n", 651 | "Extended Care\n", 652 | " \n", 653 | "Facility:\n", 654 | "[**Hospital3 1446**] & Rehab Center - [**Hospital1 370**]\n", 655 | " \n", 656 | "

" 657 | ], 658 | "text/plain": [ 659 | "" 660 | ] 661 | }, 662 | "metadata": {}, 663 | "output_type": "display_data" 664 | } 665 | ], 666 | "source": [ 667 | "visualize_ent(doc)" 668 | ] 669 | }, 670 | { 671 | "cell_type": "code", 672 | "execution_count": null, 673 | "metadata": {}, 674 | "outputs": [], 675 | "source": [] 676 | } 677 | ], 678 | "metadata": { 679 | "kernelspec": { 680 | "display_name": "Python 3", 681 | "language": "python", 682 | "name": "python3" 683 | }, 684 | "language_info": { 685 | "codemirror_mode": { 686 | "name": "ipython", 687 | "version": 3 688 | }, 689 | "file_extension": ".py", 690 | "mimetype": "text/x-python", 691 | "name": "python", 692 | "nbconvert_exporter": "python", 693 | "pygments_lexer": "ipython3", 694 | "version": "3.7.9" 695 | } 696 | }, 697 | "nbformat": 4, 698 | "nbformat_minor": 4 699 | } 700 | -------------------------------------------------------------------------------- /notebooks/03-subsections.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Subsections\n", 8 | "\n", 9 | "The medspacy sectionizer supports adding subsections to your document." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import spacy\n", 19 | "\n", 20 | "import sys\n", 21 | "sys.path.insert(0, \"..\")\n", 22 | "\n", 23 | "from clinical_sectionizer import Sectionizer" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "Here are four example documents showing slight permutations of a section-subsection structure found in text." 
31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "text1 = '''Past Medical History: \n", 40 | "pt has history of medical events\n", 41 | "Comments: some comment here\n", 42 | "\n", 43 | "Allergies:\n", 44 | "peanuts\n", 45 | "'''\n", 46 | "\n", 47 | "text2 = '''Past Medical History: \n", 48 | "pt has history of medical events\n", 49 | "Comments: some comment here\n", 50 | "\n", 51 | "Allergies:\n", 52 | "peanuts\n", 53 | "Comments: pt cannot eat peanuts\n", 54 | "'''\n", 55 | "\n", 56 | "text3 = '''Past Medical History: \n", 57 | "pt has history of medical events\n", 58 | "\n", 59 | "Allergies:\n", 60 | "peanuts\n", 61 | "Comments: pt cannot eat peanuts\n", 62 | "'''\n", 63 | "\n", 64 | "text4 = '''Past Medical History: \n", 65 | "pt has history of medical events\n", 66 | "\n", 67 | "Allergies:\n", 68 | "peanuts\n", 69 | "\n", 70 | "Medical Assessment: pt has a fever\n", 71 | "Comments: fever is 101F\n", 72 | "'''" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "# Parent-Child attachment\n", 80 | "Rules specify a `parents` list. This defines all possible legal parents for this section by their `section_title`. The specific parent (if any exist) of each match is determined at runtime. In this example, we define four sections and the comment section has two candidate parents." 
81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 3, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "nlp = spacy.load(\"en_core_web_sm\")" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 4, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "sectionizer = Sectionizer(nlp,patterns=None)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 5, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "patterns = [{\"section_title\":\"past_medical_history\",\"pattern\":\"Past Medical History:\"},\n", 108 | " {\"section_title\":\"allergies\",\"pattern\":\"Allergies:\"},\n", 109 | " {\"section_title\":\"medical_assessment\",\"pattern\":\"Medical Assessment:\"},\n", 110 | " {\"section_title\":\"comment\",\"pattern\":\"Comments:\",\"parents\":[\"past_medical_history\",\"allergies\"]}]" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 6, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "sectionizer.add(patterns)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 7, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "nlp.add_pipe(sectionizer)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "We can print out the output of the sectionizer on each of these documents and see how they vary.\n", 136 | "\n", 137 | "In the first case, we see that three sections are identified in the text and the comment section has a parent \"past_medical_history\"" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 8, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "TITLE................. past_medical_history\n", 150 | "TEXT.................. Past Medical History:\n", 151 | "PARENT................ 
None\n", 152 | "SECTION TEXT..........\n", 153 | "Past Medical History: \n", 154 | "pt has history of medical events\n", 155 | "\n", 156 | "----------------------\n", 157 | "TITLE................. comment\n", 158 | "TEXT.................. Comments:\n", 159 | "PARENT................ past_medical_history\n", 160 | "SECTION TEXT..........\n", 161 | "Comments: some comment here\n", 162 | "\n", 163 | "\n", 164 | "----------------------\n", 165 | "TITLE................. allergies\n", 166 | "TEXT.................. Allergies:\n", 167 | "PARENT................ None\n", 168 | "SECTION TEXT..........\n", 169 | "Allergies:\n", 170 | "peanuts\n", 171 | "\n", 172 | "----------------------\n" 173 | ] 174 | } 175 | ], 176 | "source": [ 177 | "doc = nlp(text1)\n", 178 | "for title,text,parent,section in doc._.sections:\n", 179 | " print(\"TITLE................. {0}\".format(title))\n", 180 | " print(\"TEXT.................. {0}\".format(text))\n", 181 | " print(\"PARENT................ {0}\".format(parent))\n", 182 | " print(\"SECTION TEXT..........\\n{0}\".format(section))\n", 183 | " print(\"----------------------\")" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "In this next document, there are two comment sections, each of which attaches to its closest parent section. Subsections cannot jump over other sections to attach to a parent." 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 9, 196 | "metadata": {}, 197 | "outputs": [ 198 | { 199 | "name": "stdout", 200 | "output_type": "stream", 201 | "text": [ 202 | "TITLE................. past_medical_history\n", 203 | "TEXT.................. Past Medical History:\n", 204 | "PARENT................ None\n", 205 | "SECTION TEXT..........\n", 206 | "Past Medical History: \n", 207 | "pt has history of medical events\n", 208 | "\n", 209 | "----------------------\n", 210 | "TITLE................. comment\n", 211 | "TEXT.................. 
Comments:\n", 212 | "PARENT................ past_medical_history\n", 213 | "SECTION TEXT..........\n", 214 | "Comments: some comment here\n", 215 | "\n", 216 | "\n", 217 | "----------------------\n", 218 | "TITLE................. allergies\n", 219 | "TEXT.................. Allergies:\n", 220 | "PARENT................ None\n", 221 | "SECTION TEXT..........\n", 222 | "Allergies:\n", 223 | "peanuts\n", 224 | "\n", 225 | "----------------------\n", 226 | "TITLE................. comment\n", 227 | "TEXT.................. Comments:\n", 228 | "PARENT................ allergies\n", 229 | "SECTION TEXT..........\n", 230 | "Comments: pt cannot eat peanuts\n", 231 | "\n", 232 | "----------------------\n" 233 | ] 234 | } 235 | ], 236 | "source": [ 237 | "doc = nlp(text2)\n", 238 | "for title,text,parent,section in doc._.sections:\n", 239 | " print(\"TITLE................. {0}\".format(title))\n", 240 | " print(\"TEXT.................. {0}\".format(text))\n", 241 | " print(\"PARENT................ {0}\".format(parent))\n", 242 | " print(\"SECTION TEXT..........\\n{0}\".format(section))\n", 243 | " print(\"----------------------\")" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "This example further illustrates how subsections cannot attach to non-adjacent candidate parents. The subsection in `past_medical_history` has been removed but the `allergies` subsection matches the same as before" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 10, 256 | "metadata": {}, 257 | "outputs": [ 258 | { 259 | "name": "stdout", 260 | "output_type": "stream", 261 | "text": [ 262 | "TITLE................. past_medical_history\n", 263 | "TEXT.................. Past Medical History:\n", 264 | "PARENT................ 
None\n", 265 | "SECTION TEXT..........\n", 266 | "Past Medical History: \n", 267 | "pt has history of medical events\n", 268 | "\n", 269 | "\n", 270 | "----------------------\n", 271 | "TITLE................. allergies\n", 272 | "TEXT.................. Allergies:\n", 273 | "PARENT................ None\n", 274 | "SECTION TEXT..........\n", 275 | "Allergies:\n", 276 | "peanuts\n", 277 | "\n", 278 | "----------------------\n", 279 | "TITLE................. comment\n", 280 | "TEXT.................. Comments:\n", 281 | "PARENT................ allergies\n", 282 | "SECTION TEXT..........\n", 283 | "Comments: pt cannot eat peanuts\n", 284 | "\n", 285 | "----------------------\n" 286 | ] 287 | } 288 | ], 289 | "source": [ 290 | "doc = nlp(text3)\n", 291 | "for title,text,parent,section in doc._.sections:\n", 292 | " print(\"TITLE................. {0}\".format(title))\n", 293 | " print(\"TEXT.................. {0}\".format(text))\n", 294 | " print(\"PARENT................ {0}\".format(parent))\n", 295 | " print(\"SECTION TEXT..........\\n{0}\".format(section))\n", 296 | " print(\"----------------------\")" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "This final example shows that if no adjacent parent candidates exist, then no match will be made. `medical_assessment` was not listed as a candidate parent for `comment`, so there is no parent attachment made by the comment following this section." 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 11, 309 | "metadata": {}, 310 | "outputs": [ 311 | { 312 | "name": "stdout", 313 | "output_type": "stream", 314 | "text": [ 315 | "TITLE................. past_medical_history\n", 316 | "TEXT.................. Past Medical History:\n", 317 | "PARENT................ 
None\n", 318 | "SECTION TEXT..........\n", 319 | "Past Medical History: \n", 320 | "pt has history of medical events\n", 321 | "\n", 322 | "\n", 323 | "--------------------------\n", 324 | "TITLE................. allergies\n", 325 | "TEXT.................. Allergies:\n", 326 | "PARENT................ None\n", 327 | "SECTION TEXT..........\n", 328 | "Allergies:\n", 329 | "peanuts\n", 330 | "\n", 331 | "\n", 332 | "--------------------------\n", 333 | "TITLE................. medical_assessment\n", 334 | "TEXT.................. Medical Assessment:\n", 335 | "PARENT................ None\n", 336 | "SECTION TEXT..........\n", 337 | "Medical Assessment: pt has a fever\n", 338 | "\n", 339 | "--------------------------\n", 340 | "TITLE................. comment\n", 341 | "TEXT.................. Comments:\n", 342 | "PARENT................ None\n", 343 | "SECTION TEXT..........\n", 344 | "Comments: fever is 101F\n", 345 | "\n", 346 | "--------------------------\n" 347 | ] 348 | } 349 | ], 350 | "source": [ 351 | "doc = nlp(text4)\n", 352 | "for title,text,parent,section in doc._.sections:\n", 353 | " print(\"TITLE................. {0}\".format(title))\n", 354 | " print(\"TEXT.................. {0}\".format(text))\n", 355 | " print(\"PARENT................ {0}\".format(parent))\n", 356 | " print(\"SECTION TEXT..........\\n{0}\".format(section))\n", 357 | " print(\"--------------------------\")" 358 | ] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": {}, 363 | "source": [ 364 | "# Requiring Parents for matched sections\n", 365 | "\n", 366 | "It is possible to specify that a section is required to find a valid parent in order to be included in the resulting document. When the pattern defines the optional parameter `parent_required` as `True`, if the section finds no parent section in the document, then the section will be removed from the output.\n", 367 | "\n", 368 | "The following text shows a short example where a required parent might be useful. 
In this document, there are two mentions of the word \"color\". One might be part of a section, but without further specification, the other might be a false positive. There may be more than one way to solve this ambiguity, such as incorporating punctuation or proximity to line endings for further context." 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": 12, 374 | "metadata": {}, 375 | "outputs": [], 376 | "source": [ 377 | "text5 = '''Patient is 6 years old and says his favorite color is purple\n", 378 | "\n", 379 | "medical assessment\n", 380 | "patient has a bruise from a bicycle accident\n", 381 | "color\n", 382 | "blue\n", 383 | "'''" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": 13, 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [ 392 | "nlp = spacy.load(\"en_core_web_sm\")" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 14, 398 | "metadata": {}, 399 | "outputs": [], 400 | "source": [ 401 | "sectionizer = Sectionizer(nlp,patterns=None)" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 15, 407 | "metadata": {}, 408 | "outputs": [], 409 | "source": [ 410 | "patterns = [{\"section_title\":\"medical_assessment\",\"pattern\":\"medical assessment\"},\n", 411 | " {\"section_title\":\"color\",\"pattern\":\"color\",\"parents\":[\"medical_assessment\"],\"parent_required\":True}]" 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": 16, 417 | "metadata": {}, 418 | "outputs": [], 419 | "source": [ 420 | "sectionizer.add(patterns)" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": 17, 426 | "metadata": {}, 427 | "outputs": [], 428 | "source": [ 429 | "nlp.add_pipe(sectionizer)" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 18, 435 | "metadata": {}, 436 | "outputs": [ 437 | { 438 | "name": "stdout", 439 | "output_type": "stream", 440 | "text": [ 441 | 
"TITLE................. None\n", 442 | "TEXT.................. None\n", 443 | "PARENT................ None\n", 444 | "SECTION TEXT..........\n", 445 | "Patient is 6 years old and says his favorite color is purple\n", 446 | "\n", 447 | "\n", 448 | "----------------------\n", 449 | "TITLE................. medical_assessment\n", 450 | "TEXT.................. medical assessment\n", 451 | "PARENT................ None\n", 452 | "SECTION TEXT..........\n", 453 | "medical assessment\n", 454 | "patient has a bruise from a bicycle accident\n", 455 | "\n", 456 | "----------------------\n", 457 | "TITLE................. color\n", 458 | "TEXT.................. color\n", 459 | "PARENT................ medical_assessment\n", 460 | "SECTION TEXT..........\n", 461 | "color\n", 462 | "blue\n", 463 | "\n", 464 | "----------------------\n" 465 | ] 466 | } 467 | ], 468 | "source": [ 469 | "doc = nlp(text5)\n", 470 | "for title,text,parent,section in doc._.sections:\n", 471 | " print(\"TITLE................. {0}\".format(title))\n", 472 | " print(\"TEXT.................. {0}\".format(text))\n", 473 | " print(\"PARENT................ {0}\".format(parent))\n", 474 | " print(\"SECTION TEXT..........\\n{0}\".format(section))\n", 475 | " print(\"----------------------\")" 476 | ] 477 | }, 478 | { 479 | "cell_type": "markdown", 480 | "metadata": {}, 481 | "source": [ 482 | "# Subsection trees and backtracking\n", 483 | "\n", 484 | "Subsections can be chained together and the parent matching will traverse the tree structure to match to the correct legal parent.\n", 485 | "\n", 486 | "The following two examples show deep subsection structures in a document. The first document is a simple example showing the subsection chaining that might exist in a document. The second example is more complex and shows subsection siblings (sections at the same depth of the subsection tree) and backtracking out of some, but not all subsections." 
487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": 19, 492 | "metadata": {}, 493 | "outputs": [], 494 | "source": [ 495 | "text6 = '''Section 1: some text\n", 496 | "Section 1.1: Some other text\n", 497 | "Section 1.1.1: Even more text\n", 498 | "Section 1.1.1.1: How deep can sections go?\n", 499 | "'''\n", 500 | "\n", 501 | "text7 = '''Section 1: some text\n", 502 | "Section 1.1: Some other text\n", 503 | "Section 1.1.1: Even more text\n", 504 | "Section 1.1.1.1: How deep can sections go?\n", 505 | "Section 1.1.1.2: As deep as you want!\n", 506 | "Section 1.2: Let's backtrack\n", 507 | "Section 2: A whole new section\n", 508 | "'''" 509 | ] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "execution_count": 20, 514 | "metadata": {}, 515 | "outputs": [], 516 | "source": [ 517 | "nlp = spacy.load(\"en_core_web_sm\")" 518 | ] 519 | }, 520 | { 521 | "cell_type": "code", 522 | "execution_count": 21, 523 | "metadata": {}, 524 | "outputs": [], 525 | "source": [ 526 | "sectionizer = Sectionizer(nlp,patterns=None)" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": 22, 532 | "metadata": {}, 533 | "outputs": [], 534 | "source": [ 535 | "patterns = [{\"section_title\":\"s1\",\"pattern\":\"Section 1:\"},\n", 536 | " {\"section_title\":\"s1.1\",\"pattern\":\"Section 1.1:\", \"parents\":[\"s1\"]},\n", 537 | " {\"section_title\":\"s1.1.1\",\"pattern\":\"Section 1.1.1:\", \"parents\":[\"s1.1\"]},\n", 538 | " {\"section_title\":\"s1.1.1.1\",\"pattern\":\"Section 1.1.1.1:\",\"parents\":[\"s1.1.1\"]},\n", 539 | " {\"section_title\":\"s1.1.1.2\",\"pattern\":\"Section 1.1.1.2:\",\"parents\":[\"s1.1.1\"]},\n", 540 | " {\"section_title\":\"s1.2\",\"pattern\":\"Section 1.2:\",\"parents\":[\"s1\"]},\n", 541 | " {\"section_title\":\"s2\",\"pattern\":\"Section 2:\"}]" 542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": 23, 547 | "metadata": {}, 548 | "outputs": [], 549 | "source": [ 550 | 
"sectionizer.add(patterns)" 551 | ] 552 | }, 553 | { 554 | "cell_type": "code", 555 | "execution_count": 24, 556 | "metadata": {}, 557 | "outputs": [], 558 | "source": [ 559 | "nlp.add_pipe(sectionizer)" 560 | ] 561 | }, 562 | { 563 | "cell_type": "code", 564 | "execution_count": 25, 565 | "metadata": {}, 566 | "outputs": [ 567 | { 568 | "name": "stdout", 569 | "output_type": "stream", 570 | "text": [ 571 | "TITLE................. s1\n", 572 | "TEXT.................. Section 1:\n", 573 | "PARENT................ None\n", 574 | "SECTION TEXT..........\n", 575 | "Section 1: some text\n", 576 | "\n", 577 | "----------------------\n", 578 | "TITLE................. s1.1\n", 579 | "TEXT.................. Section 1.1:\n", 580 | "PARENT................ s1\n", 581 | "SECTION TEXT..........\n", 582 | "Section 1.1: Some other text\n", 583 | "\n", 584 | "----------------------\n", 585 | "TITLE................. s1.1.1\n", 586 | "TEXT.................. Section 1.1.1:\n", 587 | "PARENT................ s1.1\n", 588 | "SECTION TEXT..........\n", 589 | "Section 1.1.1: Even more text\n", 590 | "\n", 591 | "----------------------\n", 592 | "TITLE................. s1.1.1.1\n", 593 | "TEXT.................. Section 1.1.1.1:\n", 594 | "PARENT................ s1.1.1\n", 595 | "SECTION TEXT..........\n", 596 | "Section 1.1.1.1: How deep can sections go?\n", 597 | "\n", 598 | "----------------------\n" 599 | ] 600 | } 601 | ], 602 | "source": [ 603 | "doc = nlp(text6)\n", 604 | "for title,text,parent,section in doc._.sections:\n", 605 | " print(\"TITLE................. {0}\".format(title))\n", 606 | " print(\"TEXT.................. {0}\".format(text))\n", 607 | " print(\"PARENT................ 
{0}\".format(parent))\n", 608 | " print(\"SECTION TEXT..........\\n{0}\".format(section))\n", 609 | " print(\"----------------------\")" 610 | ] 611 | }, 612 | { 613 | "cell_type": "code", 614 | "execution_count": 26, 615 | "metadata": {}, 616 | "outputs": [ 617 | { 618 | "name": "stdout", 619 | "output_type": "stream", 620 | "text": [ 621 | "TITLE................. s1\n", 622 | "TEXT.................. Section 1:\n", 623 | "PARENT................ None\n", 624 | "SECTION TEXT..........\n", 625 | "Section 1: some text\n", 626 | "\n", 627 | "----------------------\n", 628 | "TITLE................. s1.1\n", 629 | "TEXT.................. Section 1.1:\n", 630 | "PARENT................ s1\n", 631 | "SECTION TEXT..........\n", 632 | "Section 1.1: Some other text\n", 633 | "\n", 634 | "----------------------\n", 635 | "TITLE................. s1.1.1\n", 636 | "TEXT.................. Section 1.1.1:\n", 637 | "PARENT................ s1.1\n", 638 | "SECTION TEXT..........\n", 639 | "Section 1.1.1: Even more text\n", 640 | "\n", 641 | "----------------------\n", 642 | "TITLE................. s1.1.1.1\n", 643 | "TEXT.................. Section 1.1.1.1:\n", 644 | "PARENT................ s1.1.1\n", 645 | "SECTION TEXT..........\n", 646 | "Section 1.1.1.1: How deep can sections go?\n", 647 | "\n", 648 | "----------------------\n", 649 | "TITLE................. s1.1.1.2\n", 650 | "TEXT.................. Section 1.1.1.2:\n", 651 | "PARENT................ s1.1.1\n", 652 | "SECTION TEXT..........\n", 653 | "Section 1.1.1.2: As deep as you want!\n", 654 | "\n", 655 | "----------------------\n", 656 | "TITLE................. s1.2\n", 657 | "TEXT.................. Section 1.2:\n", 658 | "PARENT................ s1\n", 659 | "SECTION TEXT..........\n", 660 | "Section 1.2: Let's backtrack\n", 661 | "\n", 662 | "----------------------\n", 663 | "TITLE................. s2\n", 664 | "TEXT.................. Section 2:\n", 665 | "PARENT................ 
None\n", 666 | "SECTION TEXT..........\n", 667 | "Section 2: A whole new section\n", 668 | "\n", 669 | "----------------------\n" 670 | ] 671 | } 672 | ], 673 | "source": [ 674 | "doc = nlp(text7)\n", 675 | "for title,text,parent,section in doc._.sections:\n", 676 | " print(\"TITLE................. {0}\".format(title))\n", 677 | " print(\"TEXT.................. {0}\".format(text))\n", 678 | " print(\"PARENT................ {0}\".format(parent))\n", 679 | " print(\"SECTION TEXT..........\\n{0}\".format(section))\n", 680 | " print(\"----------------------\")" 681 | ] 682 | }, 683 | { 684 | "cell_type": "code", 685 | "execution_count": null, 686 | "metadata": {}, 687 | "outputs": [], 688 | "source": [] 689 | } 690 | ], 691 | "metadata": { 692 | "kernelspec": { 693 | "display_name": "Python 3", 694 | "language": "python", 695 | "name": "python3" 696 | }, 697 | "language_info": { 698 | "codemirror_mode": { 699 | "name": "ipython", 700 | "version": 3 701 | }, 702 | "file_extension": ".py", 703 | "mimetype": "text/x-python", 704 | "name": "python", 705 | "nbconvert_exporter": "python", 706 | "pygments_lexer": "ipython3", 707 | "version": "3.7.9" 708 | } 709 | }, 710 | "nbformat": 4, 711 | "nbformat_minor": 4 712 | } 713 | -------------------------------------------------------------------------------- /notebooks/example_discharge_summary.txt: -------------------------------------------------------------------------------- 1 | 2 | Admission Date: [**2573-5-30**] Discharge Date: [**2573-7-1**] 3 | 4 | Date of Birth: [**2498-8-19**] Sex: F 5 | 6 | Service: SURGERY 7 | 8 | Allergies: 9 | Hydrochlorothiazide 10 | 11 | Attending:[**First Name3 (LF) 1893**] 12 | Chief Complaint: 13 | Abdominal pain 14 | 15 | Major Surgical or Invasive Procedure: 16 | PICC line [**6-25**] 17 | ERCP w/ sphincterotomy [**5-31**] 18 | TEE [**6-22**] 19 | Tracheostomy [**6-24**] 20 | 21 | 22 | History of Present Illness: 23 | 74y female with hypertension and a recent stroke affecting her 24 | speech, 
who presents with 2 days of abdominal pain. She states 25 | it is constant, and radiates to her back. It started after 26 | eating a double cheese pizza and hard lemonade. There is no 27 | prior history of such an episode. She had multiple bouts of 28 | nausea and vomiting, with chills and decreased flatus. 29 | 30 | Past Medical History: 31 | 1. Colon cancer dx'd in [**2554**], tx'd with hemicolectomy, XRT, 32 | chemo. Last colonoscopy showed: Last CEA was in the 8 range 33 | (down from 9) 34 | 2. Lymphedema from XRT, takes a diuretic 35 | 3. Cataracts 36 | 4. Hypertension 37 | 5. heart murmur - TTE in [**2567**] showed LA mod dilated, LV mildly 38 | hypertrophied, aortic sclerosis, mild AI, mild MR. 39 | 6. Anxiety 40 | 7. CAD 41 | 8. Left corona radiata stroke with right facial droop and 42 | dysathria [**1-/2573**] 43 | 9. gallstones 44 | 10. scoliosis 45 | 11. rectus sheath hematoma 46 | 12. history of sacral ulcer status post z-plasty 47 | 13. ectopic pregnancy x2 48 | 49 | 50 | Social History: 51 | Married, former secretary, waitress. + tobacco x 40 years at 52 | 4ppd, quit 30 yrs ago. No alcohol or drug use. 53 | 54 | Family History: 55 | Mother with stroke at age 82. no early deaths. 
56 | 2 daughters- healthy 57 | 58 | Physical Exam: 59 | VS: temp 101.5, HR 114, BP 213/98, RR 20, 97%RA 60 | Ill appearing, no distress 61 | Sclera mildly icteric, mucous membranes dry 62 | Lungs clear to auscultation bilaterally 63 | Abdomen distended, soft, diffusely tender, especially in the 64 | epigastrum and right upper quandrant 65 | Rectal tone normal with no masses, guaiac negative 66 | Extremities warm, well perfused, 3+ edema 67 | 68 | Pertinent Results: 69 | [**2573-5-30**] 09:10PM BLOOD WBC-19.2*# RBC-4.81 Hgb-15.5 Hct-44.0 70 | MCV-92 MCH-32.3* MCHC-35.2* RDW-13.3 Plt Ct-230 71 | [**2573-5-30**] 09:10PM BLOOD Neuts-87* Bands-10* Lymphs-3* Monos-0 72 | Eos-0 Baso-0 Atyps-0 Metas-0 Myelos-0 73 | [**2573-5-30**] 09:10PM BLOOD PT-13.1 PTT-23.2 INR(PT)-1.1 74 | [**2573-5-30**] 09:10PM BLOOD Glucose-189* UreaN-29* Creat-1.2* Na-143 75 | K-3.5 Cl-104 HCO3-24 AnGap-19 76 | [**2573-5-30**] 09:10PM BLOOD ALT-345* AST-388* AlkPhos-246* 77 | Amylase-1235* TotBili-8.4* 78 | [**2573-5-30**] 09:10PM BLOOD Lipase-2443* 79 | [**2573-5-30**] 09:10PM BLOOD Albumin-4.2 Calcium-9.3 Mg-1.2* 80 | 81 | [**2573-7-1**] 03:01AM BLOOD WBC-7.5 RBC-2.95* Hgb-8.8* Hct-27.7* 82 | MCV-94 MCH-29.9 MCHC-31.8 RDW-17.1* Plt Ct-213 83 | [**2573-7-1**] 03:01AM BLOOD Plt Ct-213 84 | [**2573-7-1**] 03:01AM BLOOD Glucose-100 UreaN-25* Creat-1.2* Na-141 85 | K-3.5 Cl-102 HCO3-31* AnGap-12 86 | [**2573-6-29**] 12:45AM BLOOD ALT-22 AST-14 AlkPhos-159* Amylase-37 87 | TotBili-0.7 88 | [**2573-6-29**] 12:45AM BLOOD Lipase-37 89 | [**2573-7-1**] 03:01AM BLOOD Calcium-8.4 Phos-3.0 Mg-1.8 90 | [**2573-7-1**] 05:25AM BLOOD Vanco-17.8* 91 | 92 | Ultrasound [**5-30**]: IMPRESSION: 1. Dilated common bile duct with 93 | mild intrahepatic biliary ductal dilatation and dilataion of the 94 | pancreatic duct. 2. Edematous gallbladder wall. 95 | 96 | ERCP [**5-31**]: There was bulging of the major pailla suggestive of an 97 | impacted stone. A stone causing partial obstruction was seen in 98 | the distal CBD. 
99 | There was dilation of the CBD above the stone however accurate 100 | radiographic evaluation could not be obtained due to the use of 101 | the C-arm in the ICU. A sphincterotomy was performed in the 12 102 | o'clock position using a sphincterotome over an existing 103 | guidewire. A 15mm balloon was used to sweep the duct multiple 104 | times wich successfully extracted stones, sludge and a large 105 | amount of purulent material. 106 | 107 | 108 | Brief Hospital Course: 109 | Ms. [**Known patient lastname 2004**] was admitted on [**2573-5-30**]. Ultrasound at the time of 110 | admission demonstrated pancreatic duct dilitation and an 111 | edematous gallbladder. She was admitted to the ICU. [**5-31**] she 112 | underwent ERCP w/ sphincterotomy, an impacted stone was removed. 113 | She has had a prolonged ICU course. Review of hospital course by 114 | system includes. 115 | Neuro: Neurology was consutled on [**6-11**] for mental status 116 | changes. The team believed the patient's MS [**First Name (Titles) 1935**] [**Last Name (Titles) 2005**] from 117 | her overall metabolic and infectious conditions. A head CT was 118 | performed which was negative. A lumbar puncture was also done 119 | which too was negative. 120 | 121 | Cardiovascular: The cardiology team was consulted on [**6-1**] and a 122 | TTE was obtained showing decreased biventricular systolic 123 | dysfunction representing a diffuse process. On [**6-14**] a repeat TTE 124 | followed by a TEE was done to rule out endocarditis; no 125 | vegatation or abscess was seen. A TEE was again performed on 126 | [**6-22**] there was no significant change from the prior study. The 127 | patient was treated with amiodarone to control her atrial 128 | fibrillation. 129 | 130 | Pulmonary: Patient was intubated on admission and transferred to 131 | the ICU. Patient was initially extubated on hospital day six. 
132 | The patient was re-intubated on [**6-12**] for hypercarbic respiratory 133 | failure and airway protection. The patient was extubated again 134 | on [**6-18**] and re-intubated on [**6-19**] for respiratory decompensation. 135 | The patient ultimately underwent a tracheostomy on [**6-24**]. The 136 | patient tolerated a trach mask on [**6-25**]. Bronchoscopy on [**6-26**] 137 | with suctioning of bronchial plugs. CT chest on [**6-27**] showed 138 | collapse of the left lung with left sided pleural effusion. Left 139 | sided thoracentesis was done on [**6-27**] to remove fluid with hope 140 | of re-expanding left lung. 141 | 142 | GI: Patient was admitted with a diagnosis of gallstone 143 | pancreatitis, she underwent ERCP w/ sphincterotomy on [**5-31**]. Her 144 | tube feeds were started due anticipation of a prolonged period 145 | without orally based enteral nutrition. [**6-9**] patient had a CT 146 | scan of the abdomen,it showed no gallstones or abscess. A 147 | post-pyloric dobhoff was placed on [**6-14**]. The patient's caloric 148 | intake was maintained by a combination of TPN and tube feeds. 149 | Each nutritional replacment was employed at different times 150 | independently of the other based on the patient's tolerance for 151 | tube feeds or TPN. 152 | 153 | FEN: Patient was dehydrated, with hypovolemia and treated with 154 | aggressive fluid hydration upon admission to the hospital. Tube 155 | feeds were held on [**6-30**] due to high residuals. Currently the 156 | patient is not on TPN, while nutrition more recently has been 157 | maintained with tube feeds. 158 | 159 | Renal: Foley in place to monitor urine output. 160 | 161 | Heme: Blood loss and anemia in the unit requiring multiple 162 | transfusions. Currently, hematocrit is stable. 163 | 164 | ID: Consult was obtained on [**6-12**] the ID team continued to follow 165 | the patient throughout her entire hospital stay. 
The patient had 166 | multiple episodes of fever and cultures which were positive for 167 | the following organisms. [**6-26**] Blood: MRSA // [**6-23**] Sputum: MRSA, 168 | Klebsiella // [**6-22**] Sputum: MRSA, Klebsiella // 23: Bld/Tip- pend 169 | // [**6-19**] Blood: [**Female First Name (un) **] // [**6-19**]: urine - neg // [**6-18**] Blood: 170 | [**Female First Name (un) **] // [**6-16**] Blood: [**Female First Name (un) **] // [**6-14**] Blood: [**Female First Name (un) **] // [**6-13**] 171 | Blood: [**Female First Name (un) **] // [**6-12**] Bladder swab: Enterococcus, [**Female First Name (un) **], 172 | Staph coag Pos, GNR, Staph Coag Neg // [**6-12**] Blood: [**Female First Name (un) **] // 173 | [**6-11**] Cath tip: [**Female First Name (un) **] // [**6-11**] Blood: [**Female First Name (un) **] // [**6-11**] Urine: 174 | Enterococcus, Yeast // [**6-9**] Cath tip: [**Female First Name (un) **] // Urine [**5-30**] 175 | KLEBSIELLA PNEUMONIAE, Viridans // Blood 5/01 KLEBSIELLA 176 | PNEUMONIAE, Corynybacterium 177 | Additionally the patient had multiple line changes secondary to 178 | spiking temperatures and positive cultures.The patient was 179 | treated with multiple antibiotics during her hospital course and 180 | at the time of discharge he was being treated with Ambisome, 181 | Caspofungin and Vancomycin. Please continue Ambisome and 182 | caspofungin until [**7-6**], Vancomycin should be continued until 183 | [**7-12**]. 184 | 185 | Endo: The patient has been maintained on an insulin sliding 186 | scale through the duration of her hospital course. 187 | 188 | Consults: The team orderd a pysch consult on the patient on [**6-7**] 189 | becuse the patient appeared to be confused. An opthomology 190 | consult was ordered to rule out fungally related eye infection. 191 | The patient was seen and evaluated by optho; they deemed that 192 | there was no eye infection. 
193 | 194 | Hospital Procedures while in the SICU 195 | PICC line [**6-25**] 196 | Mulitple bronchoscopies 197 | Left thoracocentesis [**6-27**] 198 | Transesophageal echo [**6-22**]: normal 199 | ERCP with sphincterotomy [**5-31**] 200 | Tracheostomy [**6-24**] 201 | 202 | 203 | Medications on Admission: 204 | ASA 325mg daily 205 | buspirone 5mg TID 206 | colace 100mg BID 207 | lasix 20mg daily 208 | lipitor 10mg daily 209 | lisinopril 20mg daily 210 | neurontin 100mg BID 211 | omeprazole 20mg daily 212 | roxicet prn 213 | zinc 220mg daily 214 | vit C 215 | 216 | Discharge Medications: 217 | 1. Miconazole Nitrate 2 % Powder Sig: One (1) Appl Topical BID 218 | (2 times a day) as needed. 219 | 2. Heparin Sodium (Porcine) 5,000 unit/mL Solution Sig: One (1) 220 | Injection TID (3 times a day). 221 | 3. Acetaminophen 160 mg/5 mL Elixir Sig: One (1) PO Q4-6H 222 | (every 4 to 6 hours) as needed. 223 | 4. Terbinafine HCl 1 % Cream Sig: One (1) Appl Topical BID (2 224 | times a day). 225 | 5. Insulin Regular Human 100 unit/mL Solution Sig: One (1) 226 | Injection ASDIR (AS DIRECTED). 227 | 6. Albuterol-Ipratropium 103-18 mcg/Actuation Aerosol Sig: [**1-31**] 228 | Puffs Inhalation Q6H (every 6 hours) as needed. 229 | 7. Lansoprazole 30 mg Capsule, Delayed Release(E.C.) Sig: One 230 | (1) Capsule, Delayed Release(E.C.) PO DAILY (Daily). 231 | 8. Metoprolol Tartrate 50 mg Tablet Sig: 1.5 Tablets PO TID (3 232 | times a day). 233 | 9. Sertraline HCl 50 mg Tablet Sig: One (1) Tablet PO DAILY 234 | (Daily). 235 | 10. Amiodarone HCl 200 mg Tablet Sig: Two (2) Tablet PO BID (2 236 | times a day). 237 | 11. Hydralazine HCl 10 mg IV Q6 PRN 238 | 12. Fentanyl Citrate 25-50 mcg IV Q2H:PRN 239 | 13. Caspofungin 50 mg IV Q24H 240 | 14. Ambisome 300 mg IV Q24H 241 | 15. Furosemide 40 mg IV BID 242 | 16. Vancomycin HCl 1000 mg IV Q24H 243 | vanco level 17 244 | 17. 
Dolasetron Mesylate 12.5 mg IV Q4H:PRN 245 | 246 | 247 | Discharge Disposition: 248 | Extended Care 249 | 250 | Facility: 251 | [**Hospital3 1446**] & Rehab Center - [**Hospital1 370**] 252 | 253 | Discharge Diagnosis: 254 | Atrial Fibrillation 255 | Pancreatitis 256 | HTN 257 | hyperlipidemia 258 | h/o aspiration respiratory distress 259 | bacteremia ([**Female First Name (un) 929**]) 260 | UTI (klebsiella) 261 | 262 | 263 | Discharge Condition: 264 | Good 265 | 266 | Discharge Instructions: 267 | Patient may shower. Please call your surgeon or return to the 268 | emergency room if [**Doctor First Name **] experience fever >101.5, nausea, vomiting, 269 | abdominal pain, shortness of breath, abdominal pain or any 270 | significant change in your medical condition. Ambisome and 271 | caspofungin should be continued til [**7-6**] while vanco should be 272 | continued til [**7-12**]. 273 | 274 | Followup Instructions: 275 | Please follow up with Dr. [**Last Name (STitle) **] in 2 weeks. Upon discharge 276 | please call Dr.[**Initials (NamePattern4) 1895**] [**Last Name (NamePattern4) 2006**] in order to schedule your 277 | follow up appointment.([**Telephone/Fax (1) 2007**] 278 | 279 | Provider: [**Name10 (NameIs) 296**] [**Last Name (NamePattern4) 340**], M.D. [**MD Number 60**]: [**Hospital6 64**] 280 | [**Hospital3 2008**] CENTER Phone:[**Telephone/Fax (1) 341**] Date/Time:[**2573-11-24**] 9:15 281 | 282 | Provider: [**First Name11 (Name Pattern1) **] [**Last Name (NamePattern1) 2009**], MD [**MD Number 60**]: [**Hospital6 64**] 283 | HEMATOLOGY/ONCOLOGY Phone:[**Telephone/Fax (1) 936**] Date/Time:[**2574-5-11**] 10:00 284 | 285 | 286 | 287 | 288 | 289 | [**First Name11 (Name Pattern1) **] [**Last Name (NamePattern4) **] MD [**MD Number 1896**] 290 | 291 | Completed by: [**First Name11 (Name Pattern1) 2010**] [**Last Name (NamePattern1) 2011**] MD [**MD Number 2012**] [**2573-7-1**] @ 1404 292 | Signed electronically by: DR. 
[**First Name8 (NamePattern2) **] [**Last Name (NamePattern1) **] 293 | on: FRI [**2573-7-2**] 8:03 AM 294 | (End of Report) 295 | -------------------------------------------------------------------------------- /notebooks/with_compile_flags.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from add_parent_path import add_parent_path\n", 10 | "\n", 11 | "with add_parent_path(1):\n", 12 | " from clinical_sectionizer import TextSectionizer" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 14, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "text = \"\"\"\n", 22 | " FINDINGS: Compared to the prior days study, there is stable appearance of the\n", 23 | " right parietal intraparenchymal hemorrhage with surrounding edema. At the\n", 24 | " superior margin of the parenchymal hemorrhage there is a rounded heterogeneous\n", 25 | " focus which could represent a metastatic lesion. An additional 2mm hyperdense\n", 26 | " focus, possibly hemorrhage, is noted in the posteromedial margin of the left\n", 27 | " thalamus, with surroundng edema. Low-attenuation foci seen in both basal\n", 28 | " ganglia and insular regions are consistent with chronic lacunar infarcts.\n", 29 | " There is no shift of midline structures. The ventricles are stable in\n", 30 | " appearance. 
The osseous and soft tissue structures are unremarkable.\n", 31 | " \n", 32 | " IMPRESSION: Stable appearance of right parietal lobe and left thalamic\n", 33 | " hemorrhages, which are concerning for hemorrhagic metastasis in this patient\n", 34 | " with known metastatic lung carcinoma to the brain.\"\"\"" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "#### By default section detection is done ignoring case (`re.I`)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 23, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "sectionizer = TextSectionizer(patterns=None)\n", 51 | "\n", 52 | "rad_patterns = [{'section_title': 'impression', \n", 53 | " 'pattern':'impression:'},]\n", 54 | "\n", 55 | "sectionizer.add(rad_patterns)\n", 56 | "sections = sectionizer(text)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "#### Text is split into two parts: before and after section" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 25, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "data": { 73 | "text/plain": [ 74 | "2" 75 | ] 76 | }, 77 | "execution_count": 25, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | } 81 | ], 82 | "source": [ 83 | "len(sections)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 26, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "data": { 93 | "text/plain": [ 94 | "('impression',\n", 95 | " 'IMPRESSION:',\n", 96 | " 'IMPRESSION: Stable appearance of right parietal lobe and left thalamic\\n hemorrhages, which are concerning for hemorrhagic metastasis in this patient\\n with known metastatic lung carcinoma to the brain.')" 97 | ] 98 | }, 99 | "execution_count": 26, 100 | "metadata": {}, 101 | "output_type": "execute_result" 102 | } 103 | ], 104 | "source": [ 105 | "sections[1]" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | 
"source": [ 112 | "#### Create regular expression without `re.I` flag" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 27, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "sectionizer = TextSectionizer(patterns=None)\n", 122 | "\n", 123 | "rad_patterns = [{'section_title': 'impression', \n", 124 | " 'pattern':'impression:'},]\n", 125 | "\n", 126 | "sectionizer.add(rad_patterns, cflags=[])\n", 127 | "sections = sectionizer(text)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "#### Section not detected\n", 135 | "\n", 136 | "Only one section in result" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 29, 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | "data": { 146 | "text/plain": [ 147 | "1" 148 | ] 149 | }, 150 | "execution_count": 29, 151 | "metadata": {}, 152 | "output_type": "execute_result" 153 | } 154 | ], 155 | "source": [ 156 | "len(sections)" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [] 165 | } 166 | ], 167 | "metadata": { 168 | "kernelspec": { 169 | "display_name": "Python 3", 170 | "language": "python", 171 | "name": "python3" 172 | }, 173 | "language_info": { 174 | "codemirror_mode": { 175 | "name": "ipython", 176 | "version": 3 177 | }, 178 | "file_extension": ".py", 179 | "mimetype": "text/x-python", 180 | "name": "python", 181 | "nbconvert_exporter": "python", 182 | "pygments_lexer": "ipython3", 183 | "version": "3.7.3" 184 | } 185 | }, 186 | "nbformat": 4, 187 | "nbformat_minor": 4 188 | } 189 | -------------------------------------------------------------------------------- /resources/patrick_section_patterns.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "pmh": 4 | [ 5 | "(past )?medica\\s+(history|hx):", 6 | "\\bmhx?:", 7 | "\\bmh?:", 8 | "pohx:", 9 | "pmh?:", 
10 | "past\\s*history:", 11 | "history:", 12 | "p[hm]+\\s*computerized\\s+problem\\s+list:", 13 | "significant\\smedical\\shx:", 14 | "past\\smedical\\shistory\\s?\\/\\s?problem list:", 15 | "Past surgical history:", 16 | "patient_history:", 17 | "patient history:", 18 | "pt history:", 19 | "history/physical examination:", 20 | "history physical examination:", 21 | "clinical history:", 22 | "in clinical history:", 23 | "clinical history/indications:", 24 | "clinical history indication:", 25 | "issues briefly as following:", 26 | "issue briefly as following:", 27 | "history:", 28 | "current medical problems:", 29 | "current medical problem:", 30 | "history of chronic illness:", 31 | "history chronic illness:", 32 | "clinical presentation:", 33 | "issues briefly as follows:", 34 | "issue briefly as follow:", 35 | "interval history:", 36 | "past medical history and review of systems:", 37 | "past medical history and review of system:", 38 | "past medical history review system:", 39 | "past medical problems:", 40 | "past medical problem:", 41 | "history of past illness:", 42 | "history past illness:", 43 | "past medical history:", 44 | "previous medical history:", 45 | "hematology/oncology history:", 46 | "hematology oncology history:", 47 | "history of general health:", 48 | "history general health:", 49 | "past medical history/past surgical history:", 50 | "past medical history past surgical history:", 51 | "medical problems:", 52 | "medical problem:", 53 | "significant past medical history:", 54 | "history of major illnesses and injuries:", 55 | "history of major illness and injury:", 56 | "history major illness injury:", 57 | "past med history:", 58 | "past hospitalization history:", 59 | "medical history:", 60 | "past medical and surgical history:", 61 | "past medical surgical history:", 62 | "brief medical history:", 63 | "Past Medical History/Problem List:", 64 | "past medical history problem list:", 65 | "past medical issues:", 66 | "past medical issue:", 67 | 
"past_medical_history:", 68 | "past medical history/surgical history:", 69 | "past medical history surgical history:", 70 | "past infectious history:", 71 | "past medical history/family history:", 72 | "past medical history family history:", 73 | "Known Significant Medical Diagnoses and Conditions:", 74 | "past history:", 75 | "past medical history and physical examination:", 76 | "past medical history physical examination:", 77 | "past medical history/physical examination:", 78 | "past_medical_history_and_physical_examination:", 79 | "Historical\\s*data:?" 80 | ], 81 | 82 | "family_history": [ 83 | "family history:" 84 | ], 85 | 86 | "problem_list": [ 87 | "active problem list:", 88 | "chronic\\s+stable\\s+problems:", 89 | "PROBLEMS ?\\- ACTIVE:", 90 | "problem\\s*list" 91 | 92 | ], 93 | 94 | "sexual and social history": 95 | [ 96 | "SH:", 97 | "social history:", 98 | "PSHx:", 99 | "soc\\s*hx:", 100 | "mh/pshx:", 101 | "sexual history", 102 | "pmh[sx]:", 103 | "pmhx\\/pshx:", 104 | "sexual history:", 105 | "LGBTQ SCREENING NWI:" 106 | 107 | ], 108 | 109 | 110 | "HIV screening": [ 111 | "HIV:", 112 | "HIV Screening:", 113 | "HIV Risk:" 114 | 115 | ], 116 | 117 | "Observation_and_plan": [ 118 | "ADDITIONAL ASSESSMENT:", 119 | "MEDICAL\\s*DECISION\\s*MAKING/PLAN:", 120 | "ASS:", 121 | "ASSESMENT:", 122 | "ASSESS:", 123 | "ASSESSMENT:", 124 | "Assessment\\s*.\\s*Plan:", 125 | "CLINICAL IMPRESSION:", 126 | "CLINICAL IMPRESSIONS:", 127 | "\\bimp:", 128 | "Imp:", 129 | "IMPRESSION AND RECOMMENDATION:", 130 | "IMPRESSION AND RECOMMENDATIONS:", 131 | "IMPRESSION RECOMMENDATION:", 132 | "IMPRESSION SECTION:", 133 | "IMPRESSION:", 134 | "IMPRESSION/ASSESSMENT:", 135 | "IMPRESSIONS:", 136 | "IMPRESSIONS/ASSESSMENT:", 137 | "IMPRESSSION:", 138 | "IMPRESSSIONS:", 139 | "INITAL IMPRESSION:", 140 | "INITIAL ASSESSMENT:", 141 | "INITIAL IMPRESSION:", 142 | "INITIAL IMPRESSION/ASSESSMENT:", 143 | "Recommendations:", 144 | "\\bA/P:", 145 | "\\bA/P:", 146 | "\\bA:", 147 | "\\r(\\n)?
*ASSESSMENT AND PLAN:", 148 | "\\r(\\n)? *Impression:", 149 | "assessment and plan:", 150 | "assessment and recommendation:", 151 | "assessment and recommendations:", 152 | "assessment plan:", 153 | "assessment recommendation:", 154 | "assessment:", 155 | "assessment/plan:", 156 | "assessment:", 157 | "assessment_and_plan:", 158 | "clinical comment:", 159 | "clinical comments:", 160 | "clinical impression:", 161 | "discharge diagnoses:", 162 | "diagnoses:", 163 | "discharge diagnosis:", 164 | "diagnosis:", 165 | "diagnosis:", 166 | "impression and plan:", 167 | "impression and plans:", 168 | "impression and recommendation:", 169 | "impression and recommendations:", 170 | "impression plan:", 171 | "impression recommendation:", 172 | "impression(:|-|\\*)", 173 | "impresion:", 174 | "impression/plan:", 175 | "impression/recommendations:", 176 | "initial impression:", 177 | "interpretation:", 178 | "objective:", 179 | "plan and discussion:", 180 | "plan discussion:", 181 | "medical decision making:", 182 | 183 | "\\bplan:" 184 | 185 | 186 | ], 187 | 188 | 189 | "medication": [ 190 | "ACTIVE\\s*INPATIENT\\s*AND\\s*OUTPATIENT\\s*MEDICATIONS:", 191 | "ACTIVE\\s*MEDICATIONS:", 192 | "ACTIVE\\s*MEDICATIONS\\s*COMBINED:", 193 | "ACTIVE\\s*MEDICATIONS\\s*INCLUDE:", 194 | "ACTIVE\\s*MEDICATIONS\\s*LIST:", 195 | "ACTIVE\\s*MEDICATIONS\\s*PRESCRIBED\\s*AT\\s*SAGINAW\\s*VAMC:", 196 | "ACTIVE\\s*MEDICATIONS\\s*PRESCRIBED\\s*AT\\s*THE\\s*SAGINAW\\s*VAMC:", 197 | "ACTIVE\\s*NON-VA\\s*MEDICATIONS:", 198 | "ACTIVE\\s*NONVA\\s*MEDICATIONS:", 199 | "ACTIVE\\s*NON\\s*VA\\s*MEDICATIONS:", 200 | "ACTIVE\\s*OPT\\s*MEDICATIONS:", 201 | "ACTIVE\\s*OUTPATIENT\\s*MEDICATIONS:", 202 | "ACTIVE\\s*OUTPATIENT\\s*PRESCRIPTIONS:", 203 | "ACTIVE\\s*VA\\s*MEDICATIONS:", 204 | "ACTIVE\\s*\\s*MEDICATIONS:", 205 | "ADMISSION\\s*MEDICATIONS:", 206 | "ALL\\s*ACTIVE\\s*MEDICATIONS:", 207 | "Active\\s*Inpatient\\s*Medications\\s*\\(including\\s*supplies\\):", 208 |
"Active\\s*Inpatient\\s*Medications\\s*drug\\s*dosage:", 209 | "Active\\s*Inpatient\\s*Medications\\s*status:", 210 | "Active\\s*Medications\\s*\\(including\\s*supplies\\):", 211 | "Active\\s*Medications\\s*from\\s*Remote\\s*Data:", 212 | "Active\\s*Outpatient\\s*Medications\\s*\\(including\\s*supplies\\):", 213 | "Active\\s*medications:", 214 | "Active\\s*medications?\\s*prior\\s*to\\s*admission:", 215 | "CORRECT\\s*MEDICATIONS\\s*INCLUDE:", 216 | "CURRENT\\s*INPATIENT\\s*MEDICATIONS:", 217 | "CURRENT\\s*INPATIENT\\s*MEDICATIONS\\s*INCLUDE:", 218 | "CURRENT\\s*MEDICATIONS:", 219 | "CURRENT\\s*MEDICATIONS/RECONCILIATION:", 220 | "CURRENT\\s*MEDICATIONS\\s*LIST:", 221 | "DISCHARGE\\s*MEDICATIONS:", 222 | "DRUGS:", 223 | "HEALTH\\s*SUPPLEMENTS:", 224 | "HISTORY/MEDICATIONS:", 225 | "HISTORY\\s*OF\\s*MEDICATION\\s*TREATMENTS:", 226 | "HISTORY\\s*OF\\s*MEDICATION\\s*USE:", 227 | "Home\\s*medications:", 228 | "INACTIVE\\s*OUTPATIENT\\s*MEDICATIONS:", 229 | "INHOSPITAL\\s*MEDICATIONS:", 230 | "INPATIENT\\s*MEDICATIONS:", 231 | "INPATIENT\\s*MEDICATION\\s*RECONCILIATION:", 232 | "INPT\\s*MEDICATIONS:", 233 | "Inpatient\\s*medications?:", 234 | "Inpatient\\s*medications?\\s*=:", 235 | "MEDICATION(S)\\s*REVIEW:", 236 | "MEDICATIONS:", 237 | "MEDICATIONS\\s*AT\\s*ADMISSION:", 238 | "MEDICATIONS\\s*AT\\s*DISCHARGE:", 239 | "MEDICATIONS\\s*DURING\\s*ADMISSION:", 240 | "MEDICATIONS\\s*GIVEN\\s*TODAY:", 241 | "MEDICATIONS\\s*ON\\s*ADMISSION:", 242 | "MEDICATIONS\\s*ON\\s*DISCHARGE:", 243 | "MEDICATIONS\\s*PRIOR\\s*TO\\s*ADMISSION:", 244 | "MEDICATION\\s*ADMISSION:", 245 | "MEDICATION\\s*AT\\s*ADMISSION:", 246 | "MEDICATION\\s*AT\\s*DISCHARGE:", 247 | "MEDICATION\\s*DURING\\s*ADMISSION:", 248 | "MEDICATION\\s*HISTORY:", 249 | "MEDICATION\\s*MANAGEMENT\\s*AT\\s*DISCHARGE:", 250 | "MEDICATION\\s*ON\\s*ADMISSION:", 251 | "MEDICATION\\s*PRIOR\\s*ADMISSION:", 252 | "MEDICATION\\s*PRIOR\\s*TO\\s*ADMISSION:", 253 | "MEDICATION\\s*RECONCILIATION:", 254 |
"MEDICATION\\s*RECONCILIATION\\s*REVIEW:", 255 | "MEDICATION\\s*RECONCILIATION\\s*SUMMARY:", 256 | "MEDICATION\\s*RECONCILLIATION:", 257 | "MEDICATION\\s*REVIEW\\s*for\\s*MEDICATION\\s*RECONCILIATION:", 258 | "MEDICINES\\s*AT\\s*PHARMACY:", 259 | "MED\\s*RECON:", 260 | "MED\\s*RECONCILIATION:", 261 | "MED\\s*RECONCILIATION\\s*OUTPT:", 262 | "MISUSE\\s*OF\\s*MEDICATIONS:", 263 | "M\\s*E\\s*D\\s*I\\s*C\\s*A\\s*T\\s*I\\s*O\\s*N\\s*S:", 264 | "NON-VA\\s*MEDICATIONS:", 265 | "NON-VA\\s*PRESCRIBED:", 266 | "NON-VA\\s*PRESCRIPTIONS:", 267 | "NON-VA\\s*PRESCRIPTION\\s*MEDICATIONS:", 268 | "NON-VA\\s*SUPPLIED\\s*MEDICATIONS:", 269 | "NONVA\\s*MEDICATIONS:", 270 | "NONVA\\s*MEDICATIONS\\s*LIST:", 271 | "NON\\s*VA:", 272 | "NON\\s*VA\\s*MEDICATIONS:", 273 | "NON\\s*VA\\s*PRESCRIBED:", 274 | "NON\\s*VA\\s*PRESCRIPTIONS:", 275 | "NON\\s*VA\\s*PRESCRIPTION\\s*MEDICATIONS:", 276 | "NON\\s*VA\\s*SUPPLIED\\s*MEDICATIONS:", 277 | "Non-VA\\s*medications:", 278 | "OUTPATIENT\\s*MEDICATIONS:", 279 | "OUTPATIENT\\s*MEDICATION\\s*REVIEW:", 280 | "OUTPT.\\s*MEDICATION\\s*RECONCILIATION:", 281 | "OUTPT\\s*MEDICATIONS:", 282 | "Outpatient\\s*medications:", 283 | "Outpatient\\s*medications\\s*status:", 284 | "Outpatient\\s*meds\\s*DRUG\\s*List:", 285 | "PENDING\\s*INPATIENT\\s*MEDICATIONS:", 286 | "PRE-ADMISSION\\s*MEDICATIONS:", 287 | "PRE-VISIT\\s*MED\\s*RECONCILIATION:", 288 | "PREADMISSION\\s*MEDICATION:", 289 | "PRESENT\\s*MEDICATIONS:", 290 | "PROVIDER\\s*MED\\s*RECONCILIATION:", 291 | "PTA\\s*Meds:", 292 | "RECONCILED\\s*MEDICATION\\s*LIST:", 293 | "RECONCILIATION:", 294 | "RECONCILIATION\\s*OF\\s*MEDICATIONS\\s*COMPLETED:", 295 | "SIGNIFICANT\\s*MEDICATIONS:", 296 | "SUBSTANCE\\s*USE/MISUSE\\s*OF\\s*MEDICATIONS:", 297 | "Status\\s*Active:", 298 | "VA\\s*MEDICATIONS:", 299 | "\\r(\\n)? *Active Outpatient Medications:", 300 | "\\r(\\n)? 
*meds:", 301 | "\\r(\\n)?\\s*MEDICATIONS:", 302 | "summary\\s*of\\s*medications", 303 | "MEDICATIONS \\(as listed in Vista\\):", 304 | "OTC OR NON-VA PRESCRIPTION MEDICATIONS:", 305 | "Active\\s*Outpatient\\s*Medications\\s*\\(including Supplies\\):" 306 | ], 307 | 308 | "allergy": [ 309 | 310 | "A L L E R G I E S:", 311 | "ADDITIONAL ADRS AND/OR ALLERGIES:", 312 | "ADR:", 313 | "ADVERSE DRUG REACTIONS:", 314 | "ADVERSE EVENTS:", 315 | "ADVERSE REACTION:", 316 | "ADVERSE REACTIONS:", 317 | "ALLERGIC DISORDER HISTORY:", 318 | "ALLERGIC REACTIONS:", 319 | "ALLERGIC:", 320 | "ALLERGIES AND ADVERSE REACTIONS:", 321 | "ALLERGIES AND SENSITIVITIES:", 322 | "ALLERGIES FAMILY HISTORY:", 323 | "ALLERGIES REVIEWED:", 324 | "ALLERGIES TO MEDICATIONS:", 325 | "ALLERGIES/ADVERSE REACTIONS:", 326 | "ALLERGIES/REACTIONS:", 327 | "ALLERGIES:", 328 | "ALLERGY ADVERSE REACTION:", 329 | "ALLERGY ENVIRONMENTAL ALLERGEN:", 330 | "ALLERGY FAMILY HISTORY:", 331 | "ALLERGY INFORMATION:", 332 | "ALLERGY REVIEW:", 333 | "ALLERGY SCREENING:", 334 | "ALLERGY SYMPTOM:", 335 | "ALLERGY SYMPTOMS:", 336 | "ALLERGY TO ENVIRONMENTAL ALLERGEN:", 337 | "ALLERGY TO ENVIRONMENTAL ALLERGENS:", 338 | "ALLERGY/ADVERSE DRUG REACTION HISTORY:", 339 | "ALLERGY/ADVERSE DRUG REACTION INFORMATION:", 340 | "ALLERGY/ADVERSE DRUG REACTION:", 341 | "ALLERGY:", 342 | "CONCOMITANT MEDICATIONS:", 343 | "CURRENT ALLERGIES:", 344 | "DRUG ALLERGIC REACTIONS:", 345 | "DRUG ALLERGIES:", 346 | "DRUG SENSITIVITIES:", 347 | "FOOD & DRUG REACTIONS INCLUDING ALLERGIES AS ENTERED IN CPRS:", 348 | "FOOD ALLERGIES:", 349 | "HISTORY ALLERGY:", 350 | "HISTORY OF ALLERGIES:", 351 | "KNOWN ALLERGIES:", 352 | "LATEX ALLERGY:", 353 | "MEDICATIONS ALLERGIES:", 354 | "NEW ALLERGIES:", 355 | "NEWLY IDENTIFIED ALLERGIES:", 356 | "OTHER ALLERGIES:", 357 | "PREVIOUSLY DOCUMENTED ALLERGIES:", 358 | "SEASONAL ALLERGIES:", 359 | "SEASONAL ALLERGY:", 360 | "SENSITIVITIES:", 361 | "\\r\\n *\\d+ *\\).?
*allergies:", 362 | "\\r\\n *all:", 363 | "\\r\\n *allergies:", 364 | "\\r\\n *allergy:", 365 | "allergies/adr:", 366 | "allergies:", 367 | "allergy", 368 | 369 | "ALLERGIES AS DISPLAYED IN VISTA:", 370 | "Allergy/Other Non-VA or VA Meds:", 371 | "ALLERGY ASSESSMENT:" 372 | ], 373 | "Chief complaint":[ 374 | "CHIEF COMPLAINT:" 375 | ], 376 | 377 | 378 | "Physical Exam": [ 379 | "\\r(\\n)? *Physical Exam\\w*?:", 380 | "\\r(\\n)? *Review of systems:", 381 | "PHYSICAL EXAMINATION", 382 | "PE:", 383 | "exam:" 384 | ], 385 | 386 | "ED_Course": [ 387 | "ED\\s*COURSE:", 388 | "Er\\s*COURSE:", 389 | "Emergency\\s*Department\\s*Course" 390 | 391 | ], 392 | 393 | 394 | "labs_and_studies": [ 395 | "\\r(\\n)? *findings *:", 396 | "\\r(\\n)? *LABORATORY DATA:", 397 | "\\r(\\n)? *operation and findings *:", 398 | "\\r(\\n)? *operative findings *:", 399 | "\\r(\\n)? *pathologic staging *:", 400 | "\\r(\\n)? *pathology report *:", 401 | "\\r(\\n)? *performing lab\\b *:", 402 | "\\r(\\n)? *performing laboratory *(| *\r(\n)?):", 403 | "\\r(\\n)? *reporting lab *:", 404 | "\\bo:", 405 | "objective:", 406 | "s/o:", 407 | "indication:", 408 | "clinical indication:", 409 | "indication:", 410 | "indications:", 411 | "\\r(\\n)? *micro *:", 412 | "\\r(\\n)? *micro exam *", 413 | "labs:" 414 | 415 | ], 416 | 417 | "Present Illness": [ 418 | "hpi/interval history:", 419 | "hpi interval history:", 420 | "patient hpi:", 421 | "present illness:", 422 | "history_present_illness:", 423 | "history of the present illness:", 424 | "history of present illness:", 425 | "history present illness:", 426 | "summary of present illness:", 427 | "summary present illness" 428 | ], 429 | 430 | 431 | "other": [ 432 | "\\r(\\n)? *A signed copy of this report:", 433 | "\\r(\\n)? *modified report *:", 434 | "\\r(\\n)? *note *:", 435 | "\\r(\\n)? *postoperative diagnosis *(| *\r(\n)?):", 436 | "\\r(\\n)? *preoperative diagnosis *(| *\r(\n)?):", 437 | "\\r(\\n)? *procedure *:", 438 | "\\r(\\n)? 
*rectal mass *:", 439 | "\\r(\\n)? *regional lymph nodes *:", 440 | "\\r(\\n)? *result *:", 441 | "\\r(\\n)? *smw *:", 442 | "\\r(\\n)? *\\bsp\\b *:", 443 | "\\r(\\n)? *submitted *:", 444 | "\\r(\\n)? *summary of section *:", 445 | "\\r(\\n)? *supplementary report *:", 446 | "\\r(\\n)? *supplementary report\\(s\\) *:", 447 | "\\r(\\n)? *synoptic report for colon rectum *:", 448 | "\\r(\\n)? *test performed at *:", 449 | "\\r(\\n)? *tumor synopsis *:", 450 | "Alcohol Screen (AUDIT-C):", 451 | "/es/:", 452 | "medications\\s*held\\s*or\\s*discontinued\\s*upon\\s*admission:", 453 | "changes/additions:", 454 | "possible risks or complications include" 455 | ] 456 | } 457 | 458 | 459 | 460 | 461 | 462 | 463 | -------------------------------------------------------------------------------- /resources/spacy_section_patterns.jsonl: -------------------------------------------------------------------------------- 1 | {"section_title": "addendum", "pattern": "ADDENDUM:"} 2 | {"section_title": "addendum", "pattern": "Addendum:"} 3 | {"section_title": "allergies", "pattern": "ALLERGIC REACTIONS:"} 4 | {"section_title": "allergies", "pattern": "ALLERGIES:"} 5 | {"section_title": "chief_complaint", "pattern": "CC:"} 6 | {"section_title": "chief_complaint", "pattern": "CHIEF COMPLAINT:"} 7 | {"section_title": "chief_complaint", "pattern": "Chief Complaint:"} 8 | {"section_title": "comments", "pattern": "COMMENTS:"} 9 | {"section_title": "diagnoses", "pattern": "ADMISSION DIAGNOSES:"} 10 | {"section_title": "diagnoses", "pattern": "DIAGNOSES:"} 11 | {"section_title": "diagnoses", "pattern": "Primary Diagnosis:"} 12 | {"section_title": "diagnoses", "pattern": "Primary:"} 13 | {"section_title": "diagnoses", "pattern": "SECONDARY DIAGNOSES:"} 14 | {"section_title": "diagnoses", "pattern": "Secondary Diagnoses:"} 15 | {"section_title": "diagnoses", "pattern": "Secondary Diagnosis:"} 16 | {"section_title": "diagnoses", "pattern": "Secondary:"} 17 | {"section_title": "family_history",
"pattern": "Family History:"} 18 | {"section_title": "family_history", "pattern": "FAMILY HISTORY:"} 19 | {"section_title": "history_of_present_illness", "pattern": "HISTORY OF PRESENT ILLNESS:"} 20 | {"section_title": "hospital_course", "pattern": "Brief Hospital Course:"} 21 | {"section_title": "hospital_course", "pattern": "CONCISE SUMMARY OF HOSPITAL COURSE BY ISSUE/SYSTEM:"} 22 | {"section_title": "hospital_course", "pattern": "HOSPITAL COURSE:"} 23 | {"section_title": "hospital_course", "pattern": "SUMMARY OF HOSPITAL COURSE:"} 24 | {"section_title": "imaging", "pattern": "IMAGING:"} 25 | {"section_title": "imaging", "pattern": "INTERPRETATION:"} 26 | {"section_title": "imaging", "pattern": "Imaging:"} 27 | {"section_title": "imaging", "pattern": "MRI:"} 28 | {"section_title": "imaging", "pattern": "Radiology:"} 29 | {"section_title": "labs_and_studies", "pattern": "ADMISSION LABS:"} 30 | {"section_title": "labs_and_studies", "pattern": "Admission Labs:"} 31 | {"section_title": "labs_and_studies", "pattern": "Discharge Labs:"} 32 | {"section_title": "labs_and_studies", "pattern": "ECHO:"} 33 | {"section_title": "labs_and_studies", "pattern": "FINDINGS:"} 34 | {"section_title": "labs_and_studies", "pattern": "Findings:"} 35 | {"section_title": "labs_and_studies", "pattern": "INDICATION:"} 36 | {"section_title": "labs_and_studies", "pattern": "LABS:"} 37 | {"section_title": "labs_and_studies", "pattern": "Labs:"} 38 | {"section_title": "labs_and_studies", "pattern": "MICRO:"} 39 | {"section_title": "labs_and_studies", "pattern": "Micro:"} 40 | {"section_title": "labs_and_studies", "pattern": "Microbiology:"} 41 | {"section_title": "labs_and_studies", "pattern": "Pertinent Results:"} 42 | {"section_title": "labs_and_studies", "pattern": "STUDIES:"} 43 | {"section_title": "labs_and_studies", "pattern": "Studies:"} 44 | {"section_title": "medications", "pattern": "ACTIVE MEDICATIONS LIST:"} 45 | {"section_title": "medications", "pattern": "ACTIVE MEDICATIONS:"} 46 
| {"section_title": "medications", "pattern": "ADMISSION MEDICATIONS:"} 47 | {"section_title": "medications", "pattern": "CURRENT MEDICATIONS:"} 48 | {"section_title": "medications", "pattern": "DISCHARGE MEDICATIONS:"} 49 | {"section_title": "medications", "pattern": "Discharge Medications:"} 50 | {"section_title": "medications", "pattern": "HOME MEDICATIONS:"} 51 | {"section_title": "medications", "pattern": "MEDICATIONS AT HOME:"} 52 | {"section_title": "medications", "pattern": "MEDICATIONS LIST:"} 53 | {"section_title": "medications", "pattern": "MEDICATIONS ON ADMISSION:"} 54 | {"section_title": "medications", "pattern": "MEDICATIONS ON DISCHARGE:"} 55 | {"section_title": "medications", "pattern": "MEDICATIONS ON TRANSFER:"} 56 | {"section_title": "medications", "pattern": "MEDICATIONS PRIOR TO ADMISSION:"} 57 | {"section_title": "medications", "pattern": "MEDICATIONS:"} 58 | {"section_title": "medications", "pattern": "MEDICATIONS:"} 59 | {"section_title": "neurological", "pattern": "Neuro:"} 60 | {"section_title": "observation_and_plan", "pattern": "A/P:"} 61 | {"section_title": "observation_and_plan", "pattern": "ASSESSMENT/PLAN:"} 62 | {"section_title": "observation_and_plan", "pattern": "ASSESSMENT:"} 63 | {"section_title": "observation_and_plan", "pattern": "Assessment/Plan:"} 64 | {"section_title": "observation_and_plan", "pattern": "Clinical Impression:"} 65 | {"section_title": "observation_and_plan", "pattern": "DISCHARGE DIAGNOSES:"} 66 | {"section_title": "observation_and_plan", "pattern": "DISCHARGE DIAGNOSIS:"} 67 | {"section_title": "observation_and_plan", "pattern": "Discharge Condition:"} 68 | {"section_title": "observation_and_plan", "pattern": "Discharge Diagnoses:"} 69 | {"section_title": "observation_and_plan", "pattern": "Discharge Diagnosis:"} 70 | {"section_title": "observation_and_plan", "pattern": "Discharge Disposition:"} 71 | {"section_title": "observation_and_plan", "pattern": "FINAL DIAGNOSES:"} 72 | {"section_title": 
"observation_and_plan", "pattern": "FINAL DIAGNOSIS:"} 73 | {"section_title": "observation_and_plan", "pattern": "IMPRESSION:"} 74 | {"section_title": "observation_and_plan", "pattern": "Impression and Plan:"} 75 | {"section_title": "observation_and_plan", "pattern": "Impression and Recommendation:"} 76 | {"section_title": "other", "pattern": "Facility:"} 77 | {"section_title": "other", "pattern": "Service:"} 78 | {"section_title": "past_medical_history", "pattern": "Current Medical Problems:"} 79 | {"section_title": "past_medical_history", "pattern": "History of Chronic Illness:"} 80 | {"section_title": "past_medical_history", "pattern": "MHx:"} 81 | {"section_title": "past_medical_history", "pattern": "PAST HISTORY:"} 82 | {"section_title": "past_medical_history", "pattern": "PAST MEDICAL HISTORY:"} 83 | {"section_title": "past_medical_history", "pattern": "PAST MEDICAL Hx:"} 84 | {"section_title": "past_medical_history", "pattern": "PAST SURGICAL HISTORY:"} 85 | {"section_title": "past_medical_history", "pattern": "PMH:"} 86 | {"section_title": "past_medical_history", "pattern": "PMHx:"} 87 | {"section_title": "past_medical_history", "pattern": "Past Medical History:"} 88 | {"section_title": "past_medical_history", "pattern": "UNDERLYING MEDICAL CONDITION:"} 89 | {"section_title": "patient_education", "pattern": "Education:"} 90 | {"section_title": "patient_education", "pattern": "Patient Education:"} 91 | {"section_title": "patient_instructions", "pattern": "DISCHARGE INSTRUCTIONS/FOLLOWUP:"} 92 | {"section_title": "patient_instructions", "pattern": "DISCHARGE INSTRUCTIONS:"} 93 | {"section_title": "patient_instructions", "pattern": "Discharge Instructions:"} 94 | {"section_title": "patient_instructions", "pattern": "Followup Instructions:"} 95 | {"section_title": "physical_exam", "pattern": "PE:"} 96 | {"section_title": "physical_exam", "pattern": "PHYSICAL EXAM:"} 97 | {"section_title": "physical_exam", "pattern": "PHYSICAL EXAMINATION:"} 98 | 
{"section_title": "physical_exam", "pattern": "Physical Exam:"} 99 | {"section_title": "problem_list", "pattern": "Active Problem List:"} 100 | {"section_title": "problem_list", "pattern": "Current Problems:"} 101 | {"section_title": "problem_list", "pattern": "Medical Problems:"} 102 | {"section_title": "problem_list", "pattern": "PROBLEM LIST:"} 103 | {"section_title": "problem_list", "pattern": "Problem List:"} 104 | {"section_title": "reason_for_examination", "pattern": "REASON FOR THIS EXAMINATION:"} 105 | {"section_title": "signature", "pattern": "Electronic Signature:"} 106 | {"section_title": "signature", "pattern": "Signed electronically by:"} 107 | {"section_title": "social_history", "pattern": "PMHSx:"} 108 | {"section_title": "social_history", "pattern": "PSH:"} 109 | {"section_title": "social_history", "pattern": "SH:"} 110 | {"section_title": "social_history", "pattern": "Sexual History:"} 111 | {"section_title": "social_history", "pattern": "Social History:"} 112 | {"section_title": "social_history", "pattern" : "SOCIAL HISTORY:"} 113 | -------------------------------------------------------------------------------- /resources/text_section_patterns.jsonl: -------------------------------------------------------------------------------- 1 | {"section_title": "past_medical_history", "pattern": "(past )?medical (history|hx)"} 2 | {"section_title": "past_medical_history", "pattern": "mhx?"} 3 | {"section_title": "past_medical_history", "pattern": "mh:"} 4 | {"section_title": "past_medical_history", "pattern": "pmh:"} 5 | {"section_title": "past_medical_history", "pattern": "pohx:"} 6 | {"section_title": "past_medical_history", "pattern": "past history:"} 7 | {"section_title": "past_medical_history", "pattern": "history:"} 8 | {"section_title": "past_medical_history", "pattern": "p[hm]+ computerized problem list:"} 9 | {"section_title": "past_medical_history", "pattern": "significant medical hx:"} 10 | {"section_title": "past_medical_history", "pattern": 
"past medical"} 11 | {"section_title": "past_medical_history", "pattern": "past medical history/problem list:"} 12 | {"section_title": "past_medical_history", "pattern": "past surgical history:"} 13 | {"section_title": "past_medical_history", "pattern": "patient history:"} 14 | {"section_title": "past_medical_history", "pattern": "pt history:"} 15 | {"section_title": "past_medical_history", "pattern": "history/physical examination:"} 16 | {"section_title": "past_medical_history", "pattern": "history physical examination:"} 17 | {"section_title": "past_medical_history", "pattern": "clinical history:"} 18 | {"section_title": "past_medical_history", "pattern": "clinical history/indications:"} 19 | {"section_title": "past_medical_history", "pattern": "clinical history indication:"} 20 | {"section_title": "past_medical_history", "pattern": "clinical history indication:"} 21 | {"section_title": "past_medical_history", "pattern": "issues briefly as following:"} 22 | {"section_title": "past_medical_history", "pattern": "issue briefly as following:"} 23 | {"section_title": "past_medical_history", "pattern": "history:"} 24 | {"section_title": "past_medical_history", "pattern": "current medical problems:"} 25 | {"section_title": "past_medical_history", "pattern": "history of chronic illness:"} 26 | {"section_title": "past_medical_history", "pattern": "history chronic illness:"} 27 | {"section_title": "past_medical_history", "pattern": "clinical presentation:"} 28 | {"section_title": "past_medical_history", "pattern": "issues briefly as follows:"} 29 | {"section_title": "past_medical_history", "pattern": "issue briefly as follow:"} 30 | {"section_title": "past_medical_history", "pattern": "interval history:"} 31 | {"section_title": "past_medical_history", "pattern": "past medical history and review of system:"} 32 | {"section_title": "past_medical_history", "pattern": "past medical history review system:"} 33 | {"section_title": "past_medical_history", "pattern": "past medical 
problems:"} 34 | {"section_title": "past_medical_history", "pattern": "past medical problems:"} 35 | {"section_title": "past_medical_history", "pattern": "past medical problem:"} 36 | {"section_title": "past_medical_history", "pattern": "history of past illness:"} 37 | {"section_title": "past_medical_history", "pattern": "history past illness:"} 38 | {"section_title": "past_medical_history", "pattern": "past medical history:"} 39 | {"section_title": "past_medical_history", "pattern": "previous medical history:"} 40 | {"section_title": "past_medical_history", "pattern": "hematology/oncology history:"} 41 | {"section_title": "past_medical_history", "pattern": "hematology oncology history:"} 42 | {"section_title": "past_medical_history", "pattern": "history of general health:"} 43 | {"section_title": "past_medical_history", "pattern": "history general health:"} 44 | {"section_title": "past_medical_history", "pattern": "past medical history/past surgical history:"} 45 | {"section_title": "past_medical_history", "pattern": "past medical history past surgical history:"} 46 | {"section_title": "past_medical_history", "pattern": "medical problems:"} 47 | {"section_title": "past_medical_history", "pattern": "medical problem:"} 48 | {"section_title": "past_medical_history", "pattern": "significant past medical history:"} 49 | {"section_title": "past_medical_history", "pattern": "history of major illnesses and injuries:"} 50 | {"section_title": "past_medical_history", "pattern": "history of major illness and injury:"} 51 | {"section_title": "past_medical_history", "pattern": "history major illness injury:"} 52 | {"section_title": "past_medical_history", "pattern": "past med history:"} 53 | {"section_title": "past_medical_history", "pattern": "past hospitalization history:"} 54 | {"section_title": "past_medical_history", "pattern": "medical history:"} 55 | {"section_title": "past_medical_history", "pattern": "past medical and surgical history:"} 56 | {"section_title": 
"past_medical_history", "pattern": "past medical surgical history:"} 57 | {"section_title": "past_medical_history", "pattern": "brief medical history:"} 58 | {"section_title": "past_medical_history", "pattern": "Past Medical History/Problem List:"} 59 | {"section_title": "past_medical_history", "pattern": "brief medical history:"} 60 | {"section_title": "past_medical_history", "pattern": "past medical history problem list:"} 61 | {"section_title": "past_medical_history", "pattern": "past medical issues:"} 62 | {"section_title": "past_medical_history", "pattern": "past medical issues:"} 63 | {"section_title": "past_medical_history", "pattern": "past medical issue:"} 64 | {"section_title": "past_medical_history", "pattern": "past_medical_history:"} 65 | {"section_title": "past_medical_history", "pattern": "past medical history/surgical history:"} 66 | {"section_title": "past_medical_history", "pattern": "past medical history surgical history:"} 67 | {"section_title": "past_medical_history", "pattern": "past infectious history:"} 68 | {"section_title": "past_medical_history", "pattern": "past medical history/family history:"} 69 | {"section_title": "past_medical_history", "pattern": "past medical history family history:"} 70 | {"section_title": "past_medical_history", "pattern": "past medical history family history:"} 71 | {"section_title": "past_medical_history", "pattern": "Known Significant Medical Diagnoses and Conditions:"} 72 | {"section_title": "past_medical_history", "pattern": "past history:"} 73 | {"section_title": "past_medical_history", "pattern": "past medical history and physical examination:"} 74 | {"section_title": "past_medical_history", "pattern": "past medical history physical examination:"} 75 | {"section_title": "past_medical_history", "pattern": "past medical history physical examination:"} 76 | {"section_title": "past_medical_history", "pattern": "past medical history/physical examination:"} 77 | {"section_title": "past_medical_history", 
"pattern": "past_medical_history_and_physical_examination:"} 78 | {"section_title": "past_medical_history", "pattern": "Historical data:"} 79 | {"section_title": "family_history", "pattern": "family history:"} 80 | {"section_title": "problem_list", "pattern": "active problem list:"} 81 | {"section_title": "problem_list", "pattern": "chronic stable problems:"} 82 | {"section_title": "problem_list", "pattern": "PROBLEMS- ACTIVE:"} 83 | {"section_title": "problem_list", "pattern": "problem list"} 84 | {"section_title": "sexual_and_social_history", "pattern": "SH:"} 85 | {"section_title": "sexual_and_social_history", "pattern": "social history:"} 86 | {"section_title": "sexual_and_social_history", "pattern": "PSHx:"} 87 | {"section_title": "sexual_and_social_history", "pattern": "soc hx:"} 88 | {"section_title": "sexual_and_social_history", "pattern": "mh/pshx:"} 89 | {"section_title": "sexual_and_social_history", "pattern": "sexual history"} 90 | {"section_title": "sexual_and_social_history", "pattern": "pmhsx:"} 91 | {"section_title": "sexual_and_social_history", "pattern": "pmhx/pshx:"} 92 | {"section_title": "sexual_and_social_history", "pattern": "sexual history:"} 93 | {"section_title": "sexual_and_social_history", "pattern": "LGBTQ SCREENING NWI:"} 94 | {"section_title": "hiv_screening", "pattern": "HIV:"} 95 | {"section_title": "hiv_screening", "pattern": "hiv_screening:"} 96 | {"section_title": "hiv_screening", "pattern": "HIV Risk:"} 97 | {"section_title": "observation_and_plan", "pattern": "ADDITIONAL ASSESSMENT:"} 98 | {"section_title": "observation_and_plan", "pattern": "MEDICAL DECISION MAKING/PLAN:"} 99 | {"section_title": "observation_and_plan", "pattern": "ASS:"} 100 | {"section_title": "observation_and_plan", "pattern": "ASSESMENT:"} 101 | {"section_title": "observation_and_plan", "pattern": "ASSESS:"} 102 | {"section_title": "observation_and_plan", "pattern": "ASSESSMENT:"} 103 | {"section_title": "observation_and_plan", "pattern": "CLINICAL 
IMPRESSION:"} 104 | {"section_title": "observation_and_plan", "pattern": "Imp:"} 105 | {"section_title": "observation_and_plan", "pattern": "IMPRESSION AND RECOMMENDATION:"} 106 | {"section_title": "observation_and_plan", "pattern": "IMPRESSION AND RECOMMENDATIONS:"} 107 | {"section_title": "observation_and_plan", "pattern": "IMPRESSION RECOMMENDATION:"} 108 | {"section_title": "observation_and_plan", "pattern": "IMPRESSION SECTION:"} 109 | {"section_title": "observation_and_plan", "pattern": "IMPRESSION:"} 110 | {"section_title": "observation_and_plan", "pattern": "IMPRESSION/ASSESSMENT:"} 111 | {"section_title": "observation_and_plan", "pattern": "IMPRESSIONS:"} 112 | {"section_title": "observation_and_plan", "pattern": "IMPRESSIONS/ASSESSMENT:"} 113 | {"section_title": "observation_and_plan", "pattern": "IMPRESSSION:"} 114 | {"section_title": "observation_and_plan", "pattern": "IMPRESSSIONS:"} 115 | {"section_title": "observation_and_plan", "pattern": "INITAL IMPRESSION:"} 116 | {"section_title": "observation_and_plan", "pattern": "INITIAL ASSESSMENT:"} 117 | {"section_title": "observation_and_plan", "pattern": "INITIAL IMPRESSION:"} 118 | {"section_title": "observation_and_plan", "pattern": "INITIAL IMPRESSION/ASSESSMENT:"} 119 | {"section_title": "observation_and_plan", "pattern": "Recommendations:"} 120 | {"section_title": "observation_and_plan", "pattern": "A/P:"} 121 | {"section_title": "observation_and_plan", "pattern": "A/P:"} 122 | {"section_title": "observation_and_plan", "pattern": "A:"} 123 | {"section_title": "observation_and_plan", "pattern": "ASSESSMENT AND PLAN:"} 124 | {"section_title": "observation_and_plan", "pattern": "Impression:"} 125 | {"section_title": "observation_and_plan", "pattern": "assessment and plan:"} 126 | {"section_title": "observation_and_plan", "pattern": "assessment and recommendation:"} 127 | {"section_title": "observation_and_plan", "pattern": "assessment and recommendations:"} 128 | {"section_title": 
"observation_and_plan", "pattern": "assessment plan:"} 129 | {"section_title": "observation_and_plan", "pattern": "assessment recommendation:"} 130 | {"section_title": "observation_and_plan", "pattern": "assessment:"} 131 | {"section_title": "observation_and_plan", "pattern": "assessment/plan:"} 132 | {"section_title": "observation_and_plan", "pattern": "assessment:"} 133 | {"section_title": "observation_and_plan", "pattern": "assessment_and_plan:"} 134 | {"section_title": "observation_and_plan", "pattern": "clinical comment:"} 135 | {"section_title": "observation_and_plan", "pattern": "clinical comments:"} 136 | {"section_title": "observation_and_plan", "pattern": "clinical impression:"} 137 | {"section_title": "observation_and_plan", "pattern": "discharge diagnoses:"} 138 | {"section_title": "observation_and_plan", "pattern": "diagnoses:"} 139 | {"section_title": "observation_and_plan", "pattern": "discharge diagnosis:"} 140 | {"section_title": "observation_and_plan", "pattern": "diagnosis:"} 141 | {"section_title": "observation_and_plan", "pattern": "diagnosis:"} 142 | {"section_title": "observation_and_plan", "pattern": "impression and plan:"} 143 | {"section_title": "observation_and_plan", "pattern": "impression and plans:"} 144 | {"section_title": "observation_and_plan", "pattern": "impression and recommendation:"} 145 | {"section_title": "observation_and_plan", "pattern": "impression and recommendations:"} 146 | {"section_title": "observation_and_plan", "pattern": "impression plan:"} 147 | {"section_title": "observation_and_plan", "pattern": "impression recommendation:"} 148 | {"section_title": "observation_and_plan", "pattern": "impression[:\\- ]"} 149 | {"section_title": "observation_and_plan", "pattern": "impresion:"} 150 | {"section_title": "observation_and_plan", "pattern": "impression/plan:"} 151 | {"section_title": "observation_and_plan", "pattern": "impression/recommendations:"} 152 | {"section_title": "observation_and_plan", "pattern": "initial 
impression:"} 153 | {"section_title": "observation_and_plan", "pattern": "interpretation:"} 154 | {"section_title": "observation_and_plan", "pattern": "objective:"} 155 | {"section_title": "observation_and_plan", "pattern": "plan and discussion:"} 156 | {"section_title": "observation_and_plan", "pattern": "plan discussion:"} 157 | {"section_title": "observation_and_plan", "pattern": "medical decision making:"} 158 | {"section_title": "observation_and_plan", "pattern": "plan:"} 159 | {"section_title": "medication", "pattern": "ACTIVE INPATIENT AND OUTPATIENT MEDICATIONS:"} 160 | {"section_title": "medication", "pattern": "ACTIVE MEDICATIONS:"} 161 | {"section_title": "medication", "pattern": "ACTIVE MEDICATIONS COMBINED:"} 162 | {"section_title": "medication", "pattern": "ACTIVE MEDICATIONS INCLUDE:"} 163 | {"section_title": "medication", "pattern": "ACTIVE MEDICATIONS LIST:"} 164 | {"section_title": "medication", "pattern": "ACTIVE MEDICATIONS PRESCRIBED AT SAGINAW VAMC:"} 165 | {"section_title": "medication", "pattern": "ACTIVE MEDICATIONS PRESCRIBED AT THE SAGINAW VAMC:"} 166 | {"section_title": "medication", "pattern": "ACTIVE NON-VA MEDICATIONS:"} 167 | {"section_title": "medication", "pattern": "ACTIVE NONVA MEDICATIONS:"} 168 | {"section_title": "medication", "pattern": "ACTIVE NON VA MEDICATIONS:"} 169 | {"section_title": "medication", "pattern": "ACTIVE OPT MEDICATIONS:"} 170 | {"section_title": "medication", "pattern": "ACTIVE OUTPATIENT MEDICATIONS:"} 171 | {"section_title": "medication", "pattern": "ACTIVE OUTPATIENT PRESCRIPTIONS:"} 172 | {"section_title": "medication", "pattern": "ACTIVE VA MEDICATIONS:"} 173 | {"section_title": "medication", "pattern": "ACTIVE MEDICATIONS:"} 174 | {"section_title": "medication", "pattern": "ADMISSION MEDICATIONS:"} 175 | {"section_title": "medication", "pattern": "ALL ACTIVE MEDICATIONS:"} 176 | {"section_title": "medication", "pattern": "Active Inpatient Medications \\(including supplies\\):"} 177 | {"section_title": 
"medication", "pattern": "Active Inpatient Medications drug dosage:"} 178 | {"section_title": "medication", "pattern": "Active Inpatient Medications status:"} 179 | {"section_title": "medication", "pattern": "Active Medications \\(including supplies\\):"} 180 | {"section_title": "medication", "pattern": "Active Medications from Remote Data:"} 181 | {"section_title": "medication", "pattern": "Active Outpatient Medications \\(including supplies\\):"} 182 | {"section_title": "medication", "pattern": "Active medications:"} 183 | {"section_title": "medication", "pattern": "Active medications? prior to admission:"} 184 | {"section_title": "medication", "pattern": "CORRECT MEDICATIONS INCLUDE:"} 185 | {"section_title": "medication", "pattern": "CURRENT INPATIENT MEDICATIONS:"} 186 | {"section_title": "medication", "pattern": "CURRENT INPATIENT MEDICATIONS INCLUDE:"} 187 | {"section_title": "medication", "pattern": "CURRENT MEDICATIONS:"} 188 | {"section_title": "medication", "pattern": "CURRENT MEDICATIONS/RECONCILIATION:"} 189 | {"section_title": "medication", "pattern": "CURRENT MEDICATIONS LIST:"} 190 | {"section_title": "medication", "pattern": "DISCHARGE MEDICATIONS:"} 191 | {"section_title": "medication", "pattern": "DRUGS:"} 192 | {"section_title": "medication", "pattern": "HEALTH SUPPLEMENTS:"} 193 | {"section_title": "medication", "pattern": "HISTORY/MEDICATIONS:"} 194 | {"section_title": "medication", "pattern": "HISTORY OF MEDICATION TREATMENTS:"} 195 | {"section_title": "medication", "pattern": "HISTORY OF MEDICATION USE:"} 196 | {"section_title": "medication", "pattern": "Home medications:"} 197 | {"section_title": "medication", "pattern": "INACTIVE OUTPATIENT MEDICATIONS:"} 198 | {"section_title": "medication", "pattern": "INHOSPITAL MEDICATIONS:"} 199 | {"section_title": "medication", "pattern": "INPATIENT MEDICATIONS:"} 200 | {"section_title": "medication", "pattern": "INPATIENT MEDICATION RECONCILIATION:"} 201 | {"section_title": "medication", "pattern": "INPT 
MEDICATIONS:"} 202 | {"section_title": "medication", "pattern": "Inpatient medications?:"} 203 | {"section_title": "medication", "pattern": "Inpatient medications? =:"} 204 | {"section_title": "medication", "pattern": "MEDICATION\\(S\\) REVIEW:"} 205 | {"section_title": "medication", "pattern": "MEDICATIONS:"} 206 | {"section_title": "medication", "pattern": "MEDICATIONS AT ADMISSION:"} 207 | {"section_title": "medication", "pattern": "MEDICATIONS AT DISCHARGE:"} 208 | {"section_title": "medication", "pattern": "MEDICATIONS DURING ADMISSION:"} 209 | {"section_title": "medication", "pattern": "MEDICATIONS GIVEN TODAY:"} 210 | {"section_title": "medication", "pattern": "MEDICATIONS ON ADMISSION:"} 211 | {"section_title": "medication", "pattern": "MEDICATIONS ON DISCHARGE:"} 212 | {"section_title": "medication", "pattern": "MEDICATIONS PRIOR TO ADMISSION:"} 213 | {"section_title": "medication", "pattern": "MEDICATION ADMISSION:"} 214 | {"section_title": "medication", "pattern": "MEDICATION AT ADMISSION:"} 215 | {"section_title": "medication", "pattern": "MEDICATION AT DISCHARGE:"} 216 | {"section_title": "medication", "pattern": "MEDICATION DURING ADMISSION:"} 217 | {"section_title": "medication", "pattern": "MEDICATION HISTORY:"} 218 | {"section_title": "medication", "pattern": "MEDICATION MANAGEMENT AT DISCHARGE:"} 219 | {"section_title": "medication", "pattern": "MEDICATION ON ADMISSION:"} 220 | {"section_title": "medication", "pattern": "MEDICATION PRIOR ADMISSION:"} 221 | {"section_title": "medication", "pattern": "MEDICATION PRIOR TO ADMISSION:"} 222 | {"section_title": "medication", "pattern": "MEDICATION RECONCILIATION:"} 223 | {"section_title": "medication", "pattern": "MEDICATION RECONCILIATION REVIEW:"} 224 | {"section_title": "medication", "pattern": "MEDICATION RECONCILIATION SUMMARY:"} 225 | {"section_title": "medication", "pattern": "MEDICATION RECONCILLIATION:"} 226 | {"section_title": "medication", "pattern": "MEDICATION REVIEW for MEDICATION 
RECONCILIATION:"} 227 | {"section_title": "medication", "pattern": "MEDICINES AT PHARMACY:"} 228 | {"section_title": "medication", "pattern": "MED RECON:"} 229 | {"section_title": "medication", "pattern": "MED RECONCILIATION:"} 230 | {"section_title": "medication", "pattern": "MED RECONCILIATION OUTPT:"} 231 | {"section_title": "medication", "pattern": "MISUSE OF MEDICATIONS:"} 232 | {"section_title": "medication", "pattern": "M E D I C A T I O N S:"} 233 | {"section_title": "medication", "pattern": "NON-VA MEDICATIONS:"} 234 | {"section_title": "medication", "pattern": "NON-VA PRESCRIBED:"} 235 | {"section_title": "medication", "pattern": "NON-VA PRESCRIPTIONS:"} 236 | {"section_title": "medication", "pattern": "NON-VA PRESCRIPTION MEDICATIONS:"} 237 | {"section_title": "medication", "pattern": "NON-VA SUPPLIED MEDICATIONS:"} 238 | {"section_title": "medication", "pattern": "NONVA MEDICATIONS:"} 239 | {"section_title": "medication", "pattern": "NONVA MEDICATIONS LIST:"} 240 | {"section_title": "medication", "pattern": "NON VA:"} 241 | {"section_title": "medication", "pattern": "NON VA MEDICATIONS:"} 242 | {"section_title": "medication", "pattern": "NON VA PRESCRIBED:"} 243 | {"section_title": "medication", "pattern": "NON VA PRESCRIPTIONS:"} 244 | {"section_title": "medication", "pattern": "NON VA PRESCRIPTION MEDICATIONS:"} 245 | {"section_title": "medication", "pattern": "NON VA SUPPLIED MEDICATIONS:"} 246 | {"section_title": "medication", "pattern": "Non-VA medications:"} 247 | {"section_title": "medication", "pattern": "OUTPATIENT MEDICATIONS:"} 248 | {"section_title": "medication", "pattern": "OUTPATIENT MEDICATION REVIEW:"} 249 | {"section_title": "medication", "pattern": "OUTPT. 
MEDICATION RECONCILIATION:"} 250 | {"section_title": "medication", "pattern": "OUTPT MEDICATIONS:"} 251 | {"section_title": "medication", "pattern": "Outpatient medications:"} 252 | {"section_title": "medication", "pattern": "Outpatient medications status:"} 253 | {"section_title": "medication", "pattern": "Outpatient meds DRUG List:"} 254 | {"section_title": "medication", "pattern": "PENDING INPATIENT MEDICATIONS:"} 255 | {"section_title": "medication", "pattern": "PRE-ADMISSION MEDICATIONS:"} 256 | {"section_title": "medication", "pattern": "PRE-VISIT MED RECONCILIATION:"} 257 | {"section_title": "medication", "pattern": "PREADMISSION MEDICATION:"} 258 | {"section_title": "medication", "pattern": "PRESENT MEDICATIONS:"} 259 | {"section_title": "medication", "pattern": "PROVIDER MED RECONCILIATION:"} 260 | {"section_title": "medication", "pattern": "PTA Meds:"} 261 | {"section_title": "medication", "pattern": "RECONCILED MEDICATION LIST:"} 262 | {"section_title": "medication", "pattern": "RECONCILIATION:"} 263 | {"section_title": "medication", "pattern": "RECONCILIATION OF MEDICATIONS COMPLETED:"} 264 | {"section_title": "medication", "pattern": "SIGNIFICANT MEDICATIONS:"} 265 | {"section_title": "medication", "pattern": "SUBSTANCE USE/MISUSE OF MEDICATIONS:"} 266 | {"section_title": "medication", "pattern": "Status Active:"} 267 | {"section_title": "medication", "pattern": "VA MEDICATIONS:"} 268 | {"section_title": "medication", "pattern": "Active Outpatient Medications:"} 269 | {"section_title": "medication", "pattern": "meds:"} 270 | {"section_title": "medication", "pattern": "MEDICATIONS:"} 271 | {"section_title": "medication", "pattern": "summary of medications"} 272 | {"section_title": "medication", "pattern": "MEDICATIONS \\(as listed in Vista\\):"} 273 | {"section_title": "medication", "pattern": "OTC OR NON-VA PRESCRIPTION MEDICATIONS:"} 274 | {"section_title": "medication", "pattern": "Active Outpatient Medications \\(including Supplies\\):"} 275 | 
{"section_title": "allergy", "pattern": "A L L E R G I E S:"} 276 | {"section_title": "allergy", "pattern": "ADDITIONAL ADRS AND/OR ALLERGIES:"} 277 | {"section_title": "allergy", "pattern": "ADR:"} 278 | {"section_title": "allergy", "pattern": "ADVERSE DRUG REACTIONS:"} 279 | {"section_title": "allergy", "pattern": "ADVERSE EVENTS:"} 280 | {"section_title": "allergy", "pattern": "ADVERSE REACTION:"} 281 | {"section_title": "allergy", "pattern": "ADVERSE REACTIONS:"} 282 | {"section_title": "allergy", "pattern": "ALLERGIC DISORDER HISTORY:"} 283 | {"section_title": "allergy", "pattern": "ALLERGIC REACTIONS:"} 284 | {"section_title": "allergy", "pattern": "ALLERGIC:"} 285 | {"section_title": "allergy", "pattern": "ALLERGIES AND ADVERSE REACTIONS:"} 286 | {"section_title": "allergy", "pattern": "ALLERGIES AND SENSITIVITIES:"} 287 | {"section_title": "allergy", "pattern": "ALLERGIES FAMILY HISTORY:"} 288 | {"section_title": "allergy", "pattern": "ALLERGIES REVIEWED:"} 289 | {"section_title": "allergy", "pattern": "ALLERGIES TO MEDICATIONS:"} 290 | {"section_title": "allergy", "pattern": "ALLERGIES/ADVERSE REACTIONS:"} 291 | {"section_title": "allergy", "pattern": "ALLERGIES/REACTIONS:"} 292 | {"section_title": "allergy", "pattern": "ALLERGIES:"} 293 | {"section_title": "allergy", "pattern": "ALLERGY ADVERSE REACTION:"} 294 | {"section_title": "allergy", "pattern": "ALLERGY ENVIRONMENTAL ALLERGEN:"} 295 | {"section_title": "allergy", "pattern": "ALLERGY FAMILY HISTORY:"} 296 | {"section_title": "allergy", "pattern": "ALLERGY INFORMATION:"} 297 | {"section_title": "allergy", "pattern": "ALLERGY REVIEW:"} 298 | {"section_title": "allergy", "pattern": "ALLERGY SCREENING:"} 299 | {"section_title": "allergy", "pattern": "ALLERGY SYMPTOM:"} 300 | {"section_title": "allergy", "pattern": "ALLERGY SYMPTOMS:"} 301 | {"section_title": "allergy", "pattern": "ALLERGY TO ENVIRONMENTAL ALLERGEN:"} 302 | {"section_title": "allergy", "pattern": "ALLERGY TO ENVIRONMENTAL ALLERGENS:"} 
303 | {"section_title": "allergy", "pattern": "ALLERGY/ADVERSE DRUG REACTION HISTORY:"} 304 | {"section_title": "allergy", "pattern": "ALLERGY/ADVERSE DRUG REACTION INFORMATION:"} 305 | {"section_title": "allergy", "pattern": "ALLERGY/ADVERSE DRUG REACTION:"} 306 | {"section_title": "allergy", "pattern": "ALLERGY:"} 307 | {"section_title": "allergy", "pattern": "CONCOMITANT MEDICATIONS:"} 308 | {"section_title": "allergy", "pattern": "CURRENT ALLERGIES:"} 309 | {"section_title": "allergy", "pattern": "DRUG ALLERGIC REACTIONS:"} 310 | {"section_title": "allergy", "pattern": "DRUG ALLERGIES:"} 311 | {"section_title": "allergy", "pattern": "DRUG SENSITIVITIES:"} 312 | {"section_title": "allergy", "pattern": "FOOD & DRUG REACTIONS INCLUDING ALLERGIES AS ENTERED IN CPRS:"} 313 | {"section_title": "allergy", "pattern": "FOOD ALLERGIES:"} 314 | {"section_title": "allergy", "pattern": "HISTORY ALLERGY:"} 315 | {"section_title": "allergy", "pattern": "HISTORY OF ALLERGIES:"} 316 | {"section_title": "allergy", "pattern": "KNOWN ALLERGIES:"} 317 | {"section_title": "allergy", "pattern": "LATEX ALLERGY:"} 318 | {"section_title": "allergy", "pattern": "MEDICATIONS ALLERGIES:"} 319 | {"section_title": "allergy", "pattern": "NEW ALLERGIES:"} 320 | {"section_title": "allergy", "pattern": "NEWLY IDENTIFIED ALLERGIES:"} 321 | {"section_title": "allergy", "pattern": "OTHER ALLERGIES:"} 322 | {"section_title": "allergy", "pattern": "PREVIOUSLY DOCUMENTED ALLERGIES:"} 323 | {"section_title": "allergy", "pattern": "SEASONAL ALLERGIES:"} 324 | {"section_title": "allergy", "pattern": "SEASONAL ALLERGY:"} 325 | {"section_title": "allergy", "pattern": "SENSITIVITIES:"} 326 | {"section_title": "allergy", "pattern": "allergies:"} 327 | {"section_title": "allergy", "pattern": "allergy:"} 328 | {"section_title": "allergy", "pattern": "allergies/adr:"} 329 | {"section_title": "allergy", "pattern": "allergies:"} 330 | {"section_title": "allergy", "pattern": "allergy"} 331 | {"section_title": 
"allergy", "pattern": "ALLERGIES AS DISPLAYED IN VISTA:"} 332 | {"section_title": "allergy", "pattern": "Allergy/Other Non-VA or VA Meds:"} 333 | {"section_title": "allergy", "pattern": "ALLERGY ASSESSMENT:"} 334 | {"section_title": "chief_complaint", "pattern": "chief_complaint:"} 335 | {"section_title": "physical_exam", "pattern": "Physical Exam:"} 336 | {"section_title": "physical_exam", "pattern": "Review of systems:"} 337 | {"section_title": "physical_exam", "pattern": "PHYSICAL EXAMINATION"} 338 | {"section_title": "physical_exam", "pattern": "PE:"} 339 | {"section_title": "physical_exam", "pattern": "exam:"} 340 | {"section_title": "ed_course", "pattern": "ED COURSE:"} 341 | {"section_title": "ed_course", "pattern": "Er COURSE:"} 342 | {"section_title": "ed_course", "pattern": "Emergency Department Course"} 343 | {"section_title": "labs_and_studies", "pattern": "findings :"} 344 | {"section_title": "labs_and_studies", "pattern": "LABORATORY DATA:"} 345 | {"section_title": "labs_and_studies", "pattern": "operation and findings:"} 346 | {"section_title": "labs_and_studies", "pattern": "operative findings:"} 347 | {"section_title": "labs_and_studies", "pattern": "pathologic staging:"} 348 | {"section_title": "labs_and_studies", "pattern": "pathology report:"} 349 | {"section_title": "labs_and_studies", "pattern": "performing lab:"} 350 | {"section_title": "labs_and_studies", "pattern": "performing laboratory:"} 351 | {"section_title": "labs_and_studies", "pattern": "reporting lab :"} 352 | {"section_title": "labs_and_studies", "pattern": "objective:"} 353 | {"section_title": "labs_and_studies", "pattern": "s/o:"} 354 | {"section_title": "labs_and_studies", "pattern": "indication:"} 355 | {"section_title": "labs_and_studies", "pattern": "clinical indication:"} 356 | {"section_title": "labs_and_studies", "pattern": "indication:"} 357 | {"section_title": "labs_and_studies", "pattern": "indications:"} 358 | {"section_title": "labs_and_studies", "pattern": "micro:"} 
359 | {"section_title": "labs_and_studies", "pattern": "micro exam "} 360 | {"section_title": "labs_and_studies", "pattern": "labs:"} 361 | {"section_title": "present_illness", "pattern": "hpi/interval history:"} 362 | {"section_title": "present_illness", "pattern": "hpi interval history:"} 363 | {"section_title": "present_illness", "pattern": "patient hpi:"} 364 | {"section_title": "present_illness", "pattern": "present illness:"} 365 | {"section_title": "present_illness", "pattern": "history_present_illness:"} 366 | {"section_title": "present_illness", "pattern": "history of the present illness:"} 367 | {"section_title": "present_illness", "pattern": "history of present illness:"} 368 | {"section_title": "present_illness", "pattern": "history present illness:"} 369 | {"section_title": "present_illness", "pattern": "summary of present illness:"} 370 | {"section_title": "present_illness", "pattern": "summary present illness"} 371 | {"section_title": "other", "pattern": "A signed copy of this report:"} 372 | {"section_title": "other", "pattern": "modified report:"} 373 | {"section_title": "other", "pattern": "note:"} 374 | {"section_title": "other", "pattern": "postoperative diagnosis:"} 375 | {"section_title": "other", "pattern": "preoperative diagnosis:"} 376 | {"section_title": "other", "pattern": "procedure:"} 377 | {"section_title": "other", "pattern": "rectal mass:"} 378 | {"section_title": "other", "pattern": "regional lymph nodes:"} 379 | {"section_title": "other", "pattern": "result:"} 380 | {"section_title": "other", "pattern": "smw:"} 381 | {"section_title": "other", "pattern": "sp:"} 382 | {"section_title": "other", "pattern": "submitted:"} 383 | {"section_title": "other", "pattern": "summary of section:"} 384 | {"section_title": "other", "pattern": "supplementary report:"} 385 | {"section_title": "other", "pattern": "supplementary report:"} 386 | {"section_title": "other", "pattern": "synoptic report for colon rectum:"} 387 | {"section_title": "other", 
"pattern": "test performed at:"} 388 | {"section_title": "other", "pattern": "tumor synopsis:"} 389 | {"section_title": "other", "pattern": "Alcohol Screen (AUDIT-C):"} 390 | {"section_title": "other", "pattern": "/es/:"} 391 | {"section_title": "other", "pattern": "medications held or discontinued upon admission:"} 392 | {"section_title": "other", "pattern": "changes/additions:"} 393 | {"section_title": "other", "pattern": "possible risks or complications include"} 394 | {"section_title": "imaging", "pattern": "MRI:"} 395 | {"section_title": "patient_instructions", "pattern": "Discharge Instructions:"} 396 | {"section_title": "patient_instructions", "pattern": "Followup Instructions:"} 397 | {"section_title": "signature", "pattern": "Signed electronically by:"} 398 | {"section_title": "education", "pattern": "Patient Education:"} 399 | {"section_title": "education", "pattern": "Education:"} -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | # read the contents of the README file 4 | from os import path 5 | 6 | import warnings 7 | warnings.simplefilter('once', DeprecationWarning) 8 | warnings.warn("cycontext is now *deprecated*. Please use medspacy.context instead: `pip install medspacy`", RuntimeWarning) 9 | 10 | this_directory = path.abspath(path.dirname(__file__)) 11 | with open(path.join(this_directory, "README.md"), encoding="utf-8") as f: 12 | long_description = f.read() 13 | 14 | def get_version(): 15 | """Load the version from version.py, without importing it. 16 | This function assumes that the last line in the file contains a variable defining the 17 | version string with single quotes. 
def get_version():
    """Load the version from ``_version.py`` without importing the package.

    The version is parsed from the *first* line of
    ``clinical_sectionizer/_version.py`` (path relative to the current
    working directory). That line is expected to be an assignment such as
    ``__version__ = '1.0.0'``; everything after the last ``=`` is returned
    with surrounding whitespace and quotes (single or double) removed.

    Returns:
        The version string, e.g. ``"1.0.0"``.

    Raises:
        OSError: if the version file cannot be opened. The original error is
            propagated unchanged so the failing path and errno stay visible
            (``IOError`` is an alias of ``OSError`` on Python 3).
    """
    with open('clinical_sectionizer/_version.py', 'r') as f:
        first_line = f.readline()
    # Strip whitespace first, then quotes, so a trailing newline or spaces
    # around the literal do not shield the quote characters.
    return first_line.split('=')[-1].strip().strip('\'"')
_PMH_TITLE = "past_medical_history"
_PMH_HEADER = "Past Medical History:"


def _rule(section_title, pattern, **extras):
    """Return one pattern dict; ``extras`` adds keys such as ``parents``."""
    return {"section_title": section_title, "pattern": pattern, **extras}


def _pmh_rule():
    """Return a fresh pattern dict for the past-medical-history header."""
    return _rule(_PMH_TITLE, _PMH_HEADER)


def _build(rules, **settings):
    """Create a Sectionizer without default patterns and register ``rules``."""
    component = Sectionizer(nlp, patterns=None, **settings)
    component.add(rules)
    return component


def _parent_of(doc, index):
    """Return the parent field of the section stored at ``index``."""
    _, _, parent, _ = doc._.sections[index]
    return parent


class TestSectionizer:
    """Tests for ``Sectionizer`` run against the module-level ``nlp`` pipeline."""

    def test_initiate(self):
        """A Sectionizer built with default arguments is truthy."""
        assert Sectionizer(nlp)

    def test_load_default_rules(self):
        """``patterns="default"`` yields a non-empty pattern list."""
        component = Sectionizer(nlp, patterns="default")
        assert component.patterns

    def test_load_no_rules(self):
        """``patterns=None`` yields an empty pattern list."""
        component = Sectionizer(nlp, patterns=None)
        assert component.patterns == []

    def test_add(self):
        """Adding a rule makes ``patterns`` non-empty."""
        component = _build([_rule("section", "my pattern")])
        assert component.patterns

    def test_num_sections(self):
        """Each processed doc gets exactly one section, also on a second run."""
        component = _build([_pmh_rule()])
        first = nlp("Past Medical History: PE")
        component(first)
        assert len(first._.sections) == 1
        # Now reprocess and make sure it resets
        second = nlp("Past Medical History: PE")
        component(second)
        assert len(second._.sections) == 1

    def test_string_match(self):
        """A string pattern produces the expected title, header and span."""
        component = _build([_pmh_rule()])
        doc = nlp("Past Medical History: PE")
        component(doc)
        title, header, _parent, span = doc._.sections[0]
        assert title == "past_medical_history"
        assert header.text == "Past Medical History:"
        assert span.text == "Past Medical History: PE"

    def test_list_pattern_match(self):
        """A token-attribute list pattern matches like the string pattern."""
        token_pattern = [
            {"LOWER": word} for word in ("past", "medical", "history", ":")
        ]
        component = _build([_rule(_PMH_TITLE, token_pattern)])
        doc = nlp("Past Medical History: PE")
        component(doc)
        title, header, _parent, span = doc._.sections[0]
        assert title == "past_medical_history"
        assert header.text == "Past Medical History:"
        assert span.text == "Past Medical History: PE"

    def test_document_starts_no_header(self):
        """Text before the first header becomes an untitled first section."""
        component = _build([_pmh_rule()])
        doc = nlp("This is separate. Past Medical History: PE")
        component(doc)
        assert len(doc._.sections) == 2

        title, header, _parent, span = doc._.sections[0]
        assert title is None
        assert header is None
        assert span.text == "This is separate."

        title, header, _parent, span = doc._.sections[1]
        assert title == "past_medical_history"
        assert header.text == "Past Medical History:"
        assert span.text == "Past Medical History: PE"

    def test_max_scope_none(self):
        """With ``max_scope=None`` tokens 2 and 3 past the header are tagged."""
        component = _build([_pmh_rule()], max_scope=None)
        doc = nlp("Past Medical History: This is the sentence.")
        component(doc)
        _title, header, _parent, span = doc._.sections[0]
        last_header_index = len(header) - 1
        assert (
            span[last_header_index + 2]._.section_title
            == "past_medical_history"
        )
        assert (
            span[last_header_index + 3]._.section_title
            == "past_medical_history"
        )

    def test_max_scope(self):
        """With ``max_scope=2`` the third token past the header is untagged."""
        component = _build([_pmh_rule()], max_scope=2)
        doc = nlp("Past Medical History: This is the sentence.")
        component(doc)
        _title, header, _parent, span = doc._.sections[0]
        last_header_index = len(header) - 1
        assert (
            span[last_header_index + 2]._.section_title
            == "past_medical_history"
        )
        assert span[last_header_index + 3]._.section_title is None

    def test_start_line(self):
        """``require_start_line=True`` yields two sections for this text."""
        component = _build([_pmh_rule()], require_start_line=True)
        doc = nlp(
            "\n\n Past Medical History: The patient has a Past Medical History:"
        )
        component(doc)
        assert len(doc._.sections) == 2

    def test_end_line(self):
        """``require_end_line=True`` yields two sections for this text."""
        component = _build([_pmh_rule()], require_end_line=True)
        doc = nlp(
            "\n\n Past Medical History:\n The patient has a Past Medical History: this"
        )
        component(doc)
        assert len(doc._.sections) == 2

    def test_parent_section(self):
        """A subsection is assigned its single declared parent."""
        component = _build(
            [
                _pmh_rule(),
                _rule(
                    "explanation",
                    "Explanation:",
                    parents=["past_medical_history"],
                ),
            ]
        )
        doc = nlp(
            "Past Medical History: some other text Explanation: The patient has one"
        )
        component(doc)
        assert len(doc._.sections) == 2
        pmh_parent = _parent_of(doc, 0)
        explanation_parent = _parent_of(doc, 1)
        assert pmh_parent is None
        assert explanation_parent == "past_medical_history"

    def test_parent_section_multiple_candidates(self):
        """With two candidate parents, the one present in the doc is used."""
        component = _build(
            [
                _pmh_rule(),
                _rule(
                    "explanation",
                    "Explanation:",
                    parents=["past_medical_history", "allergies"],
                ),
            ]
        )
        doc = nlp(
            "Past Medical History: some other text. Explanation: The patient has one"
        )
        component(doc)
        assert len(doc._.sections) == 2
        pmh_parent = _parent_of(doc, 0)
        explanation_parent = _parent_of(doc, 1)
        assert pmh_parent is None
        assert explanation_parent == "past_medical_history"

    def test_parent_section_candidate_after_section(self):
        """A candidate parent appearing after the subsection is not used."""
        component = _build(
            [
                _pmh_rule(),
                _rule("allergies", "Allergies:"),
                _rule(
                    "explanation",
                    "Explanation:",
                    parents=["past_medical_history", "allergies"],
                ),
            ]
        )
        doc = nlp(
            "Past Medical History: some other text. Explanation: The patient has one. Allergies: peanuts"
        )
        component(doc)
        assert len(doc._.sections) == 3
        pmh_parent = _parent_of(doc, 0)
        explanation_parent = _parent_of(doc, 1)
        allergies_parent = _parent_of(doc, 2)
        assert pmh_parent is None
        assert explanation_parent == "past_medical_history"
        assert allergies_parent is None

    def test_parent_section_duplicate_sections_different_parents(self):
        """The same subsection title can resolve to different parents."""
        component = _build(
            [
                _pmh_rule(),
                _rule("allergies", "Allergies:"),
                _rule(
                    "explanation",
                    "Explanation:",
                    parents=["past_medical_history", "allergies"],
                ),
            ]
        )
        doc = nlp(
            "Past Medical History: some other text. Explanation: The patient has one. Allergies: peanuts Explanation: pt cannot eat peanuts"
        )
        component(doc)
        assert len(doc._.sections) == 4
        pmh_parent = _parent_of(doc, 0)
        explanation_parent = _parent_of(doc, 1)
        allergies_parent = _parent_of(doc, 2)
        explanation_parent2 = _parent_of(doc, 3)
        assert pmh_parent is None
        assert explanation_parent == "past_medical_history"
        assert allergies_parent is None
        assert explanation_parent2 == "allergies"

    def test_parent_section_no_valid_parent(self):
        """A subsection following a non-candidate section gets no parent."""
        component = _build(
            [
                _pmh_rule(),
                _rule("allergies", "Allergies:"),
                _rule(
                    "explanation",
                    "Explanation:",
                    parents=["past_medical_history"],
                ),
            ]
        )
        doc = nlp(
            "Past Medical History: some other text. Allergies: peanuts Explanation: pt cannot eat peanuts"
        )
        component(doc)
        assert len(doc._.sections) == 3
        pmh_parent = _parent_of(doc, 0)
        allergies_parent = _parent_of(doc, 1)
        explanation_parent2 = _parent_of(doc, 2)
        assert pmh_parent is None
        assert allergies_parent is None
        assert explanation_parent2 is None

    def test_parent_section_parent_required(self):
        """With ``parent_required`` and no parent, only one untitled section results."""
        component = _build(
            [
                _pmh_rule(),
                _rule(
                    "explanation",
                    "Explanation:",
                    parents=["past_medical_history"],
                    parent_required=True,
                ),
            ]
        )
        doc = nlp("other text Explanation: The patient has one")
        component(doc)
        assert len(doc._.sections) == 1
        title, _header, parent, _span = doc._.sections[0]
        assert title is None
        assert parent is None

    def test_parent_section_chain(self):
        """Parents resolve along a three-level chain s1 -> s2 -> s3."""
        component = _build(
            [
                _rule("s1", "section 1:"),
                _rule("s2", "section 2:", parents=["s1"]),
                _rule("s3", "section 3:", parents=["s2"]),
            ]
        )
        doc = nlp("section 1: abc section 2: abc section 3: abc")
        component(doc)
        assert len(doc._.sections) == 3
        s1, s2, s3 = (_parent_of(doc, i) for i in (0, 1, 2))
        assert s1 is None
        assert s2 == "s1"
        assert s3 == "s2"

    def test_parent_section_chain_backtracking(self):
        """A later section can attach to an ancestor further up the chain."""
        component = _build(
            [
                _rule("s1", "section 1:"),
                _rule("s2", "section 2:", parents=["s1"]),
                _rule("s3", "section 3:", parents=["s2"]),
                _rule("s4", "section 4:", parents=["s1"]),
            ]
        )
        doc = nlp(
            "section 1: abc section 2: abc section 3: abc section 4: abc"
        )
        component(doc)
        assert len(doc._.sections) == 4
        s1, s2, s3, s4 = (_parent_of(doc, i) for i in (0, 1, 2, 3))
        assert s1 is None
        assert s2 == "s1"
        assert s3 == "s2"
        assert s4 == "s1"

    def test_parent_section_chain_backtracking_interrupted(self):
        """An unrelated section in between leaves the later section without a parent."""
        component = _build(
            [
                _rule("s1", "section 1:"),
                _rule("s2", "section 2:", parents=["s1"]),
                _rule("s3", "section 3:", parents=["s2"]),
                _rule("break", "section break:"),
                _rule("s4", "section 4:", parents=["s1"]),
            ]
        )
        doc = nlp(
            "section 1: abc section 2: abc section 3: abc section break: abc section 4: abc"
        )
        component(doc)
        assert len(doc._.sections) == 5
        # Index 3 is the "break" section, which the original test also skips.
        s1, s2, s3, s4 = (_parent_of(doc, i) for i in (0, 1, 2, 4))
        assert s1 is None
        assert s2 == "s1"
        assert s3 == "s2"
        assert s4 is None

    def test_duplicate_parent_definitions(self):
        """Conflicting parent definitions emit one RuntimeWarning and still resolve."""
        with warnings.catch_warnings(record=True) as caught:
            component = _build(
                [
                    _rule("s1", "section 1:"),
                    _rule("s2", "section 2:", parents=["s1"]),
                    _rule("s2", "section 2:", parents=["s3"]),
                    _rule("s3", "section 3:"),
                ]
            )
            doc = nlp(
                "section 1: abc section 2: abc section 3: abc section 2: abc"
            )
            component(doc)
            assert len(doc._.sections) == 4
            s1, s2, s3, s2_2 = (_parent_of(doc, i) for i in (0, 1, 2, 3))
            assert len(caught) == 1
            assert issubclass(caught[0].category, RuntimeWarning)
            assert s1 is None
            assert s2 == "s1"
            assert s3 is None
            assert s2_2 == "s3"

    def test_named_tuple(self):
        """A section unpacks as a 4-tuple and exposes the same values by name."""
        doc = nlp("Past Medical History: PE")
        component = _build([_pmh_rule()])
        component(doc)
        section_tup = doc._.sections[0]
        from clinical_sectionizer.sectionizer import Section

        assert isinstance(section_tup, (tuple, Section))
        assert len(section_tup) == 4
        title, header, parent, span = section_tup
        assert title is section_tup.section_title
        assert header is section_tup.section_header
        assert parent is section_tup.section_parent
        assert span is section_tup.section_span
class TestTextSectionizer:
    """Tests for ``TextSectionizer``, which is called on plain strings."""

    def test_add(self):
        """Adding a rule with default flags makes ``patterns`` non-empty."""
        sectionizer = TextSectionizer(patterns=None)
        sectionizer.add([{"section_title": "section", "pattern": "my pattern"}])
        assert sectionizer.patterns

    def test_add_wo_flag(self):
        """Adding a rule with an empty ``cflags`` list also registers it."""
        sectionizer = TextSectionizer(patterns=None)
        sectionizer.add(
            [{"section_title": "section", "pattern": "my pattern"}], cflags=[]
        )
        assert sectionizer.patterns

    def test_string_match(self):
        """With default flags the header is found and returned as text."""
        sectionizer = TextSectionizer(patterns=None)
        sectionizer.add(
            [
                {
                    "section_title": "past_medical_history",
                    "pattern": "Past Medical History:",
                }
            ]
        )
        doc = "Past Medical History: PE"
        sections = sectionizer(doc)
        (section_title, header, section) = sections[0]
        assert section_title == "past_medical_history"
        assert header == "Past Medical History:"
        assert section == "Past Medical History: PE"

    def test_string_match_case_sensitive(self):
        """With ``cflags=[]`` an upper-case pattern misses mixed-case text."""
        sectionizer = TextSectionizer(patterns=None)
        sectionizer.add(
            [
                {
                    "section_title": "past_medical_history",
                    "pattern": "PAST MEDICAL HISTORY:",
                }
            ],
            cflags=[],
        )
        doc = "Past Medical History: PE"
        sections = sectionizer(doc)
        (section_title, header, section) = sections[0]
        # Identity comparison is the correct check for the None singleton.
        assert section_title is None
        assert header is None

    def test_string_match_case_sensitive2(self):
        """With ``cflags=[]`` an upper-case pattern matches upper-case text."""
        sectionizer = TextSectionizer(patterns=None)
        sectionizer.add(
            [
                {
                    "section_title": "past_medical_history",
                    "pattern": "PAST MEDICAL HISTORY:",
                }
            ],
            cflags=[],
        )
        doc = "PAST MEDICAL HISTORY: PE"
        sections = sectionizer(doc)
        (section_title, header, section) = sections[0]
        assert section_title == "past_medical_history"
        assert header == "PAST MEDICAL HISTORY:"
        assert section == "PAST MEDICAL HISTORY: PE"

    def test_string_match_case_sensitive3(self):
        """Header and section text keep the casing of the input document."""
        sectionizer = TextSectionizer(patterns=None)
        sectionizer.add(
            [
                {
                    "section_title": "past_medical_history",
                    "pattern": "PAST MEDICAL HISTORY:",
                }
            ],
            cflags=[],
        )
        doc = "PAST MEDICAL HISTORY: PE"
        sections = sectionizer(doc)
        (section_title, header, section) = sections[0]
        assert section_title == "past_medical_history"
        # The expected values previously used mixed case, which contradicts
        # test_string_match_case_sensitive2 (identical setup and input) and
        # could never equal text taken from this upper-case document.
        assert header == "PAST MEDICAL HISTORY:"
        assert section == "PAST MEDICAL HISTORY: PE"