├── .gitignore ├── LICENSE.txt ├── README.md ├── __init__.py ├── csvimporter.py ├── databases ├── rxnorm.sh └── umls.sh ├── docs ├── Makefile ├── conf.py ├── index.rst └── py-umls.rst ├── graphable.py ├── loinc.py ├── rxnorm.py ├── rxnorm_download.py ├── rxnorm_graph.py ├── rxnorm_link.py ├── rxnorm_link_run.py ├── rxnorm_link_run.sh ├── rxnorm_tests.py ├── snomed.py ├── snomed_tests.py ├── sqlite.py └── umls.py /.gitignore: -------------------------------------------------------------------------------- 1 | # ignore database files but not the import scripts 2 | databases/*.db 3 | 4 | # virtualenv 5 | env 6 | 7 | # docs 8 | docs/_build 9 | 10 | # system files 11 | .DS_Store 12 | __pycache__ 13 | 14 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2015 Boston Children's Hospital 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | UMLS for Python 2 | =============== 3 | 4 | These are basic tools to interact with UMLS lexica, namely UMLS, SNOMED and RxNorm, using Python 3 scripts. 5 | For each of the three databases there are scripts (2 Bash and 1 Python) that facilitate import of the downloaded data into a local SQLite 3 database. 
6 | 7 | > You will need a UMLS license to download UMLS lexica. 8 | 9 | For a simple start, run one of the files (`umls.py`, `snomed.py`, `rxnorm.py`) in your Shell and follow the instructions. 10 | The scripts will prompt you to download and install the databases and, when completed, print a simple example lookup. 11 | 12 | There are also utility scripts that offer help for specific use cases, see below. 13 | 14 | Documentation 15 | ------------- 16 | 17 | An [auto-generated documentation](http://chb.github.io/py-umls/) (via Sphinx) is available but not very exhaustive at the moment. 18 | See below for some quick examples. 19 | 20 | Usage 21 | ----- 22 | 23 | More detailed instructions here: 24 | 25 | - [**RxNorm**](https://github.com/chb/py-umls/wiki/RxNorm) 26 | - [**SNOMED-CT**](https://github.com/chb/py-umls/wiki/SNOMED) 27 | 28 | There are `XYLookup` classes in each of the three files which can be used for database lookups (where `XY` stands for `UMLS`, `SNOMED` or `RxNorm`). 29 | The following example code is appended to the end of the respective scripts and will be executed if you run it in the Shell. 30 | You might want to insert `XY.check_databases()` before this code so you will get an exception if the databases haven't been set up. 31 | 32 | look_umls = UMLSLookup() 33 | code_umls = 'C0002962' 34 | meaning_umls = look_umls.lookup_code_meaning(code_umls) 35 | print('UMLS code "{0}": {1}'.format(code_umls, meaning_umls)) 36 | 37 | look_snomed = SNOMEDLookup() 38 | code_snomed = '215350009' 39 | meaning_snomed = look_snomed.lookup_code_meaning(code_snomed) 40 | print('SNOMED code "{0}": {1}'.format(code_snomed, meaning_snomed)) 41 | 42 | look_rxnorm = RxNormLookup() 43 | code_rxnorm = '328406' 44 | meaning_rxnorm = look_rxnorm.lookup_code_meaning(code_rxnorm, preferred=False) 45 | print('RxNorm code "{0}": {1}'.format(code_rxnorm, meaning_rxnorm)) 46 | 47 | You would typically use this module as a submodule in your own project. 
48 | Best add this as a _git submodule_ but that really is up to you. 49 | If you do use this module as a Python module, you can't use the name `py-umls` because it contains a dash, so you must checkout this code to a correctly named directory. 50 | I usually use `umls`. 51 | 52 | License 53 | ------- 54 | 55 | This work is [Apache licensed](LICENSE.txt). 56 | 57 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os.path 3 | abspath = os.path.abspath(os.path.dirname(__file__)) 4 | if abspath not in sys.path: 5 | sys.path.insert(0, abspath) 6 | -------------------------------------------------------------------------------- /csvimporter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Simple CSV importer. 5 | 6 | import re 7 | import csv 8 | import sqlite3 9 | 10 | 11 | class CSVImporter(object): 12 | """ A simple CSV to SQLite importer class. 13 | 14 | Expects a CSV file with a header row, will create a table reflecting the 15 | header row and import all rows. 
16 | """ 17 | _sqlite = None 18 | 19 | def __init__(self, csv_path, tablename='rows'): 20 | self.filepath = csv_path 21 | self.tablename = tablename 22 | 23 | def sqlite_handle(self, dbpath): 24 | if self._sqlite is None: 25 | self._sqlite = sqlite3.connect(dbpath) 26 | return self._sqlite 27 | 28 | def import_to(self, dbpath, csv_format='excel'): 29 | assert self.filepath 30 | assert dbpath 31 | 32 | # SQLite handling 33 | sql_handle = self.sqlite_handle(dbpath) 34 | sql_handle.isolation_level = 'EXCLUSIVE' 35 | sql_cursor = sql_handle.cursor() 36 | create_sql = 'CREATE TABLE {} '.format(self.tablename) 37 | insert_sql = 'INSERT INTO {} '.format(self.tablename) 38 | all_but_alnum = r'\W+' 39 | 40 | # loop rows 41 | with open(self.filepath, 'r') as csv_handle: 42 | reader = csv.reader(csv_handle, quotechar='"', dialect=csv_format) 43 | try: 44 | i = 0 45 | for row in reader: 46 | sql = insert_sql 47 | params = () 48 | 49 | # first row is the header row 50 | if 0 == i: 51 | fields = [] 52 | fields_create = [] 53 | for field in row: 54 | field = re.sub(all_but_alnum, '', field) 55 | fields.append(field) 56 | fields_create.append('{} VARCHAR'.format(field)) 57 | 58 | create_sql += "(\n\t{}\n)".format(",\n\t".join(fields_create)) 59 | sql = create_sql 60 | 61 | insert_sql += '({}) VALUES ({})'.format(', '.join(fields), ', '.join(['?' 
for i in range(len(fields))])) 62 | 63 | # data rows 64 | else: 65 | params = tuple(row) 66 | 67 | # execute SQL statement 68 | try: 69 | sql_cursor.execute(sql, params) 70 | except Exception as e: 71 | sys.exit(u'SQL failed: %s -- %s' % (e, sql)) 72 | i += 1 73 | 74 | # commit to file 75 | sql_handle.commit() 76 | sql_handle.isolation_level = None 77 | 78 | except csv.Error as e: 79 | sys.exit('CSV error on line %d: %s' % (reader.line_num, e)) 80 | 81 | -------------------------------------------------------------------------------- /databases/rxnorm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # create an RxNORM SQLite database (and a relations triple store). 4 | # 5 | 6 | # our SQLite database does not exist 7 | if [ ! -e rxnorm.db ]; then 8 | if [ ! -d "$1" ]; then 9 | echo "Provide the path to the RxNorm directory as first argument when invoking this script. Download the latest version here: http://www.nlm.nih.gov/research/umls/rxnorm/docs/rxnormfiles.html" 10 | exit 1 11 | fi 12 | if [ ! -d "$1/rrf" ]; then 13 | echo "There is no directory named rrf in the directory you provided. Download the latest version here: http://www.nlm.nih.gov/research/umls/rxnorm/docs/rxnormfiles.html" 14 | exit 1 15 | fi 16 | if ! hash sqlite3 &>/dev/null; then 17 | echo "It seems 'sqlite3' is not installed, I will need it. Aborting." 18 | exit 1 19 | fi 20 | 21 | # init the database 22 | cat "$1/scripts/mysql/Table_scripts_mysql_rxn.sql" | sqlite3 rxnorm.db 23 | 24 | # convert RRF files (strip last pipe and remove quote (") characters, those are giving SQLite troubles) 25 | if [ ! 
-e "$1/rrf/RXNREL.pipe" ]; then 26 | current=$(pwd) 27 | cd "$1/rrf" 28 | echo "-> Converting RRF files for SQLite" 29 | for f in *.RRF; do 30 | sed -e 's/.$//' -e 's/"//g' "$f" > "${f%RRF}pipe" 31 | done 32 | cd $current 33 | fi 34 | 35 | # import tables 36 | for f in "$1/rrf/"*.pipe; do 37 | table=$(basename ${f%.pipe}) 38 | echo "-> Importing $table" 39 | sqlite3 rxnorm.db ".import '$f' '$table'" 40 | done 41 | 42 | # create an NDC table 43 | echo "-> Creating extra tables" 44 | # sqlite3 rxnorm.db "CREATE TABLE NDC AS SELECT RXCUI, ATV AS NDC FROM RXNSAT WHERE ATN = 'NDC';" # we do it in 2 steps to create the primary index column 45 | sqlite3 rxnorm.db "CREATE TABLE NDC (RXCUI INT, NDC VARCHAR);" 46 | sqlite3 rxnorm.db "INSERT INTO NDC SELECT RXCUI, ATV FROM RXNSAT WHERE ATN = 'NDC';" 47 | 48 | # create drug class tables 49 | sqlite3 rxnorm.db "CREATE TABLE VA_DRUG_CLASS (RXCUI int, RXCUI_ORIGINAL int, VA varchar);" 50 | sqlite3 rxnorm.db "CREATE TABLE FRIENDLY_CLASS_NAMES (VACODE varchar, FRIENDLY varchar);" 51 | sqlite3 rxnorm.db "CREATE INDEX X_FRIENDLY_CLASS_NAMES_VACODE ON FRIENDLY_CLASS_NAMES (VACODE);" 52 | 53 | # create indices 54 | echo "-> Indexing NDC table" 55 | sqlite3 rxnorm.db "CREATE INDEX X_NDC_RXCUI ON NDC (RXCUI);" 56 | sqlite3 rxnorm.db "CREATE INDEX X_NDC_NDC ON NDC (NDC);" 57 | 58 | echo "-> Indexing RXNSAT table" 59 | sqlite3 rxnorm.db "CREATE INDEX RXNSAT_RXCUI ON RXNSAT (RXCUI);" 60 | sqlite3 rxnorm.db "CREATE INDEX RXNSAT_ATN ON RXNSAT (ATN);" 61 | 62 | echo "-> Indexing RXNREL table" 63 | sqlite3 rxnorm.db "CREATE INDEX X_RXNREL_RXCUI1 ON RXNREL (RXCUI1);" 64 | sqlite3 rxnorm.db "CREATE INDEX X_RXNREL_RXCUI2 ON RXNREL (RXCUI2);" 65 | sqlite3 rxnorm.db "CREATE INDEX X_RXNREL_RXAUI2 ON RXNREL (RXAUI2);" 66 | #sqlite3 rxnorm.db "CREATE INDEX X_RXNREL_RELA ON RXNREL (RELA);" # do NOT do this! 
slows down queries dramatically 67 | 68 | echo "-> Indexing RXNCONSO table" 69 | sqlite3 rxnorm.db "CREATE INDEX X_RXNCONSO_RXCUI ON RXNCONSO (RXCUI);" 70 | sqlite3 rxnorm.db "CREATE INDEX X_RXNCONSO_RXAUI ON RXNCONSO (RXAUI);" 71 | 72 | # How to export from SQLite: export NDC to CSV 73 | # .mode csv 74 | # .header on 75 | # .out va-class.csv 76 | # SELECT RXCUI, NDC FROM NDC; 77 | # SELECT DISTINCT ATV FROM RXNSAT WHERE ATN = 'VA_CLASS_NAME' ORDER BY ATV ASC; 78 | fi 79 | 80 | -------------------------------------------------------------------------------- /databases/umls.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # create a UMLS SQLite database. 4 | # 5 | 6 | # our SQLite database does not exist 7 | if [ ! -e umls.db ]; then 8 | if [ ! -d "$1" ]; then 9 | echo "Provide the path to the UMLS install directory, which is named something like \"2014AA\" and contains a \"META\" directory, as first argument when invoking this script." 10 | echo 11 | echo "Downloading and Extracting UMLS Data" 12 | echo "====================================" 13 | echo 14 | echo "Downloading and extracting UMLS data is a painful process." 15 | echo "Begin by downloading most files for the latest version listed on the left side here: http://www.nlm.nih.gov/research/umls/licensedcontent/umlsknowledgesources.html" 16 | echo "Extract 'mmsys.zip' and place every downloaded file, including 'mmsys.zip', into the extracted directory." 17 | echo "Run the respective 'runXX' script inside the mmsys directory; the MetamorphoSys Java GUI will open." 18 | echo "Click \"Install UMLS\", as source directory select the just extracted mmsys directory and your chosen target directory." 19 | echo "Leave the checkboxes alone and click OK." 20 | echo "Now you must generate a configuration and in order to be able to proceed, save the configuration via a command from the menu bar." 
21 | echo "Then select \"Begin Subset\", also from the menubar, to start the extraction process." 22 | echo "This should extract all the things and put in in the selected directory, which now contains a META directory with all the files we need to proceed." 23 | echo 24 | echo "Once you have done this, run this script again with the correct path as the first argument." 25 | exit 1 26 | fi 27 | if [ ! -d "$1/META" ]; then 28 | echo "There is no directory named META in the install directory you provided." 29 | echo "Point this script to the directory named something like \"2014AA\"." 30 | exit 1 31 | fi 32 | 33 | # convert RRF files (strip last pipe and remove quote (") characters, those are giving SQLite troubles) 34 | if [ ! -e "$1/META/MRDEF.pipe" ]; then 35 | current=$(pwd) 36 | cd "$1/META" 37 | echo "-> Converting RRF files for SQLite" 38 | for f in MRCONSO.RRF MRDEF.RRF MRSTY.RRF; do 39 | sed -e 's/.$//' -e 's/"//g' "$f" > "${f%RRF}pipe" 40 | done 41 | cd $current 42 | fi 43 | 44 | # init the database for MRDEF 45 | # table structure here: http://www.ncbi.nlm.nih.gov/books/NBK9685/ 46 | sqlite3 umls.db "CREATE TABLE MRDEF ( 47 | CUI varchar, 48 | AUI varchar, 49 | ATUI varchar, 50 | SATUI varchar, 51 | SAB varchar, 52 | DEF text, 53 | SUPPRESS varchar, 54 | CVF varchar 55 | )" 56 | 57 | # init the database for MRCONSO 58 | sqlite3 umls.db "CREATE TABLE MRCONSO ( 59 | CUI varchar, 60 | LAT varchar, 61 | TS varchar, 62 | LUI varchar, 63 | STT varchar, 64 | SUI varchar, 65 | ISPREF varchar, 66 | AUI varchar, 67 | SAUI varchar, 68 | SCUI varchar, 69 | SDUI varchar, 70 | SAB varchar, 71 | TTY varchar, 72 | CODE varchar, 73 | STR text, 74 | SRL varchar, 75 | SUPPRESS varchar, 76 | CVF varchar 77 | )" 78 | 79 | # init the database for MRSTY 80 | sqlite3 umls.db "CREATE TABLE MRSTY ( 81 | CUI varchar, 82 | TUI varchar, 83 | STN varchar, 84 | STY text, 85 | ATUI varchar, 86 | CVF varchar 87 | )" 88 | 89 | # import tables 90 | for f in "$1/META/"*.pipe; do 91 | 
table=$(basename ${f%.pipe}) 92 | echo "-> Importing $table" 93 | sqlite3 umls.db ".import '$f' '$table'" 94 | done 95 | 96 | # create indexes 97 | echo "-> Creating indexes" 98 | sqlite3 umls.db "CREATE INDEX X_CUI_MRDEF ON MRDEF (CUI);" 99 | sqlite3 umls.db "CREATE INDEX X_SAB_MRDEF ON MRDEF (SAB);" 100 | sqlite3 umls.db "CREATE INDEX X_CUI_MRCONSO ON MRCONSO (CUI);" 101 | sqlite3 umls.db "CREATE INDEX X_LAT_MRCONSO ON MRCONSO (LAT);" 102 | sqlite3 umls.db "CREATE INDEX X_TS_MRCONSO ON MRCONSO (TS);" 103 | sqlite3 umls.db "CREATE INDEX X_CUI_MRSTY ON MRSTY (CUI);" 104 | sqlite3 umls.db "CREATE INDEX X_TUI_MRSTY ON MRSTY (TUI);" 105 | 106 | # create faster lookup table 107 | echo "-> Creating fast lookup table" 108 | sqlite3 umls.db "CREATE TABLE descriptions AS SELECT CUI, LAT, SAB, TTY, STR FROM MRCONSO WHERE LAT = 'ENG' AND TS = 'P' AND ISPREF = 'Y'" 109 | sqlite3 umls.db "ALTER TABLE descriptions ADD COLUMN STY TEXT" 110 | sqlite3 umls.db "CREATE INDEX X_CUI_desc ON descriptions (CUI)" 111 | sqlite3 umls.db "UPDATE descriptions SET STY = (SELECT GROUP_CONCAT(MRSTY.TUI, '|') FROM MRSTY WHERE MRSTY.CUI = descriptions.CUI GROUP BY MRSTY.CUI)" 112 | else 113 | echo "=> umls.db already exists" 114 | fi 115 | 116 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. 
If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf 
$(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/py-umls.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/py-umls.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/py-umls" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/py-umls" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 
105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 
152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # py-umls documentation build configuration file, created by 4 | # sphinx-quickstart on Fri Apr 18 20:08:31 2014. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
21 | sys.path.insert(0, os.path.abspath('..')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = [ 32 | 'sphinx.ext.autodoc', 33 | 'sphinx.ext.todo', 34 | ] 35 | 36 | # Add any paths that contain templates here, relative to this directory. 37 | templates_path = ['_templates'] 38 | 39 | # The suffix of source filenames. 40 | source_suffix = '.rst' 41 | 42 | # The encoding of source files. 43 | #source_encoding = 'utf-8-sig' 44 | 45 | # The master toctree document. 46 | master_doc = 'index' 47 | 48 | # General information about the project. 49 | project = u'py-umls' 50 | copyright = u'2014, Pascal Pfiffner' 51 | 52 | # The version info for the project you're documenting, acts as replacement for 53 | # |version| and |release|, also used in various other places throughout the 54 | # built documents. 55 | # 56 | # The short X.Y version. 57 | version = '' 58 | # The full version, including alpha/beta/rc tags. 59 | release = '' 60 | 61 | # The language for content autogenerated by Sphinx. Refer to documentation 62 | # for a list of supported languages. 63 | #language = None 64 | 65 | # There are two options for replacing |today|: either, you set today to some 66 | # non-false value, then it is used: 67 | #today = '' 68 | # Else, today_fmt is used as the format for a strftime call. 69 | #today_fmt = '%B %d, %Y' 70 | 71 | # List of patterns, relative to source directory, that match files and 72 | # directories to ignore when looking for source files. 73 | exclude_patterns = ['_build'] 74 | 75 | # The reST default role (used for this markup: `text`) to use for all 76 | # documents. 
77 | #default_role = None 78 | 79 | # If true, '()' will be appended to :func: etc. cross-reference text. 80 | #add_function_parentheses = True 81 | 82 | # If true, the current module name will be prepended to all description 83 | # unit titles (such as .. function::). 84 | #add_module_names = True 85 | 86 | # If true, sectionauthor and moduleauthor directives will be shown in the 87 | # output. They are ignored by default. 88 | #show_authors = False 89 | 90 | # The name of the Pygments (syntax highlighting) style to use. 91 | pygments_style = 'sphinx' 92 | 93 | # A list of ignored prefixes for module index sorting. 94 | #modindex_common_prefix = [] 95 | 96 | # If true, keep warnings as "system message" paragraphs in the built documents. 97 | #keep_warnings = False 98 | 99 | 100 | # -- Options for HTML output ---------------------------------------------- 101 | 102 | # The theme to use for HTML and HTML Help pages. See the documentation for 103 | # a list of builtin themes. 104 | html_theme = 'default' 105 | 106 | # Theme options are theme-specific and customize the look and feel of a theme 107 | # further. For a list of options available for each theme, see the 108 | # documentation. 109 | #html_theme_options = {} 110 | 111 | # Add any paths that contain custom themes here, relative to this directory. 112 | #html_theme_path = [] 113 | 114 | # The name for this set of Sphinx documents. If None, it defaults to 115 | # " v documentation". 116 | #html_title = None 117 | 118 | # A shorter title for the navigation bar. Default is the same as html_title. 119 | #html_short_title = None 120 | 121 | # The name of an image file (relative to this directory) to place at the top 122 | # of the sidebar. 123 | #html_logo = None 124 | 125 | # The name of an image file (within the static path) to use as favicon of the 126 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 127 | # pixels large. 
128 | #html_favicon = None 129 | 130 | # Add any paths that contain custom static files (such as style sheets) here, 131 | # relative to this directory. They are copied after the builtin static files, 132 | # so a file named "default.css" will overwrite the builtin "default.css". 133 | html_static_path = ['_static'] 134 | 135 | # Add any extra paths that contain custom files (such as robots.txt or 136 | # .htaccess) here, relative to this directory. These files are copied 137 | # directly to the root of the documentation. 138 | #html_extra_path = [] 139 | 140 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 141 | # using the given strftime format. 142 | #html_last_updated_fmt = '%b %d, %Y' 143 | 144 | # If true, SmartyPants will be used to convert quotes and dashes to 145 | # typographically correct entities. 146 | #html_use_smartypants = True 147 | 148 | # Custom sidebar templates, maps document names to template names. 149 | #html_sidebars = {} 150 | 151 | # Additional templates that should be rendered to pages, maps page names to 152 | # template names. 153 | #html_additional_pages = {} 154 | 155 | # If false, no module index is generated. 156 | #html_domain_indices = True 157 | 158 | # If false, no index is generated. 159 | #html_use_index = True 160 | 161 | # If true, the index is split into individual pages for each letter. 162 | #html_split_index = False 163 | 164 | # If true, links to the reST sources are added to the pages. 165 | #html_show_sourcelink = True 166 | 167 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 168 | #html_show_sphinx = True 169 | 170 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 171 | #html_show_copyright = True 172 | 173 | # If true, an OpenSearch description file will be output, and all pages will 174 | # contain a tag referring to it. The value of this option must be the 175 | # base URL from which the finished HTML is served. 
176 | #html_use_opensearch = '' 177 | 178 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 179 | #html_file_suffix = None 180 | 181 | # Output file base name for HTML help builder. 182 | htmlhelp_basename = 'py-umlsdoc' 183 | 184 | 185 | # -- Options for LaTeX output --------------------------------------------- 186 | 187 | latex_elements = { 188 | # The paper size ('letterpaper' or 'a4paper'). 189 | #'papersize': 'letterpaper', 190 | 191 | # The font size ('10pt', '11pt' or '12pt'). 192 | #'pointsize': '10pt', 193 | 194 | # Additional stuff for the LaTeX preamble. 195 | #'preamble': '', 196 | } 197 | 198 | # Grouping the document tree into LaTeX files. List of tuples 199 | # (source start file, target name, title, 200 | # author, documentclass [howto, manual, or own class]). 201 | latex_documents = [ 202 | ('index', 'py-umls.tex', u'py-umls Documentation', 203 | u'Pascal Pfiffner', 'manual'), 204 | ] 205 | 206 | # The name of an image file (relative to this directory) to place at the top of 207 | # the title page. 208 | #latex_logo = None 209 | 210 | # For "manual" documents, if this is true, then toplevel headings are parts, 211 | # not chapters. 212 | #latex_use_parts = False 213 | 214 | # If true, show page references after internal links. 215 | #latex_show_pagerefs = False 216 | 217 | # If true, show URL addresses after external links. 218 | #latex_show_urls = False 219 | 220 | # Documents to append as an appendix to all manuals. 221 | #latex_appendices = [] 222 | 223 | # If false, no module index is generated. 224 | #latex_domain_indices = True 225 | 226 | 227 | # -- Options for manual page output --------------------------------------- 228 | 229 | # One entry per manual page. List of tuples 230 | # (source start file, name, description, authors, manual section). 231 | man_pages = [ 232 | ('index', 'py-umls', u'py-umls Documentation', 233 | [u'Pascal Pfiffner'], 1) 234 | ] 235 | 236 | # If true, show URL addresses after external links. 
237 | #man_show_urls = False 238 | 239 | 240 | # -- Options for Texinfo output ------------------------------------------- 241 | 242 | # Grouping the document tree into Texinfo files. List of tuples 243 | # (source start file, target name, title, author, 244 | # dir menu entry, description, category) 245 | texinfo_documents = [ 246 | ('index', 'py-umls', u'py-umls Documentation', 247 | u'Pascal Pfiffner', 'py-umls', 'One line description of project.', 248 | 'Miscellaneous'), 249 | ] 250 | 251 | # Documents to append as an appendix to all manuals. 252 | #texinfo_appendices = [] 253 | 254 | # If false, no module index is generated. 255 | #texinfo_domain_indices = True 256 | 257 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 258 | #texinfo_show_urls = 'footnote' 259 | 260 | # If true, do not generate a @detailmenu in the "Top" node's menu. 261 | #texinfo_no_detailmenu = False 262 | 263 | 264 | # -- Options for Epub output ---------------------------------------------- 265 | 266 | # Bibliographic Dublin Core info. 267 | epub_title = u'py-umls' 268 | epub_author = u'Pascal Pfiffner' 269 | epub_publisher = u'Pascal Pfiffner' 270 | epub_copyright = u'2014, Pascal Pfiffner' 271 | 272 | # The basename for the epub file. It defaults to the project name. 273 | #epub_basename = u'py-umls' 274 | 275 | # The HTML theme for the epub output. Since the default themes are not optimized 276 | # for small screen space, using the same theme for HTML and epub output is 277 | # usually not wise. This defaults to 'epub', a theme designed to save visual 278 | # space. 279 | #epub_theme = 'epub' 280 | 281 | # The language of the text. It defaults to the language option 282 | # or en if the language is not set. 283 | #epub_language = '' 284 | 285 | # The scheme of the identifier. Typical schemes are ISBN or URL. 286 | #epub_scheme = '' 287 | 288 | # The unique identifier of the text. This can be a ISBN number 289 | # or the project homepage. 
290 | #epub_identifier = '' 291 | 292 | # A unique identification for the text. 293 | #epub_uid = '' 294 | 295 | # A tuple containing the cover image and cover page html template filenames. 296 | #epub_cover = () 297 | 298 | # A sequence of (type, uri, title) tuples for the guide element of content.opf. 299 | #epub_guide = () 300 | 301 | # HTML files that should be inserted before the pages created by sphinx. 302 | # The format is a list of tuples containing the path and title. 303 | #epub_pre_files = [] 304 | 305 | # HTML files that should be inserted after the pages created by sphinx. 306 | # The format is a list of tuples containing the path and title. 307 | #epub_post_files = [] 308 | 309 | # A list of files that should not be packed into the epub file. 310 | epub_exclude_files = ['search.html'] 311 | 312 | # The depth of the table of contents in toc.ncx. 313 | #epub_tocdepth = 3 314 | 315 | # Allow duplicate toc entries. 316 | #epub_tocdup = True 317 | 318 | # Choose between 'default' and 'includehidden'. 319 | #epub_tocscope = 'default' 320 | 321 | # Fix unsupported image types using the PIL. 322 | #epub_fix_images = False 323 | 324 | # Scale large images. 325 | #epub_max_image_width = 0 326 | 327 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 328 | #epub_show_urls = 'inline' 329 | 330 | # If false, no index is generated. 331 | #epub_use_index = True 332 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. py-umls documentation master file, created by 2 | sphinx-quickstart on Fri Apr 18 20:08:31 2014. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | py-umls 7 | ======= 8 | 9 | This is the documentation for `py-umls` (https://github.com/chb/py-umls), a simple Python 3 library that helps deal with RxNorm, SNOMED and UMLS resources. 
10 | Development is ongoing, based on the needs of the developer, and documentation is sparse at best. 11 | 12 | This module creates **SQLite** databases from UMLS downloads. 13 | The scripts that perform this task can be found in the `databases` directory. 14 | 15 | Contents: 16 | 17 | .. toctree:: 18 | :maxdepth: 4 19 | 20 | py-umls 21 | 22 | 23 | Index & Search 24 | -------------- 25 | 26 | * :ref:`genindex` 27 | * :ref:`modindex` 28 | * :ref:`search` 29 | 30 | -------------------------------------------------------------------------------- /docs/py-umls.rst: -------------------------------------------------------------------------------- 1 | py-umls package 2 | =============== 3 | 4 | This package contains three modules with classes useful for dealing with **RxNorm**, then a module each for UMLS and SNOMED handling. 5 | 6 | 7 | rxnorm 8 | ------ 9 | 10 | Provides classes that deal with RxNorm. This is very much WiP! 11 | 12 | .. automodule:: rxnorm 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | rxnorm_link 18 | ----------- 19 | 20 | A script used to create JSON documents from most RxNorm concepts and store them into a NoSQL database. This is very much WiP! 21 | 22 | .. automodule:: rxnorm_link 23 | :members: 24 | :undoc-members: 25 | :show-inheritance: 26 | 27 | rxnorm_graph 28 | ------------ 29 | 30 | A useful script to help visualize relationships between RxNorm concepts, starting from a given RXCUI. 31 | Just run this script in your command line and follow the leader. 32 | 33 | .. automodule:: rxnorm_graph 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | umls 39 | ---- 40 | 41 | Module to deal with UMLS lexica. 42 | 43 | .. automodule:: umls 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | 48 | snomed 49 | ------ 50 | 51 | Module to deal with the SNOMED terminology. 52 | 53 | ..
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Graphable objects for fun
#
# 2014-02-18 Created by Pascal Pfiffner

import os
import uuid
import subprocess
import tempfile


class GraphableObject(object):
    """ A node in a Graphviz "dot" graph.

    The optional attributes (`shape`, `style`, `color`, `label`) configure how
    the node is rendered; `dot_representation` produces the node's dot source
    line.
    """

    _name = None          # uniquely identifies the object within the graph
    label = None          # shown in place of the name, if set
    shape = None          # dot "shape" attribute
    style = None          # dot "style" attribute
    color = None          # dot "color" attribute
    announced_via = None  # the object that announced the receiver, if any

    def __init__(self, name, label=None):
        # fall back to a generated unique name so dot identifiers never clash
        self._name = name if name else 'o' + uuid.uuid4().hex
        self.label = label

    @property
    def name(self):
        return self._name if self._name else 'unnamed'

    def inner_dot(self):
        """ Return the "[...]" attribute list for this node, or None if no
        attributes are set. """
        if self.label or self.style or self.color or self.shape:
            inner = []
            if self.shape:
                inner.append("shape={}".format(self.shape))
            if self.style:
                inner.append("style={}".format(self.style))
            if self.color:
                inner.append("color={}".format(self.color))
            if self.label:
                inner.append('label="{}"'.format(self.label))
            return "[{}]".format(','.join(inner))
        return None

    def dot_representation(self):
        """ The complete dot source line for this node. """
        inner = self.inner_dot()
        if inner:
            return "\t{} {};\n".format(self.name, inner)
        return "\t{};\n".format(self.name)

    def announce_to(self, dot_context, via=None):
        """ Announce the receiver to the context.

        Subclasses MUST NOT announce other graphable objects they are holding
        on to here but they MUST announce them in "deliver_to" if appropriate.

        - dot_context The context to announce to
        - via If not-None the other GraphableObject that is responsible for
          announcing the receiver
        """
        self.announced_via = via
        dot_context.announce(self)

    def deliver_to(self, dot_context, is_leaf):
        """ Call the context's "deliver" method.

        This method is guaranteed to only be called once per context. Hence
        subclasses that hold on to other graphable objects MUST ANNOUNCE those
        instances here (but NOT deliver them) but ONLY IF "is_leaf" is not True.

        - dot_context The context to deliver to
        - is_leaf If True means the receiver is intended to be a leaf object
        """
        dot_context.deliver(self)


class GraphableRelation(GraphableObject):
    """ A directed, labeled edge between two GraphableObject instances. """

    relation_from = None  # first GraphableObject instance
    relation_to = None    # second GraphableObject instance

    def __init__(self, rel_from, label, rel_to):
        name = "{}->{}".format(rel_from.name, rel_to.name)
        super().__init__(name, label)
        self.relation_from = rel_from
        self.relation_to = rel_to

    def dot_representation(self):
        """ The dot source line for the edge; empty if there is no target. """
        if self.relation_to:
            return "\t{} -> {} {};\n".format(
                self.relation_from.name,
                self.relation_to.name,
                self.inner_dot() or ''
            )
        return ''

    def deliver_to(self, dot_context, is_leaf):
        self.relation_from.announce_to(dot_context, self)
        self.relation_to.announce_to(dot_context, self)
        super().deliver_to(dot_context, is_leaf)  # deliver after announcing our nodes!


class DotContext(object):
    """ Collects dot source while walking a graph of GraphableObjects.

    `announce` deduplicates objects by name and drives the traversal,
    `deliver` appends an object's dot source to the accumulated output.
    """

    items = None
    source = None
    depth = 0
    max_depth = 8   # there is something fishy still, make this double the tree depth you want
    max_width = 15  # pass to graphable objects, they will decide what to do with this

    def __init__(self, max_depth=None, max_width=None):
        self.items = set()
        self.source = ''
        self.depth = 0
        if max_depth is not None:
            self.max_depth = max_depth
        if max_width is not None:
            self.max_width = max_width

    def announce(self, obj):
        """ Deliver `obj` exactly once; flag it as a leaf once the traversal
        is deeper than `max_depth`. """
        if obj.name not in self.items:
            self.items.add(obj.name)

            self.depth += 1
            obj.deliver_to(self, self.depth > self.max_depth)
            self.depth -= 1

    def deliver(self, obj):
        self.source += obj.dot_representation()

    def get(self):
        """ The accumulated dot source (graph body only, no "digraph" wrapper). """
        return self.source


class GraphvizGraphic(object):
    """ Renders a graph of GraphableObjects by writing dot source to a temp
    file and invoking Graphviz' "dot" executable on it.
    """

    cmd = 'dot'        # the Graphviz executable to invoke
    out_dot = None     # if set, the intermediate dot file is kept at this path
    out_type = 'pdf'   # any output format "dot -T..." supports
    out_file = None    # rendered output path; must be set before rendering
    max_depth = None   # forwarded to DotContext
    max_width = None   # forwarded to DotContext

    def __init__(self, out_file='rxgraph.png'):
        self.out_file = out_file

    def executableCommand(self, infile):
        """ The argument list used to run Graphviz on `infile`. """
        return [
            self.cmd,
            '-T{}'.format(self.out_type),
            infile,
            '-o', self.out_file,  # was `format(self.out_file)`, a no-op str() call
        ]

    def write_dot_graph(self, obj):
        """ Collect dot source for `obj` and render it to `out_file`.

        :param GraphableObject obj: the root object to graph
        :raises Exception: if `out_file` is unset or the dot command fails
        """
        if self.out_file is None:
            raise Exception('Please assign an output filename to "out_file"')

        context = DotContext(max_depth=self.max_depth, max_width=self.max_width)
        obj.announce_to(context)
        source = "digraph G {{\n\tranksep=equally;\n{}}}\n".format(context.get())

        # write the dot source to a temporary file
        filedesc, tmpname = tempfile.mkstemp()
        with os.fdopen(filedesc, 'w') as handle:
            handle.write(source)

        try:
            # execute command
            cmd = self.executableCommand(tmpname)
            ret = subprocess.call(cmd)
        finally:
            # keep or remove the temp file even if `subprocess.call` raised
            # (e.g. FileNotFoundError when "dot" is not installed); the
            # original leaked the temp file in that case
            if self.out_dot:
                os.rename(tmpname, self.out_dot)
            else:
                os.unlink(tmpname)

        # `ret != 0` also catches negative return codes (process killed by a
        # signal on POSIX); the original `ret > 0` missed those
        if ret != 0:
            raise Exception('Failed executing: "{}"'.format(' '.join(cmd)))
36 | """ 37 | import sqlite 38 | import csvimporter 39 | 40 | mapping = { 41 | 'loinc.csv': 'loinc', 42 | 'map_to.csv': 'map_to', 43 | 'source_organization.csv': 'sources' 44 | } 45 | dbpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'databases/loinc.db') 46 | 47 | # import 48 | for csvfile, table in mapping.items(): 49 | print("Importing LOINC table {}".format(csvfile)) 50 | imp = csvimporter.CSVImporter(os.path.join(dirpath, csvfile), table) 51 | imp.import_to(dbpath) 52 | 53 | # index 54 | print("Creating indexes") 55 | sql_handle = sqlite.SQLite(dbpath) 56 | sql_handle.execute('CREATE INDEX x_loinc_num_loinc ON loinc (LOINC_NUM)') 57 | sql_handle.execute('CREATE INDEX x_shortname_loinc ON loinc (SHORTNAME)') 58 | sql_handle.execute('CREATE INDEX x_long_common_name_loinc ON loinc (LONG_COMMON_NAME)') 59 | 60 | 61 | 62 | # running this as a script performs the database setup/check 63 | if '__main__' == __name__: 64 | logging.basicConfig(level=logging.DEBUG) 65 | 66 | # if the database check fails, run import commands 67 | try: 68 | LOINC.check_database() 69 | except Exception as e: 70 | csv_path = sys.argv[1] if 2 == len(sys.argv) else None 71 | if csv_path is not None and os.path.exists(csv_path): 72 | try: 73 | LOINC.import_from_files(csv_path) 74 | except Exception as e: 75 | raise Exception("SNOMED import failed: {}".format(e)) 76 | else: 77 | print("Provide the path to the directory containing the LOINC CSV files as first argument.") 78 | print("Download the LOINC Table File in CSV format (free registration required) here:") 79 | print("http://loinc.org/downloads/loinc") 80 | 81 | # TODO: lookup examples 82 | -------------------------------------------------------------------------------- /rxnorm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Utilities to handle RxNorm 5 | # 6 | # 2014-01-28 Extracted from UMLS.py 7 | 8 | import os.path 9 
class RxNorm (object):
    """ A class for handling RxNorm in an SQLite database and performing a
    handful of RxNorm-related tasks.
    """

    @classmethod
    def check_database(cls):
        """ Ensure the local RxNorm SQLite database is present.

        :raises Exception: pointing at `databases/rxnorm.sh` when
            `databases/rxnorm.db` cannot be found next to this module
        """
        db_file = os.path.join(os.path.dirname(__file__), 'databases/rxnorm.db')
        if os.path.exists(db_file):
            return
        raise Exception("The RxNorm database at {} does not exist. Run the import script `databases/rxnorm.sh`."
            .format(os.path.abspath(db_file)))

    @classmethod
    def ndc_normalize_list(cls, ndc_list):
        """ Normalize every NDC in `ndc_list`, dropping duplicates.

        :returns: A list of normalized NDC strings (order unspecified)
        """
        return list({cls.ndc_normalize(code) for code in ndc_list})

    @classmethod
    def ndc_normalize(cls, ndc):
        """ Normalizes an NDC (National Drug Code) number.

        The pseudo-code published by NIH
        (http://www.nlm.nih.gov/research/umls/rxnorm/NDC_Normalization_Code.rtf)
        first identifies the format (e.g. "6-3-2") and then normalizes based on
        that finding. However since the normalized string is always 5-4-2,
        padded with leading zeroes and removing all dashes afterwards, this
        implementation goes a much simpler route.

        NDCs that only contain one dash are treated as if they were missing the
        package specifier, so they get a "-00" appended before normalization.

        :param str ndc: The NDC to normalize as string
        :returns: A string with the normalized NDC, or `None` if the number
            couldn't be normalized
        """
        if not ndc or len(ndc) > 14:
            return None

        # some NDCs from MTHFDA use '*' where a '0' belongs
        candidate = ndc.replace('*', '0')
        segments = candidate.split('-')

        # a single dash means the package specifier is missing; treat as "-00"
        # (gives a 6-4-2 style code, some of these may no longer exist)
        if 2 == len(segments):
            segments.append('00')

        if 3 == len(segments):
            # pad each segment with leading zeroes, then keep the last 5/4/2
            # characters so over-long segments lose their leading digit(s)
            candidate = '{}{}{}'.format(
                segments[0].rjust(5, '0')[-5:],
                segments[1].rjust(4, '0')[-4:],
                segments[2].rjust(2, '0')[-2:])

        elif 1 == len(segments):
            # "if NDC passed has 12 digits and first char is '0' and it's from
            # VANDF then trim first char"; we cannot tell VANDF membership from
            # the NDC alone, so trim whenever it matches
            if 12 == len(candidate) and candidate.startswith('0'):
                candidate = candidate[1:]

            # without dashes, only an 11-digit code is valid
            elif 11 != len(candidate):
                return None

        # reject anything that still contains non-numeric characters
        return candidate if candidate.isdigit() else None
""" 103 | 104 | sqlite = None 105 | cache_drug_class = False # will be set to true when the prepare_to_cache_classes method gets called 106 | 107 | 108 | def __init__(self): 109 | absolute = os.path.dirname(os.path.realpath(__file__)) 110 | self.sqlite = SQLite.get(os.path.join(absolute, 'databases/rxnorm.db')) 111 | 112 | 113 | # MARK: - "name" lookup 114 | 115 | def lookup_rxcui(self, rxcui, preferred=True): 116 | """ Return a tuple with (str, tty, rxcui, rxaui) or - if "preferred" is 117 | False - a tuple with (preferred-name, list-of-tuples) 118 | """ 119 | if rxcui is None or len(rxcui) < 1: 120 | return None 121 | 122 | # retrieve all matches 123 | sql = 'SELECT str, tty, rxcui, rxaui FROM rxnconso WHERE rxcui = ? AND lat = "ENG"' 124 | 125 | found = [] 126 | for res in self.sqlite.execute(sql, (rxcui,)): 127 | found.append(res) 128 | 129 | if 0 == len(found): 130 | logging.error("RxNormLookup.lookup_rxcui: RxCUI {} not found".format(rxcui)) 131 | return None 132 | 133 | # preferred name 134 | pref_match = None 135 | for tty in ['SBDC', 'SCDC', 'SBD', 'SCD', 'CD', 'SBDF', 'SCDF', 'BN', 'IN', 'PIN', 'MIN']: 136 | for res in found: 137 | if tty == res[1]: 138 | pref_match = res 139 | break 140 | if pref_match is not None: 141 | break 142 | 143 | if preferred: 144 | return pref_match if pref_match is not None else found[0] 145 | 146 | return (pref_match[0] if pref_match is not None else None, found) 147 | 148 | def lookup_rxcui_name(self, rxcui, preferred=True, no_html=True): 149 | """ Return a string or HTML for the meaning of the given code. 150 | If preferred is True (the default), only one match will be returned, 151 | looking for specific TTY and using the "best" one. 
152 | There is currently NO SUPPORT FOR preferred = False 153 | """ 154 | 155 | res = self.lookup_rxcui(rxcui, preferred=True) 156 | if res is None: 157 | return '' 158 | 159 | if no_html: 160 | str_format = "{0} [{1}]" 161 | else: 162 | str_format = "{0} [{1}]" 163 | 164 | return str_format.format(*res) 165 | 166 | 167 | # MARK: - Relations 168 | 169 | def lookup_tty(self, rxcui): 170 | """ Returns a set of TTYs for the given RXCUI. """ 171 | if rxcui is None: 172 | return None 173 | 174 | sql = 'SELECT tty FROM rxnconso WHERE rxcui = ?' 175 | ttys = set() 176 | for res in self.sqlite.execute(sql, (rxcui,)): 177 | ttys.add(res[0]) 178 | 179 | return ttys 180 | 181 | def lookup_related(self, rxcui, relation=None, to_rxcui=None): 182 | """ Returns a set of tuples containing the RXCUI and the actual relation 183 | for the desired relation, or all if the relation is not specified. 184 | 185 | :param str rxcui: The RXCUI for which to look up relations 186 | :param str relation: Optional: the type of the relation, e.g. "has_ingredient" 187 | :param str to_rxcui: An optional second rxcui, to return all relations 188 | between the two given rxcuis. Ignored if `relation` is present. 189 | :returns: A set of tuples, where tuples are (rxcui, rela) 190 | """ 191 | if rxcui is None: 192 | return None 193 | 194 | found = set() 195 | if relation is not None: 196 | sql = "SELECT rxcui1, rela FROM rxnrel WHERE rxcui2 = ? AND rela = ?" 197 | for res in self.sqlite.execute(sql, (rxcui, relation)): 198 | found.add(res) 199 | elif to_rxcui is not None: 200 | sql = "SELECT rxcui1, rela FROM rxnrel WHERE rxcui2 = ? AND rxcui1 = ?" 201 | for res in self.sqlite.execute(sql, (rxcui, to_rxcui)): 202 | found.add(res) 203 | else: 204 | sql = "SELECT rxcui1, rela FROM rxnrel WHERE rxcui2 = ?" 
205 | for res in self.sqlite.execute(sql, (rxcui,)): 206 | found.add(res) 207 | 208 | return found 209 | 210 | 211 | # MARK: - RxCUI 212 | 213 | def rxcui_for_ndc(self, ndc): 214 | """ Find the RXCUI for the given NDC from our NDC-cache-table. 215 | 216 | This method only does exact lookup for now, it should be extended to 217 | use normalized NDC formats. 218 | 219 | :param str ndc: The NDC to look up 220 | :returns: The matching RXCUI as string, or None 221 | """ 222 | if ndc is None: 223 | return None 224 | # TODO: ensure NDC normalization 225 | 226 | rxcuis = {} 227 | sql = "SELECT RXCUI FROM NDC WHERE NDC = ?" 228 | for res in self.sqlite.execute(sql, (ndc,)): 229 | rxcuis[res[0]] = rxcuis.get(res[0], 0) + 1 230 | 231 | rxcui = list(rxcuis.keys())[0] if len(rxcuis) > 0 else None 232 | if len(rxcuis) > 1: 233 | popular = OrderedDict(Counter(rxcuis).most_common()) 234 | rxcui = popular.popitem(False)[0] 235 | 236 | return str(rxcui) if rxcui is not None else None 237 | 238 | def ndc_for_rxcui(self, rxcui): 239 | """ Find the NDC from our NDC-cache-table for the given RXCUI. 240 | """ 241 | if rxcui is None: 242 | return None 243 | 244 | sql = 'SELECT distinct ndc FROM ndc WHERE rxcui = ?' 245 | return [res[0] for res in self.sqlite.execute(sql, (rxcui,))] 246 | 247 | def rxcui_for_name(self, name, limit_tty=None): 248 | """ Tries to find an RXCUI for the concept name. 249 | 250 | Does this by performing a "starts with" against the STR column on 251 | RXNCONSO, then replaces any spaces with wildcards and finally chops off 252 | one word after the other until a match is found. 253 | 254 | This works but is slow and far from perfect. RxNav's ``approxMatch`` is 255 | definitely better, you can use ``rxcui_for_name_approx`` to get an 256 | RXCUI using that service. 
257 | 258 | :param str name: The name to get an RXCUI for 259 | :param list limit_tty: Optional: limit search to a given list of TTYs 260 | :returns: The best matching rxcui, if any, as string 261 | """ 262 | if name is None: 263 | return None 264 | 265 | rxcuis = {} 266 | lim = 'tty IN ("{}") AND'.format('","'.join(limit_tty)) if limit_tty else '' 267 | sql = 'SELECT rxcui, tty FROM rxnconso WHERE {} str LIKE ?'.format(lim) 268 | 269 | # try the full string, allowing wildcard at the trailing end 270 | for res in self.sqlite.execute(sql, (name + '%',)): 271 | rxcuis[res[0]] = rxcuis.get(res[0], 0) + 1 272 | 273 | # nothing yet, replace spaces with '%' 274 | for res in self.sqlite.execute(sql, (name.replace(' ', '%') + '%',)): 275 | rxcuis[res[0]] = rxcuis.get(res[0], 0) + 1 276 | 277 | # still nothing, try chopping off parts from the right 278 | if 0 == len(rxcuis): 279 | parts = name.split() 280 | for x in range(len(parts) - 1): 281 | comp = '%'.join(parts[:-(x+1)]) 282 | for res in self.sqlite.execute(sql, (comp + '%',)): 283 | rxcuis[res[0]] = rxcuis.get(res[0], 0) + 1 284 | if len(rxcuis) > 0: 285 | break 286 | 287 | rxcui = list(rxcuis.keys())[0] if len(rxcuis) > 0 else None 288 | if len(rxcuis) > 1: 289 | popular = OrderedDict(Counter(rxcuis).most_common()) 290 | rxcui = popular.popitem(False)[0] 291 | 292 | return str(rxcui) if rxcui is not None else None 293 | 294 | def rxcui_for_name_approx(self, name): 295 | """ Returns the best ``approxMatch`` RXCUI as found when using RxNav's 296 | service against the provided name. Runs synchronously. 
297 | 298 | :param str name: The name to get an RXCUI for 299 | :returns: The top ranked rxcui, if any, as string 300 | """ 301 | matches = self.rxnav_approx_match(name, nmax=1) 302 | return str(matches[0]) if matches is not None and len(matches) > 0 else None 303 | 304 | def rxnav_approx_match(self, name, nmax=10): 305 | """ Returns the top #nmax ``approximateTerm`` rxcuis as found when using 306 | RxNav's service against the provided name. Runs synchronously. 307 | 308 | :param str name: The name to get an RXCUI for 309 | :param int nmax: The maximum number of unique rxcuis to return, 10 by 310 | default 311 | :returns: The top ranked rxcuis, if any, as a list 312 | """ 313 | if name is None: 314 | return None 315 | 316 | url = 'http://rxnav.nlm.nih.gov/REST/approximateTerm' 317 | r = requests.get(url, params={'term': name, 'option': 1}) # we don't use `maxEntries` as duplicate rxcuis count separately 318 | root = ET.fromstring(r.text) 319 | candidates = root.findall('.//candidate') 320 | rxcuis = [] 321 | for cand in candidates: 322 | rxcui = cand.find('rxcui') 323 | if rxcui is not None and rxcui.text is not None: 324 | #rank = cand.find('rank') # rely on RxNav's order for now 325 | if rxcui.text not in rxcuis: 326 | rxcuis.append(rxcui.text) 327 | 328 | # stop after nmax 329 | if nmax is not None and len(rxcuis) >= nmax: 330 | break 331 | 332 | return rxcuis 333 | 334 | 335 | # MARK: - Drug Class OBSOLETE, WILL BE GONE 336 | 337 | def can_cache(self): 338 | return self.sqlite.hasTable('va_cache') 339 | 340 | def prepare_to_cache_classes(self): 341 | if self.sqlite.create('va_cache', '(rxcui primary key, va varchar)'): 342 | self.cache_drug_class = True 343 | 344 | def va_drug_class(self, rxcui): 345 | """ Returns a list of VA class names for a given RXCUI. EXPERIMENTAL. 
346 | """ 347 | #if not self.cache_drug_class: 348 | # return None 349 | if rxcui is None: 350 | return None 351 | 352 | # check dedicated dable 353 | sql = 'SELECT va FROM va_cache WHERE rxcui = ?' 354 | res = self.sqlite.executeOne(sql, (rxcui,)) 355 | return res[0].split('|') if res else None 356 | 357 | def friendly_class_format(self, va_name): 358 | """ Tries to reformat the VA drug class name so it's suitable for 359 | display. 360 | """ 361 | if va_name is None or 0 == len(va_name): 362 | return None 363 | 364 | # remove identifier 365 | if ']' in va_name: 366 | va_name = va_name[va_name.index(']')+1:] 367 | va_name = va_name.strip() 368 | 369 | # remove appended specificiers 370 | if ',' in va_name and va_name.index(',') > 2: 371 | va_name = va_name[0:va_name.index(',')] 372 | 373 | if '/' in va_name and va_name.index('/') > 2: 374 | va_name = va_name[0:va_name.index('/')] 375 | 376 | # capitalize nicely 377 | va_name = va_name.lower(); 378 | va_name = re.sub(r'(^| )(\w)', lambda match: r'{}{}'.format(match.group(1), match.group(2).upper()), va_name) 379 | 380 | return va_name 381 | 382 | 383 | # MARK: - Bare Metal 384 | 385 | def execute(self, sql, params=()): 386 | """ Execute and return the pointer of an SQLite execute() query. """ 387 | return self.sqlite.execute(sql, params) 388 | 389 | def fetchOne(self, sql, params=()): 390 | """ Execute and return the result of fetchone() on a raw SQL query. """ 391 | return self.sqlite.execute(sql, params).fetchone() 392 | 393 | def fetchAll(self, sql, params=()): 394 | """ Execute and return the result of fetchall() on a raw SQL query. 
""" 395 | return self.sqlite.execute(sql, params).fetchall() 396 | 397 | 398 | class RxNormCUI (GraphableObject): 399 | rxcui = None 400 | _ttys = None 401 | relations = None 402 | rxlookup = RxNormLookup() 403 | 404 | def __init__(self, rxcui, label=None): 405 | super().__init__(rxcui, rxcui) 406 | self.shape = 'box' 407 | self.rxcui = rxcui 408 | 409 | @property 410 | def ttys(self): 411 | return self._ttys 412 | 413 | @ttys.setter 414 | def ttys(self, val): 415 | self._ttys = val 416 | self.update_shape_from_ttys() 417 | 418 | 419 | def find_relations(self, to_rxcui=None, max_width=10): 420 | counted = {} 421 | for rxcui, rela in self.rxlookup.lookup_related(self.rxcui, None, to_rxcui): 422 | if rela in counted: 423 | counted[rela].append(rxcui) 424 | else: 425 | counted[rela] = [rxcui] 426 | 427 | found = [] 428 | for rela, items in sorted(counted.items()): # sort to generate mostly consistent dot files 429 | if len(items) > max_width: 430 | proxy = GraphableObject(None, rela) 431 | rel = GraphableRelation(self, str(len(items)), proxy) 432 | 433 | if self.announced_via: # if our announcer is here, be nice and link back 434 | for rxcui in items: 435 | if rxcui == self.announced_via.rxcui1.rxcui: 436 | via = RxNormCUI(rxcui) 437 | found.append(RxNormConceptRelation(self, rela, via)) 438 | else: 439 | for rxcui in sorted(items): # sort to generate mostly consistent dot files 440 | obj = RxNormCUI(rxcui) 441 | rel = RxNormConceptRelation(self, rela, obj) 442 | found.append(rel) 443 | 444 | return found 445 | 446 | 447 | def deliver_to(self, dot_context, is_leaf): 448 | self.update_self_from_rxcui() 449 | super().deliver_to(dot_context, is_leaf) 450 | 451 | # if we are a leaf, still fetch the relation going back to our announcer 452 | if is_leaf: 453 | if self.relations is None and self.announced_via: 454 | rela = self.find_relations( 455 | to_rxcui=self.announced_via.rxcui1.rxcui, 456 | max_width=dot_context.max_width 457 | ) 458 | if rela: 459 | 
rela[0].announce_to(dot_context) 460 | else: 461 | if self.relations is None: 462 | self.relations = self.find_relations(max_width=dot_context.max_width) 463 | 464 | for rel in self.relations: 465 | rel.announce_to(dot_context) 466 | 467 | 468 | def update_self_from_rxcui(self): 469 | if self.rxcui: 470 | ret = self.rxlookup.lookup_rxcui(self.rxcui, preferred=False) 471 | if ret is not None and len(ret) > 1 and len(ret[1]) > 0: 472 | pref = ret[0] 473 | found = ret[1] 474 | self.ttys = set([res[1] for res in found]) 475 | self.label = _splitted_string(pref if pref else found[0][0]) 476 | self.label += "\n[{} - {}]".format(self.rxcui, ', '.join(sorted(self._ttys))) 477 | 478 | vas = self.rxlookup.va_drug_class(self.rxcui) 479 | if vas: 480 | self.style = 'bold' 481 | self.color = 'violet' 482 | self.label += "\n{}".format(_splitted_string(', '.join(vas))) 483 | 484 | def update_shape_from_ttys(self): 485 | if self._ttys: 486 | if 'BD' in self._ttys or 'BN' in self._ttys: 487 | self.style = 'bold' 488 | elif 'SBD' in [tty[:3] for tty in self._ttys]: 489 | self.shape = 'box,peripheries=2' 490 | elif 'MIN' in self._ttys: 491 | self.shape = 'polygon,sides=5,peripheries=2' 492 | elif 'IN' in self._ttys or 'PIN' in self._ttys: 493 | self.shape = 'polygon,sides=5' 494 | 495 | class RxNormConceptRelation (GraphableRelation): 496 | rxcui1 = None 497 | rxcui2 = None 498 | 499 | def __init__(self, rxcuiobj1, rela, rxcuiobj2): 500 | super().__init__(rxcuiobj1, rela, rxcuiobj2) 501 | self.rxcui1 = rxcuiobj1 502 | self.rxcui2 = rxcuiobj2 503 | 504 | if 'isa' == rela[-3:]: 505 | self.style = 'dashed' 506 | 507 | 508 | def _splitted_string(string, maxlen=60): 509 | if len(string) > maxlen: 510 | at = 0 511 | newstr = '' 512 | for word in string.split(): 513 | if at > maxlen: 514 | newstr += "\n" 515 | at = 0 516 | if at > 0: 517 | newstr += ' ' 518 | at += 1 519 | newstr += word 520 | at += len(word) 521 | return newstr 522 | return string 523 | 524 | 525 | # running this as a 
# running this as a script does the database setup/check and prints a lookup
# for every RXCUI passed on the command line
if '__main__' == __name__:
    RxNorm.check_database()

    import sys
    rxcuis = sys.argv[1:] if len(sys.argv) > 1 else None
    if rxcuis is None:
        print('x> Provide RXCUIs as arguments on the command line')
        sys.exit(0)

    look = RxNormLookup()
    for rxcui in rxcuis:
        print('-----')
        meaning = look.lookup_rxcui_name(rxcui, preferred=False)
        ttys = look.lookup_tty(rxcui)
        related = look.lookup_related(rxcui)

        print('RxCUI "{0}": {1}'.format(rxcui, meaning))
        print('Concept type "{0}": {1}'.format(rxcui, ', '.join(ttys)))
        print('Relationships "{0}":'.format(rxcui))
        for rrxcui, rrela in sorted(related, key=lambda x: x[1]):
            res = look.lookup_rxcui(rrxcui)
            if res is None:
                # previously an unresolvable related RXCUI crashed the loop
                # with a TypeError when unpacking None
                continue
            rname, rtty = res[0], res[1]
            # `' ' * n` is the idiomatic padding (and yields '' for negative
            # n, same as the original join-over-range construction)
            sp1 = ' ' * (17 + len(rxcui) - len(rrela))
            sp2 = ' ' * (9 - len(rrxcui))
            sp3 = ' ' * (6 - len(rtty))
            print('{}{}:{}{}{}{} {}'.format(sp1, rrela, sp2, rrxcui, sp3, rtty, rname))
28 | br["password"] = args.password 29 | zip_request = br.submit() 30 | 31 | try: 32 | bytes = int(zip_request.info().getheader('Content-Length')) 33 | except: 34 | print "Failed to download file. Check your credentials." 35 | sys.exit(1) 36 | 37 | with open(args.file, "wb") as outfile: 38 | while zip_request.tell() < bytes: 39 | outfile.write(zip_request.read(size=CHUNK_SIZE)) 40 | read = zip_request.tell() 41 | print "\rDownload: %.2f%% of %sMB"%( 42 | read * 100.0 / bytes, 43 | bytes / 1000000), 44 | 45 | print("Extracting zip") 46 | with zipfile.ZipFile(args.file) as zf: 47 | zf.extractall() 48 | 49 | if __name__ == "__main__": 50 | parser = argparse.ArgumentParser(description='Download RxNorm Release') 51 | 52 | parser.add_argument('--username', help='UMLS username', required=True) 53 | parser.add_argument('--password', help='UMLS password', required=True) 54 | parser.add_argument( 55 | "--release", 56 | help="specify release version (e.g. '10052015'). Default: latest.", 57 | default=None) 58 | parser.add_argument( 59 | '--file', 60 | help='Where to save .zip download. Default: "rxnorm-download.zip"', 61 | default="rxnorm-download.zip") 62 | 63 | args = parser.parse_args() 64 | download_rxnorm(args) 65 | -------------------------------------------------------------------------------- /rxnorm_graph.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Draw an RxNorm graph for a given RxCUI. 
5 | # You must have "dot" installed (Graphviz) 6 | # 7 | # 2014-02-18 Created by Pascal Pfiffner 8 | 9 | import sys 10 | import subprocess 11 | 12 | from rxnorm import RxNormCUI 13 | from graphable import GraphvizGraphic 14 | 15 | 16 | if '__main__' == __name__: 17 | rxcui = sys.argv[1] if 2 == len(sys.argv) else None 18 | if rxcui is None: 19 | print('x> Provide a RXCUI as first argument') 20 | sys.exit(0) 21 | 22 | rx = RxNormCUI(rxcui) 23 | gv = GraphvizGraphic('rxgraph.pdf') 24 | gv.out_dot = 'rxgraph.dot' 25 | gv.max_depth = 8 26 | gv.max_width = 15 27 | 28 | gv.write_dot_graph(rx) 29 | 30 | print('-> DOT file: {}'.format(gv.out_dot)) 31 | print('-> PNG graph: {}'.format(gv.out_file)) 32 | 33 | subprocess.call(['open', gv.out_file]) 34 | -------------------------------------------------------------------------------- /rxnorm_link.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Precompute interesting RXCUI relationships into a dictionary. Use the script 5 | # `rxnorm_link_run.sh` to store these dictionaries into a JSON database. See 6 | # that script for parameters to change. 7 | # 8 | # 2012-09-28 Created by Josh Mandel 9 | # 2014-02-10 Stolen by Pascal Pfiffner 10 | # 11 | # For profiling: pycallgraph graphviz -- rxnorm_link.py 12 | 13 | import sys 14 | import os.path 15 | sys.path.insert(0, os.path.dirname(__file__)) 16 | 17 | import json 18 | import signal 19 | import logging 20 | from datetime import datetime 21 | 22 | from rxnorm import RxNorm, RxNormLookup 23 | 24 | 25 | def doQ(rxhandle, q, p): 26 | return [x[0] for x in rxhandle.fetchAll(q, p)] 27 | 28 | def toBrandAndGeneric(rxhandle, rxcuis, tty): 29 | ret = set() 30 | for rxcui in rxcuis: 31 | ret.update(doQ(rxhandle, "SELECT rxcui1 from rxnrel where rxcui2=? 
and rela='tradename_of'", (rxcui,))) 32 | return ret 33 | 34 | def toComponents(rxhandle, rxcuis, tty): 35 | ret = set() 36 | 37 | if tty not in ("SBD", "SCD"): 38 | return ret 39 | 40 | for rxcui in rxcuis: 41 | cs = doQ(rxhandle, "SELECT rxcui1 from rxnrel where rxcui2=? and rela='consists_of'", (rxcui,)) 42 | for c in cs: 43 | ret.update(doQ(rxhandle, "SELECT rxcui from rxnconso where rxcui=? and sab='RXNORM' and tty='SCDC'", (c,))) 44 | 45 | return ret 46 | 47 | def toTreatmentIntents(rxhandle, rxcuis, tty): 48 | ret = set() 49 | for rxcui in rxcuis: 50 | ret.update(toTreatmentIntents_helper(rxhandle, rxcui, tty)) 51 | return ret 52 | 53 | def toTreatmentIntents_helper(rxhandle, rxcui, tty): 54 | assert tty=='IN' 55 | ret = [] 56 | rxauis = doQ(rxhandle, "SELECT rxaui from rxnconso where rxcui=? and tty='FN' and sab='NDFRT'", (rxcui,)) 57 | for rxaui in rxauis: 58 | rxauis1 = doQ(rxhandle, "SELECT rxaui1 from rxnrel where rxaui2=? and rela='may_treat'", (rxaui,)) 59 | for rxaui1 in rxauis1: 60 | name = doQ(rxhandle, "SELECT str from rxnconso where rxaui=? and tty='FN' and sab='NDFRT'", (rxaui1,)) 61 | name = map(lambda x: x.replace(" [Disease/Finding]", ""), name) 62 | ret.extend(name) 63 | return ret 64 | 65 | def toMechanism(rxhandle, rxcuis, tty): 66 | ret = set() 67 | for v in rxcuis: 68 | ret.update(toMechanism_helper(rxhandle, v, tty)) 69 | return ret 70 | 71 | def toMechanism_helper(rxhandle, rxcui, tty): 72 | assert tty=='IN' 73 | ret = set() 74 | rxauis = doQ(rxhandle, "SELECT rxaui from rxnconso where rxcui=? and tty='FN' and sab='NDFRT'", (rxcui,)) 75 | for a in rxauis: 76 | a1 = doQ(rxhandle, "SELECT rxaui1 from rxnrel where rxaui2=? and rela='has_mechanism_of_action'", (a,)) 77 | if len(a1) > 0: 78 | moa = doQ(rxhandle, "SELECT str from rxnconso where rxaui=? 
and tty='FN' and sab='NDFRT'", (a1[0],)) 79 | moa = map(lambda x: x.replace(" [MoA]", ""), moa) 80 | ret.update(moa) 81 | return ret 82 | 83 | 84 | def toIngredients(rxhandle, rxcuis, tty): 85 | ret = set() 86 | for v in rxcuis: 87 | ret.update(toIngredients_helper(rxhandle, v, tty)) 88 | return ret 89 | 90 | def toIngredients_helper(rxhandle, rxcui, tty): 91 | if 'IN' == tty: 92 | return [] 93 | 94 | # can lookup ingredient directly 95 | map_direct = { 96 | 'MIN': 'has_part', 97 | 'PIN': 'form_of', 98 | 'BN': 'tradename_of', 99 | 'SCDC': 'has_ingredient', 100 | 'SCDF': 'has_ingredient', 101 | 'SCDG': 'has_ingredient', 102 | } 103 | 104 | if tty in map_direct: 105 | return doQ(rxhandle, "SELECT rxcui1 from rxnrel where rxcui2=? and rela=?", (rxcui, map_direct[tty])) 106 | 107 | # indirect ingredient lookup 108 | map_indirect = { 109 | 'BPCK': ('contains', 'SCD'), 110 | 'GPCK': ('contains', 'SCD'), 111 | 'SBD': ('tradename_of', 'SCD'), 112 | 'SBDC': ('tradename_of', 'SCDC'), 113 | 'SBDF': ('tradename_of', 'SCDF'), 114 | 'SBDG': ('tradename_of', 'SCDG'), 115 | 'SCD': ('consists_of', 'SCDC'), 116 | } 117 | 118 | if tty in map_indirect: 119 | val = map_indirect[tty] 120 | return toIngredients(rxhandle, doQ(rxhandle, "SELECT rxcui1 from rxnrel where rxcui2=? and rela=?", (rxcui, val[0])), val[1]) 121 | 122 | logging.warn('TTY "{}" is not mapped, skipping ingredient lookup'.format(tty)) 123 | return [] 124 | 125 | 126 | def initVA(rxhandle): 127 | """ Initializes the VA drug class cache table and inserts all known drug 128 | classes by looking them up in the RXNSAT table (ATN = "VA_CLASS_NAME"). 
129 | """ 130 | # SELECT DISTINCT tty, COUNT(tty) FROM rxnsat LEFT JOIN rxnconso AS r USING (rxcui) WHERE atn = "VA_CLASS_NAME" GROUP BY tty; 131 | rxhandle.execute('DROP TABLE IF EXISTS va_cache') 132 | rxhandle.execute('''CREATE TABLE va_cache 133 | (rxcui varchar UNIQUE, va text, from_rxcui varchar, rela varchar, level int)''') 134 | rxhandle.execute('''INSERT OR IGNORE INTO va_cache 135 | SELECT rxcui, atv, null, null, 0 FROM rxnsat 136 | WHERE atn = "VA_CLASS_NAME"''') 137 | rxhandle.sqlite.commit() 138 | 139 | def traverseVA(rxhandle, rounds=3, expect=203175): 140 | """ Drug classes are set for a couple of different TTYs, it seems however 141 | most consistently to be defined on CD, SCD and AB TTYs. 142 | We cache the classes in va_cache and loop over rxcuis with known classes, 143 | applying the known classes to certain relationships. 144 | """ 145 | print("-> Starting VA class mapping") 146 | 147 | mapping = { 148 | 'CD': [ 149 | 'has_tradename', # > BD, SBD, ... ; tiny impact on step 2, compensated for in steps 3+ 150 | 'contained_in', # > BPCK; tiny impact in step 2, compansated for in steps 3+ 151 | 'consists_of', # > SCDC; big impact step 2+, starting to be compensated for in steps 5+; NOT IDEAL 152 | #'quantified_form', # > SBD; no impact 153 | ], 154 | 'GPCK': [ 155 | 'has_tradename', # > BPCK; small impact step 3 156 | ], 157 | 158 | 'SBD': [ 159 | 'isa', # > SBDF; big impact step 2+, increasingly important (58% vs 75% coverage after step 5) 160 | 'has_ingredient', # > BN; small impact step 2+ 161 | 'tradename_of', # > SCD; tiny impact step 2, fully compensated by step 4 162 | 'consists_of', # > SBDC; small impact step 4+ 163 | ], 164 | 'SBDF': [ 165 | #'tradename_of', # > SCDF; no impact 166 | 'has_ingredient', # > BN; tiny impact step 2+ 167 | #'inverse_isa', # > SBD; no impact 168 | ], 169 | 'SBDG': [ 170 | 'has_ingredient', # > BN; tiny impact step 2+ 171 | #'tradename_of', # > SCDG; no impact 172 | ], 173 | 'SBDC': [ 174 | 'tradename_of', # > 
SCDC; tiny impact step 3, compensated by step 5 175 | ], 176 | 177 | 'SCD': [ 178 | 'isa', # > SCDF; big impact step 2+, not compensated (59% vs 75% coverage after step 5) 179 | 'has_quantified_form', # > SCD; tiny impact step 2, fully compensated in step 3 180 | 'contained_in', # > GPCK; tiny impact steps 4+ 181 | 'has_tradename', # > SBD; small impact steps 3+ 182 | ], 183 | 'SCDC': [ 184 | 'constitutes', # > SCD; big impact steps 3+ (63% vs 75% coverage after step 5) 185 | 'has_tradename', # > SBDC; impact in step 3, partially compensated in step 4 186 | ], 187 | 'SCDF': [ 188 | 'inverse_isa', # > SCD; large impact steps 3+ 189 | ], 190 | 'SCDG': [ 191 | #'tradename_of', # > SBDG; no impact 192 | ] 193 | } 194 | 195 | found = set() 196 | per_level_sql = 'SELECT rxcui, va FROM va_cache WHERE level = ?' 197 | 198 | for l in range(0,rounds): 199 | i = 0 200 | existing = rxhandle.fetchAll(per_level_sql, (l,)) 201 | num_drugs = len(existing) 202 | this_round = set(); 203 | 204 | # loop all rxcuis that already have a class and walk their relationships 205 | for rxcui, va_imp in existing: 206 | found.add(rxcui) 207 | this_round.add(rxcui) 208 | vas = va_imp.split('|') 209 | seekRelAndStoreSameVAs(rxhandle, rxcui, set(vas), mapping, l) 210 | 211 | # progress report 212 | i += 1 213 | print('--> Step {} {:.1%}'.format(l+1, i / num_drugs), end="\r") 214 | 215 | # commit after every round 216 | rxhandle.sqlite.commit() 217 | print('==> Step {}, found classes for {} of {} drugs, {:.2%} coverage'.format(l+1, len(this_round), expect, len(found) / expect)) 218 | 219 | print('-> VA class mapping complete') 220 | 221 | def seekRelAndStoreSameVAs(rxhandle, rxcui, vas, mapping, at_level=0): 222 | """ For the given RXCUI retrieves all relations, as defined in `mapping`, 223 | and updates those concepts with the drug classes passed in in `vas`. 
224 | """ 225 | assert(rxcui) 226 | assert(len(vas) > 0) 227 | 228 | # get all possible relas by checking the concept's TTY against our mapping 229 | ttys = rxhandle.lookup_tty(rxcui) 230 | desired_relas = set() 231 | for tty in ttys: 232 | if tty in mapping: 233 | desired_relas.update(mapping[tty]) 234 | if 0 == len(desired_relas): 235 | return 236 | 237 | # get all related rxcuis with the possible "rela" value(s) 238 | # Note: I had a "... AND rela IN (...)" in the following statement, but it 239 | # turns out just doing this in Python isn't slower and code is shorter 240 | rel_sql = 'SELECT DISTINCT rxcui1, rela FROM rxnrel WHERE rxcui2 = ?' 241 | for res in rxhandle.fetchAll(rel_sql, [rxcui]): 242 | if res[1] in desired_relas: 243 | storeVAs(rxhandle, res[0], vas, rxcui, res[1], at_level+1) 244 | 245 | def storeVAs(rxhandle, rxcui, vas, from_rxcui, via_rela, level=0): 246 | """ Stores the drug classes `vas` for the given concept id, checking first 247 | if that concept already has classes and updating the set. 248 | """ 249 | assert(rxcui) 250 | assert(len(vas) > 0) 251 | 252 | # do we already have classes? 253 | exist_sql = 'SELECT va FROM va_cache WHERE rxcui = ?' 254 | exist_ret = doQ(rxhandle, exist_sql, [rxcui]) 255 | if exist_ret and len(exist_ret) > 0: 256 | 257 | # bail out if we already have a class (!!!) 258 | return 259 | 260 | # split existing classes, decide if we all have them and if not, update 261 | exist_vas = set(exist_ret[0].split('|')) 262 | if vas <= exist_vas: 263 | return 264 | vas |= exist_vas 265 | 266 | # new, insert 267 | ins_sql = 'INSERT OR REPLACE INTO va_cache (rxcui, va, from_rxcui, rela, level) VALUES (?, ?, ?, ?, ?)' 268 | ins_val = '|'.join(vas) 269 | rxhandle.execute(ins_sql, (rxcui, ins_val, from_rxcui, via_rela, level)) 270 | 271 | def toDrugClasses(rxhandle, rxcui): 272 | sql = 'SELECT va FROM va_cache WHERE rxcui = ?' 
273 | res = rxhandle.fetchOne(sql, (rxcui,)) 274 | return res[0].split('|') if res is not None else [] 275 | 276 | 277 | def runImport(doc_handler=None): 278 | """ Run the actual linking. 279 | 280 | You can provide a :class:`DocHandler` subclass which will handle the JSON 281 | documents, for example store them to MongoDB for the MongoDocHandler. These 282 | classes are defined in `rxnorm_link_run.py` for now. 283 | """ 284 | 285 | # install keyboard interrupt handler 286 | def signal_handler(signal, frame): 287 | print("\nx> Aborted") 288 | sys.exit(0) 289 | signal.signal(signal.SIGINT, signal_handler) 290 | 291 | # prepare RxNorm databases 292 | try: 293 | RxNorm.check_database() 294 | rxhandle = RxNormLookup() 295 | rxhandle.prepare_to_cache_classes() 296 | except Exception as e: 297 | logging.error(e) 298 | sys.exit(1) 299 | 300 | # fetch rxcui's for drug-type concepts (i.e. restrict by TTY) 301 | drug_types = ('SCD', 'SCDC', 'SBDG', 'SBD', 'SBDC', 'BN', 'SBDF', 'SCDG', 'SCDF', 'IN', 'MIN', 'PIN', 'BPCK', 'GPCK') 302 | param = ', '.join(['?' 
for d in drug_types]) 303 | all_sql = "SELECT RXCUI, TTY from RXNCONSO where SAB='RXNORM' and TTY in ({})".format(param) 304 | 305 | all_drugs = rxhandle.fetchAll(all_sql, drug_types) 306 | num_drugs = len(all_drugs) 307 | 308 | # traverse VA classes; starts the VA drug class caching process if needed, 309 | # which runs a minute or two 310 | if rxhandle.can_cache(): 311 | initVA(rxhandle) 312 | traverseVA(rxhandle, rounds=5, expect=num_drugs) 313 | 314 | # loop all concepts 315 | i = 0 316 | w_ti = 0 317 | w_va = 0 318 | w_either = 0 319 | last_report = datetime.now() 320 | print('-> Indexing {} items'.format(num_drugs)) 321 | 322 | for res in all_drugs: 323 | params = [res[0]] 324 | params.extend(drug_types) 325 | label = rxhandle.lookup_rxcui_name(res[0]) # fast (indexed column) 326 | ndc = rxhandle.ndc_for_rxcui(res[0]) # fast (indexed column) 327 | ndc = RxNorm.ndc_normalize_list(ndc) # fast (string permutation) 328 | 329 | # find ingredients, drug classes and more 330 | ingr = toIngredients(rxhandle, [res[0]], res[1]) # rather slow 331 | ti = toTreatmentIntents(rxhandle, ingr, 'IN') # requires "ingr" 332 | va = toDrugClasses(rxhandle, res[0]) # fast, loads from our cached table 333 | gen = toBrandAndGeneric(rxhandle, [res[0]], res[1]) # fast 334 | comp = toComponents(rxhandle, [res[0]], res[1]) # fast 335 | mech = toMechanism(rxhandle, ingr, 'IN') # fast 336 | 337 | # create JSON-ready dictionary (save space by not adding empty properties) 338 | d = { 339 | 'rxcui': res[0], 340 | 'tty': res[1], 341 | 'label': label, 342 | } 343 | if len(ndc) > 0: 344 | d['ndc'] = list(ndc) 345 | 346 | if len(ingr) > 0: 347 | d['ingredients'] = list(ingr) 348 | if len(ti) > 0: 349 | d['treatmentIntents'] = list(ti) 350 | if len(va) > 0: 351 | d['drugClasses'] = list(va) 352 | if len(gen) > 0: 353 | d['generics'] = list(gen) 354 | if len(comp) > 0: 355 | d['components'] = list(comp) 356 | if len(mech) > 0: 357 | d['mechanisms'] = list(mech) 358 | 359 | # count 360 | i += 1 361 
| if len(ti) > 0: 362 | w_ti += 1 363 | if len(va) > 0: 364 | w_va += 1 365 | if len(ti) > 0 or len(va) > 0: 366 | w_either += 1 367 | 368 | # The dictionary "d" at this point contains all the drug's precomputed 369 | # properties, to debug print this: 370 | #print(json.dumps(d, sort_keys=True, indent=2)) 371 | if doc_handler: 372 | doc_handler.addDocument(d) 373 | 374 | # log progress every 2 seconds or so 375 | if (datetime.now() - last_report).seconds > 2: 376 | last_report = datetime.now() 377 | print('--> {:.1%} n: {}, ti: {}, va: {}, either: {}'.format(i / num_drugs, i, w_ti, w_va, w_either), end="\r") 378 | 379 | # loop done, finalize 380 | if doc_handler: 381 | doc_handler.finalize() 382 | 383 | print('--> {:.1%} n: {}, ti: {}, va: {}, either: {}'.format(i / num_drugs, i, w_ti, w_va, w_either)) 384 | print('-> Done') 385 | 386 | 387 | if '__main__' == __name__: 388 | logging.basicConfig(level=logging.INFO) 389 | logging.warn(''' Running linking without document handler, meaning no RxNorm document will be stored. 390 | Adjust and run `rxnorm_link_run.sh` for more control.''') 391 | runImport() 392 | -------------------------------------------------------------------------------- /rxnorm_link_run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Run this script to perform the RxNorm linking process and store the 5 | # documents in a database or flat file. 6 | 7 | import os 8 | import sys 9 | import logging 10 | 11 | from rxnorm_link import runImport 12 | 13 | 14 | class DocHandler(object): 15 | """ Superclass for simple database import. 16 | """ 17 | 18 | def __init__(self): 19 | self.documents = [] 20 | 21 | def addDocument(self, doc): 22 | if doc is not None: 23 | self.documents.append(doc) 24 | 25 | def finalize(self): 26 | pass 27 | 28 | 29 | class DebugDocHandler(DocHandler): 30 | """ Simply logs each new document. 
31 | """ 32 | def addDocument(self, doc): 33 | print(doc) 34 | 35 | def __str__(self): 36 | return "Debug logger" 37 | 38 | 39 | class SQLiteDocHandler(DocHandler): 40 | """ Handles documents for storage in sqlite3 41 | """ 42 | 43 | def __init__(self): 44 | super().__init__() 45 | from sqlite import SQLite 46 | absolute = os.path.dirname(os.path.realpath(__file__)) 47 | db_file = os.environ.get('SQLITE_FILE') 48 | db_file = db_file if db_file else os.path.join(absolute, 'databases/rxnorm.db') 49 | self.db_file = db_file 50 | self.handled = 0 51 | 52 | self.sqlite = SQLite.get(self.db_file) 53 | self.sqlite.execute('DROP TABLE IF EXISTS drug_cache') 54 | 55 | self.sqlite.execute('''CREATE TABLE drug_cache 56 | (rxcui varchar, property text, value text)''') 57 | 58 | self.sqlite.execute('CREATE INDEX i_drug_cache ON drug_cache (rxcui, property)') 59 | 60 | self.sqlite.execute('DROP VIEW IF EXISTS drug_treatments_by_ndc') 61 | self.sqlite.execute('''CREATE VIEW drug_treatments_by_ndc as 62 | select a.value as ndc, b.value as treatment_intent 63 | from drug_cache a join drug_cache b on a.rxcui=b.rxcui 64 | where a.property='ndc' and b.property='treatment_intent' 65 | ''') 66 | 67 | self.sqlite.execute('DROP VIEW IF EXISTS drug_classes_by_ndc') 68 | self.sqlite.execute('''CREATE VIEW drug_classes_by_ndc as 69 | select a.value as ndc, b.value as drug_class 70 | from drug_cache a join drug_cache b on a.rxcui=b.rxcui 71 | where a.property='ndc' and b.property='drug_class' 72 | ''') 73 | 74 | self.sqlite.execute('DROP VIEW IF EXISTS drug_ingredients_by_ndc') 75 | self.sqlite.execute('''CREATE VIEW drug_ingredients_by_ndc as 76 | select a.value as ndc, b.value as drug_ingredient, c.str as ingredient_name 77 | from drug_cache a join drug_cache b on a.rxcui=b.rxcui 78 | join RXNCONSO c on c.rxcui=b.value 79 | where a.property='ndc' and b.property='ingredient' 80 | and c.sab='RXNORM' and c.tty='IN' 81 | ''') 82 | def addDocument(self, doc): 83 | rxcui = doc.get('rxcui', '0') 
84 | fields = { 85 | 'tty': doc.get('tty', None), 86 | 'ndc': doc.get('ndc', None), 87 | 'label': doc.get('label', None), 88 | 'drug_class': doc.get('drugClasses', None), 89 | 'treatment_intent': doc.get('treatmentIntents', None), 90 | 'ingredient': doc.get('ingredients', None) 91 | } 92 | for k, v in fields.items(): 93 | if not v: continue 94 | v = v if isinstance(v, list) else [v] 95 | for vv in v: 96 | self.sqlite.execute( 97 | 'INSERT INTO drug_cache(rxcui, property, value) values(?, ?, ?)', 98 | (rxcui, k, vv)) 99 | self.handled += 1 100 | if (self.handled % 50 == 0): self.sqlite.commit() 101 | 102 | def finalize(self): 103 | self.sqlite.commit() 104 | 105 | def __str__(self): 106 | return "SQLite import {}".format(self.db_file) 107 | 108 | 109 | class MongoDocHandler(DocHandler): 110 | """ Handles documents for storage in MongoDB. 111 | """ 112 | 113 | def __init__(self): 114 | super().__init__() 115 | db_host = os.environ.get('MONGO_HOST') 116 | db_host = db_host if db_host else 'localhost' 117 | db_port = int(os.environ.get('MONGO_PORT')) 118 | db_port = db_port if db_port else 27017 119 | db_name = os.environ.get('MONGO_DB') 120 | db_name = db_name if db_name else 'default' 121 | db_bucket = os.environ.get('MONGO_BUCKET') 122 | db_bucket = db_bucket if db_bucket else 'rxnorm' 123 | 124 | import pymongo # imported here so it's only imported when using Mongo 125 | conn = pymongo.MongoClient(host=db_host, port=db_port) 126 | db = conn[db_name] 127 | 128 | # authenticate 129 | db_user = os.environ.get('MONGO_USER') 130 | db_pass = os.environ.get('MONGO_PASS') 131 | if db_user and db_pass: 132 | db.authenticate(db_user, db_pass) 133 | 134 | self.mng = db[db_bucket] 135 | self.mng.ensure_index('ndc') 136 | self.mng.ensure_index('label', text=pymongo.TEXT) 137 | 138 | def addDocument(self, doc): 139 | lbl = doc.get('label') 140 | if lbl and len(lbl) > 1010: # indexed, cannot be > 1024 in total 141 | doc['fullLabel'] = lbl 142 | doc['label'] = lbl[:1010] 143 | 144 
| super().addDocument(doc) 145 | if len(self.documents) > 50: 146 | self._insertAndClear() 147 | 148 | def finalize(self): 149 | self._insertAndClear() 150 | 151 | def _insertAndClear(self): 152 | if len(self.documents) > 0: 153 | self.mng.insert(self.documents) 154 | self.documents.clear() 155 | 156 | def __str__(self): 157 | return "MongoDB at {}".format(self.mng) 158 | 159 | 160 | class CSVHandler(DocHandler): 161 | """ Handles CSV export. """ 162 | 163 | def __init__(self): 164 | super().__init__() 165 | self.csv_file = 'rxnorm.csv' 166 | self.csv_handle = open(self.csv_file, 'w') 167 | self.csv_handle.write("rxcui,tty,ndc,name,va_classes,treating,ingredients\n") 168 | 169 | def addDocument(self, doc): 170 | self.csv_handle.write('{},"{}","{}","{}","{}","{}","{}"{}'.format( 171 | doc.get('rxcui', '0'), 172 | doc.get('tty', ''), 173 | doc.get('ndc', ''), 174 | doc.get('label', ''), 175 | ';'.join(doc.get('drugClasses') or []), 176 | ';'.join(doc.get('treatmentIntents') or []), 177 | ';'.join(doc.get('ingredients') or []), 178 | "\n" 179 | )) 180 | 181 | def __str__(self): 182 | return 'CSV file "{}"'.format(self.csv_file) 183 | 184 | 185 | def runLinking(ex_type): 186 | """ Create the desired handler and run import. 
187 | """ 188 | handler = DebugDocHandler() 189 | if ex_type is not None and len(ex_type) > 0: 190 | try: 191 | if 'mongo' == ex_type: 192 | handler = MongoDocHandler() 193 | elif 'couch' == ex_type: 194 | # import couchbase 195 | raise Exception('Couchbase not implemented') 196 | elif 'csv' == ex_type: 197 | handler = CSVHandler() 198 | elif 'sqlite' == ex_type: 199 | handler = SQLiteDocHandler() 200 | else: 201 | raise Exception('Unsupported export type: {}'.format(ex_type)) 202 | except Exception as e: 203 | logging.error(e) 204 | sys.exit(1) 205 | 206 | print('-> Processing to {}'.format(handler)) 207 | runImport(doc_handler=handler) 208 | 209 | 210 | if '__main__' == __name__: 211 | logging.basicConfig(level=logging.INFO) 212 | 213 | cmd_arg = sys.argv[1] if len(sys.argv) > 1 else None 214 | ex_type = os.environ.get('EXPORT_TYPE') or cmd_arg 215 | 216 | runLinking(ex_type) 217 | 218 | -------------------------------------------------------------------------------- /rxnorm_link_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # to make it simple we include the variables here instead of creating yet another file 4 | 5 | # export type, supported are: "csv", "mongo", "sqlite" 6 | # if run without setting a type will simply print to console 7 | export EXPORT_TYPE= 8 | 9 | # MongoDB parameters 10 | export MONGO_HOST='localhost' 11 | export MONGO_PORT=27017 12 | export MONGO_USER= 13 | export MONGO_PASS= 14 | export MONGO_DB= 15 | export MONGO_BUCKET='rxnorm' 16 | 17 | # SQLite parameters 18 | export SQLITE_FILE='databases/rxnorm.db' 19 | 20 | # TODO: add a Couchbase version 21 | 22 | # run the setup script with these environment variables 23 | python3 rxnorm_link_run.py 24 | -------------------------------------------------------------------------------- /rxnorm_tests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 
class RxNormTest(unittest.TestCase):
	""" Test :class:`RxNorm`.
	
	Currently only exercises the static NDC normalization, which should
	turn any supported NDC format into the 11-digit 5-4-2 HIPAA form.
	"""
	
	def test_ndc_normalization(self):
		""" Test NDC normalization.
		
		Covers every x-y-z input format the normalizer supports, plus
		12-digit VANDF codes, already-normalized input and invalid input
		(wrong lengths or non-numeric characters), which must yield None.
		"""
		# 6-4-2
		self.assertEqual('00074148614', RxNorm.ndc_normalize('000074-1486-14'))
		self.assertEqual('51227615900', RxNorm.ndc_normalize('051227-6159-**'))
		self.assertEqual('58734000101', RxNorm.ndc_normalize('058734-0001-*1'))
		
		# 6-4-1
		self.assertEqual('00854684102', RxNorm.ndc_normalize('000854-6841-2'))
		
		# 6-4: treat as 6-4-2 with two trailing zeroes
		self.assertEqual('57982011000', RxNorm.ndc_normalize('057982-0110'))
		self.assertEqual('12579005600', RxNorm.ndc_normalize('012579-*056'))
		
		# 6-3-2
		self.assertEqual('57982012312', RxNorm.ndc_normalize('057982-123-12'))
		
		# 6-3-1
		self.assertEqual('57982098709', RxNorm.ndc_normalize('057982-987-9'))
		
		# 5-4-2
		self.assertEqual('17317093201', RxNorm.ndc_normalize('17317-0932-01'))
		
		# 5-4-1
		self.assertEqual('36987315601', RxNorm.ndc_normalize('36987-3156-1'))
		
		# 5-3-2
		self.assertEqual('24730041205', RxNorm.ndc_normalize('24730-412-05'))
		
		# 4-4-2
		self.assertEqual('00268010310', RxNorm.ndc_normalize('0268-0103-10'))
		
		# 12 digit VANDF
		self.assertEqual('03475476541', RxNorm.ndc_normalize('003475476541'))
		
		# normalized already
		self.assertEqual('04458632698', RxNorm.ndc_normalize('04458632698'))
		
		# invalid
		self.assertIsNone(RxNorm.ndc_normalize('0054478962'))
		self.assertIsNone(RxNorm.ndc_normalize('547668531244'))
		self.assertIsNone(RxNorm.ndc_normalize('0054478962796'))
		self.assertIsNone(RxNorm.ndc_normalize('0a79b2-c87-9'))
		self.assertIsNone(RxNorm.ndc_normalize('si-lly-te-st'))
		self.assertIsNone(RxNorm.ndc_normalize('just-a-rand-test-string'))
41 | .format(os.path.abspath(snomed_db))) 42 | 43 | @classmethod 44 | def find_needed_files(cls, snomed_dir): 45 | 46 | # table to file mapping 47 | prefixes = { 48 | 'descriptions': 'sct2_Description_Full-en_', 49 | 'relationships': 'sct2_Relationship_Full_' 50 | } 51 | found = {} 52 | snomed_dir = sys.argv[1] 53 | 54 | # try to find the files 55 | for table, prefix in prefixes.items(): 56 | found_file = _find_files(snomed_dir, prefix) 57 | if found_file is None: 58 | raise Exception('Unable to locate file starting with "{}" in SNOMED directory at {}'.format(prefix, snomed_dir)) 59 | found[table] = found_file 60 | 61 | return found 62 | 63 | @classmethod 64 | def import_from_files(cls, rx_map): 65 | for table, filepath in rx_map.items(): 66 | num_query = 'SELECT COUNT(*) FROM {}'.format(table) 67 | num_existing = cls.sqlite_handle.executeOne(num_query, ())[0] 68 | if num_existing > 0: 69 | continue 70 | 71 | cls.import_csv_into_table(filepath, table) 72 | 73 | @classmethod 74 | def import_csv_into_table(cls, snomed_file, table_name): 75 | """ Import SNOMED CSV into our SQLite database. 76 | The SNOMED CSV files can be parsed by Python's CSV parser with the 77 | "excel-tab" flavor. 
78 | """ 79 | 80 | logging.debug('Importing SNOMED {} into snomed.db...'.format(table_name)) 81 | 82 | # not yet imported, parse tab-separated file and import 83 | with open(snomed_file, encoding='utf-8') as csv_handle: 84 | cls.sqlite_handle.isolation_level = 'EXCLUSIVE' 85 | sql = cls.insert_query_for(table_name) 86 | reader = csv.reader(csv_handle, dialect='excel-tab') 87 | i = 0 88 | try: 89 | for row in reader: 90 | if i > 0: # first row is the header row 91 | 92 | # execute SQL (we just ignore duplicates) 93 | params = cls.insert_tuple_from_csv_row_for(table_name, row) 94 | try: 95 | cls.sqlite_handle.execute(sql, params) 96 | except Exception as e: 97 | sys.exit('Cannot insert {}: {}'.format(params, e)) 98 | i += 1 99 | 100 | # commit to file 101 | cls.sqlite_handle.commit() 102 | cls.did_import(table_name) 103 | cls.sqlite_handle.isolation_level = None 104 | 105 | except csv.Error as e: 106 | cls.sqlite_handle.rollback() 107 | sys.exit('CSV error on line {}: {}'.format(reader.line_num, e)) 108 | 109 | logging.debug('{} concepts parsed'.format(i-1)) 110 | 111 | 112 | @classmethod 113 | def setup_tables(cls): 114 | """ Creates the SQLite tables we need, not the tables we deserve. 
115 | Does nothing if the tables/indexes already exist 116 | """ 117 | if cls.sqlite_handle is None: 118 | cls.sqlite_handle = SQLite.get(cls.database_path()) 119 | 120 | # descriptions 121 | cls.sqlite_handle.create('descriptions', '''( 122 | concept_id INTEGER PRIMARY KEY, 123 | lang TEXT, 124 | term TEXT, 125 | isa VARCHAR, 126 | active INT 127 | )''') 128 | 129 | # relationships 130 | cls.sqlite_handle.create('relationships', '''( 131 | relationship_id INTEGER PRIMARY KEY, 132 | source_id INT, 133 | destination_id INT, 134 | rel_type INT, 135 | rel_text VARCHAR, 136 | active INT 137 | )''') 138 | 139 | @classmethod 140 | def insert_query_for(cls, table_name): 141 | """ Returns the insert query needed for the given table 142 | """ 143 | if 'descriptions' == table_name: 144 | return '''INSERT OR IGNORE INTO descriptions 145 | (concept_id, lang, term, isa, active) 146 | VALUES 147 | (?, ?, ?, ?, ?)''' 148 | if 'relationships' == table_name: 149 | return '''INSERT OR IGNORE INTO relationships 150 | (relationship_id, source_id, destination_id, rel_type, active) 151 | VALUES 152 | (?, ?, ?, ?, ?)''' 153 | return None 154 | 155 | @classmethod 156 | def insert_tuple_from_csv_row_for(cls, table_name, row): 157 | if 'descriptions' == table_name: 158 | isa = '' 159 | if len(row) > 6: 160 | if '900000000000013009' == row[6]: 161 | isa = 'synonym' 162 | elif '900000000000003001' == row[6]: 163 | isa = 'full' 164 | return (int(row[4]), row[5], row[7], isa, int(row[2])) 165 | if 'relationships' == table_name: 166 | return (int(row[0]), int(row[4]), int(row[5]), int(row[7]), int(row[2])) 167 | return None 168 | 169 | @classmethod 170 | def did_import(cls, table_name): 171 | """ Allows us to set hooks after tables have been imported. 172 | 173 | Creates indexes and names `isa` and `finding_site` relationships. 
174 | """ 175 | # index descriptions 176 | if 'descriptions' == table_name: 177 | print("----- DID IMPORT descriptions") 178 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS isa_index ON descriptions (isa)") 179 | 180 | # update and index relationships 181 | if 'relationships' == table_name: 182 | print("----- DID IMPORT relationships") 183 | cls.sqlite_handle.execute("UPDATE relationships SET rel_text = 'isa' WHERE rel_type = 116680003") 184 | cls.sqlite_handle.execute("UPDATE relationships SET rel_text = 'finding_site' WHERE rel_type = 363698007") 185 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS source_index ON relationships (source_id)") 186 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS destination_index ON relationships (destination_id)") 187 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS rel_text_index ON relationships (rel_text)") 188 | 189 | 190 | class SNOMEDLookup(object): 191 | """ SNOMED lookup """ 192 | 193 | sqlite = None 194 | 195 | def __init__(self): 196 | self.sqlite = SQLite.get(SNOMED.database_path()) 197 | 198 | def lookup_code_meaning(self, snomed_id, preferred=True, no_html=True): 199 | """ Returns HTML for all matches of the given SNOMED id. 200 | The "preferred" flag here currently has no function. 201 | """ 202 | if snomed_id is None or len(snomed_id) < 1: 203 | return '' 204 | 205 | sql = 'SELECT term, isa, active FROM descriptions WHERE concept_id = ?' 206 | names = [] 207 | 208 | # loop over results 209 | for res in self.sqlite.execute(sql, (snomed_id,)): 210 | if not no_html and ('synonym' == res[1] or 0 == res[2]): 211 | names.append("{}".format(res[0])) 212 | else: 213 | names.append(res[0]) 214 | 215 | if no_html: 216 | return ", ".join(names) if len(names) > 0 else '' 217 | return "
\n".join(names) if len(names) > 0 else '' 218 | 219 | def lookup_if_isa(self, child_id, parent_id, checked=None): 220 | """ Determines if a child concept is refining a parent concept, i.e. 221 | if there is a (direct or indirect) "is a" (116680003) relationship from 222 | child to parent. 223 | """ 224 | if not child_id or not parent_id: 225 | return False 226 | if checked is not None and child_id in checked: 227 | return False 228 | 229 | parents = self.lookup_parents_of(child_id) 230 | if parent_id in parents: 231 | return True 232 | 233 | chkd = checked or [] 234 | chkd.append(child_id) 235 | for parent in parents: 236 | flag = self.lookup_if_isa(parent, parent_id, chkd) 237 | if flag: 238 | return True 239 | return False 240 | 241 | def lookup_parents_of(self, snomed_id): 242 | """ Returns a list of concept ids that have a direct "is a" (116680003) 243 | relationship with the given id. 244 | """ 245 | ids = [] 246 | if snomed_id: 247 | #sql = 'SELECT destination_id FROM relationships WHERE source_id = ? AND rel_type = 116680003' # Too slow!! 248 | sql = 'SELECT destination_id, rel_text FROM relationships WHERE source_id = ?' 249 | for res in self.sqlite.execute(sql, (snomed_id,)): 250 | if 'isa' == res[1]: 251 | ids.append(str(res[0])) 252 | return ids 253 | 254 | 255 | class SNOMEDConcept(object): 256 | """ Represents a SNOMED concept. 257 | """ 258 | uplooker = SNOMEDLookup() 259 | 260 | def __init__(self, code): 261 | self.code = code 262 | self._term = None 263 | 264 | @property 265 | def term(self): 266 | if self._term is None: 267 | self._term = self.__class__.uplooker.lookup_code_meaning(self.code) 268 | return self._term 269 | 270 | def isa(self, parent_code): 271 | """ Checks whether the receiver is a child of the given code. 272 | The `parent_code` argument can also be a :class:`SNOMEDConcept` 273 | instance. 
274 | 275 | :returns: A bool on whether the receiver is a child of the given 276 | concept 277 | """ 278 | if isinstance(parent_code, SNOMEDConcept): 279 | return self.__class__.uplooker.lookup_if_isa(self.code, parent_code.code) 280 | return self.__class__.uplooker.lookup_if_isa(self.code, parent_code) 281 | 282 | 283 | # find file function 284 | def _find_files(directory, prefix): 285 | for root, dirs, files in os.walk(directory): 286 | for name in files: 287 | if name.startswith(prefix): 288 | return os.path.join(directory, name) 289 | 290 | for name in dirs: 291 | found = _find_files(os.path.join(directory, name), prefix) 292 | if found: 293 | return found 294 | return None 295 | 296 | 297 | # running this as a script does the database setup/check 298 | if '__main__' == __name__: 299 | logging.basicConfig(level=logging.DEBUG) 300 | 301 | # if the database check fails, run import commands 302 | try: 303 | SNOMED.check_database() 304 | except SNOMEDDBNotPresentException as e: 305 | if len(sys.argv) < 2: 306 | print("Provide the path to the extracted SNOMED (RF2) directory as first argument.") 307 | print("Download SNOMED from http://www.nlm.nih.gov/research/umls/licensedcontent/snomedctfiles.html""") 308 | sys.exit(0) 309 | 310 | # import from files 311 | try: 312 | found = SNOMED.find_needed_files(sys.argv[1]) 313 | SNOMED.sqlite_handle = None 314 | SNOMED.setup_tables() 315 | SNOMED.import_from_files(found) 316 | except Exception as e: 317 | print("SNOMED import failed: {}".format(e)) 318 | sys.exit(0) 319 | 320 | # examples 321 | cpt = SNOMEDConcept('215350009') 322 | print('SNOMED code "{0}": {1}'.format(cpt.code, cpt.term)) 323 | 324 | cpt = SNOMEDConcept('315004001') # -> 128462008 -> 363346000 -> 55342001 x> 215350009 325 | for other, expected in [('128462008', True), ('363346000', True), ('55342001', True), ('215350009', False)]: 326 | print('SNOMED code "{0}" refines "{1}": {2}'.format(cpt.code, other, cpt.isa(other))) 327 | assert expected == 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# SNOMED unit testing (header previously said "RxNorm unit testing" —
# copy/paste slip from the sibling test file)
#
# 2014-04-18 Created

import sys
import os.path
thismodule = os.path.abspath(os.path.dirname(__file__))
if thismodule not in sys.path:
	sys.path.insert(0, thismodule)

import unittest
from snomed import *


class SNOMEDLookupTest(unittest.TestCase):
	""" Test SNOMED term lookup and hierarchy traversal via
	:class:`SNOMEDConcept`. Requires the local SNOMED database to be set up.
	"""
	def setUp(self):
		SNOMED.check_database()

	def test_term_lookup(self):
		""" Test term lookup.
		"""
		cpt = SNOMEDConcept('215350009')
		self.assertEqual(cpt.term, 'Accident involving being caught in door of road vehicle NEC, occupant of tram injured (event)')
		cpt = SNOMEDConcept('315004001')
		self.assertEqual(cpt.term, 'Metastasis from malignant tumor of breast')

	def test_hierarchy_isa(self):
		""" Test hierarchical "is a" lookup, both with raw codes and with
		:class:`SNOMEDConcept` instances.
		"""
		cpt = SNOMEDConcept('315004001')		# Metastasis from malignant tumor of breast
		child = SNOMEDConcept('128462008')		# Metastatic neoplasm (disease)
		self.assertTrue(cpt.isa(child.code))
		child = SNOMEDConcept('363346000')		# Malignant neoplastic disease (disorder)
		self.assertTrue(cpt.isa(child))
		child = SNOMEDConcept('55342001')		# Neoplasia
		self.assertTrue(cpt.isa(child.code))
		child = SNOMEDConcept('408643008')		# Infiltrating duct carcinoma of breast
		self.assertFalse(cpt.isa(child.code))
#!/usr/bin/env python3
#
# Simplifying SQLite access
#
# 2012-12-14 Created by Pascal Pfiffner
#


import sqlite3
import threading


# per-thread, per-database cache of SQLite instances, managed by SQLite.get()
SQLITE_INSTANCES = {}


class SQLite(object):
	""" Thin convenience wrapper around an `sqlite3` connection.

	The connection and cursor are opened lazily on first use; use the
	:meth:`get` factory to share instances within a thread.
	"""

	@classmethod
	def get(cls, database):
		""" Use this to get SQLite instances for a given database. Avoids
		creating multiple instances for the same database.

		We keep instances around per thread per database; entries belonging
		to threads that are no longer alive are released on every call.
		"""
		global SQLITE_INSTANCES

		# group per thread
		thread_id = threading.current_thread().ident
		if thread_id not in SQLITE_INSTANCES:
			SQLITE_INSTANCES[thread_id] = {}
		by_thread = SQLITE_INSTANCES[thread_id]

		# group per database
		if database not in by_thread:
			by_thread[database] = cls(database)

		# free up memory for terminated threads
		alive_ids = {t.ident for t in threading.enumerate()}
		SQLITE_INSTANCES = {tid: dbs for tid, dbs in SQLITE_INSTANCES.items() if tid in alive_ids}

		return by_thread[database]


	def __init__(self, database=None):
		""" :param str database: Path to the database file (required) """
		if database is None:
			raise Exception('No database provided')

		self.database = database	# path handed to sqlite3.connect()
		self.handle = None			# sqlite3.Connection, opened lazily
		self.cursor = None			# sqlite3.Cursor, opened lazily


	def execute(self, sql, params=()):
		""" Executes an SQL command and returns the cursor, which can be
		used as an iterator.
		Supply the params as tuple, i.e. (param,) and (param1, param2, ...)
		"""
		if not sql:
			raise Exception('No SQL to execute')
		if not self.cursor:
			self.connect()

		return self.cursor.execute(sql, params)


	def executeInsert(self, sql, params=()):
		""" Executes an SQL command (should be INSERT OR REPLACE) and
		returns the last row id, 0 on failure.
		"""
		if self.execute(sql, params):
			return self.cursor.lastrowid if self.cursor.lastrowid else 0
		return 0


	def executeUpdate(self, sql, params=()):
		""" Executes an SQL command (should be UPDATE) and returns the
		number of affected rows.
		"""
		if self.execute(sql, params):
			return self.cursor.rowcount
		return 0


	def executeOne(self, sql, params):
		""" Returns the first row returned by executing the command, or
		None if there is no result.
		"""
		self.execute(sql, params)
		return self.cursor.fetchone()


	def hasTable(self, table_name):
		""" Returns whether the given table exists. """
		# single quotes for the string literal: double quotes are identifier
		# quotes in SQL and only worked via SQLite's non-standard fallback
		sql = "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?"
		ret = self.executeOne(sql, (table_name,))
		return bool(ret and ret[0] > 0)

	def create(self, table_name, table_structure):
		""" Executes a CREATE TABLE IF NOT EXISTS query with the given structure.
		Input is NOT sanitized, watch it!
		"""
		self.execute('CREATE TABLE IF NOT EXISTS {} {}'.format(table_name, table_structure))
		return True


	def commit(self):
		self.handle.commit()

	def rollback(self):
		self.handle.rollback()


	def connect(self):
		""" Opens connection and cursor, if not yet open. """
		if self.cursor is not None:
			return

		self.handle = sqlite3.connect(self.database)
		self.cursor = self.handle.cursor()

	def close(self):
		""" Closes the connection and drops handle and cursor. """
		if self.cursor is None:
			return

		self.handle.close()
		self.cursor = None
		self.handle = None
# NOTE(review): the `class UMLS (object):` header line and module imports
# (sys, os.path, logging, `from sqlite import SQLite`) sit at the top of this
# file, just before this chunk; the class header is repeated here so the
# block is self-contained.
class UMLS (object):
	""" A class for importing UMLS terminologies into an SQLite database.
	"""

	@classmethod
	def check_database(cls):
		""" Check if our database is in place and if not, prompts to import it.
		Will raise on errors!

		UMLS: (umls.db)
		If missing prompt to use the `umls.sh` script
		"""
		umls_db = os.path.join('databases', 'umls.db')
		if not os.path.exists(umls_db):
			raise Exception("The UMLS database at {} does not exist. Run the import script `databases/umls.sh`."
				.format(os.path.abspath(umls_db)))



class UMLSLookup (object):
	""" UMLS lookup """

	sqlite = None
	did_check_dbs = False
	# source vocabularies reported when `preferred` is set; each entry is
	# pre-quoted for direct interpolation into the SQL "IN (...)" clause
	preferred_sources = ['"SNOMEDCT"', '"MTH"']

	def __init__(self):
		absolute = os.path.dirname(os.path.realpath(__file__))
		self.sqlite = SQLite.get(os.path.join(absolute, 'databases/umls.db'))

	def lookup_code(self, cui, preferred=True):
		""" Return a list with triples that contain:
		- name
		- source
		- semantic type
		by looking it up in our "descriptions" database.
		The "preferred" setting has the effect that only names from SNOMED
		(SNOMEDCT) and the Metathesaurus (MTH) will be reported. A lookup in
		our "descriptions" table is much faster than combing through the
		full MRCONSO table.

		A leading "-" on the CUI marks it as negated; anything after an "@"
		is stripped before the lookup.

		:returns: A list of triples with (name, sab, sty)
		"""
		if cui is None or len(cui) < 1:
			return []

		# lazy UMLS db checking
		if not UMLSLookup.did_check_dbs:
			UMLS.check_database()
			UMLSLookup.did_check_dbs = True

		# take care of negations
		negated = '-' == cui[0]
		if negated:
			cui = cui[1:]

		parts = cui.split('@', 1)
		lookup_cui = parts[0]

		# STR: Name
		# SAB: Abbreviated Source Name
		# STY: Semantic Type
		if preferred:
			sql = 'SELECT STR, SAB, STY FROM descriptions WHERE CUI = ? AND SAB IN ({})'.format(", ".join(UMLSLookup.preferred_sources))
		else:
			sql = 'SELECT STR, SAB, STY FROM descriptions WHERE CUI = ?'

		# return as list
		arr = []
		for res in self.sqlite.execute(sql, (lookup_cui,)):
			if negated:
				# keep the documented triple shape; previously this appended
				# "[NEGATED] {}".format(res[0], res[1], res[2]) — a plain
				# string that silently dropped source and semantic type
				arr.append(("[NEGATED] {}".format(res[0]), res[1], res[2]))
			else:
				arr.append(res)

		return arr


	def lookup_code_meaning(self, cui, preferred=True, no_html=True):
		""" Return a string (an empty string if the cui is null or not found)
		by looking it up in our "descriptions" database.
		The "preferred" setting has the effect that only names from SNOMED
		(SNOMEDCT) and the Metathesaurus (MTH) will be reported. A lookup in
		our "descriptions" table is much faster than combing through the
		full MRCONSO table.
		"""
		names = []
		for res in self.lookup_code(cui, preferred):
			if no_html:
				names.append("{} ({}) [{}]".format(res[0], res[1], res[2]))
			else:
				names.append("{} ({}: {})".format(res[0], res[1], res[2]))

		# NOTE(review): the HTML line-break markup appears stripped from this
		# copy of the file; confirm the non-HTML separator upstream
		comp = ", " if no_html else "\n"
		return comp.join(names) if len(names) > 0 else ''


	def lookup_code_for_name(self, name, preferred=True):
		""" Tries to find a good concept code for the given concept name.

		Uses our indexed `descriptions` table.

		:returns: A list of triples with (cui, sab, sty); empty for empty
			input (previously returned None, which crashed callers that
			iterate the result, such as this module's own __main__ block)
		"""
		if name is None or len(name) < 1:
			return []

		# lazy UMLS db checking
		if not UMLSLookup.did_check_dbs:
			UMLS.check_database()
			UMLSLookup.did_check_dbs = True

		# CUI: Concept-ID
		# STR: Name
		# SAB: Abbreviated Source Name
		# STY: Semantic Type
		if preferred:
			sql = 'SELECT CUI, SAB, STY FROM descriptions WHERE STR LIKE ? AND SAB IN ({})'.format(", ".join(UMLSLookup.preferred_sources))
		else:
			sql = 'SELECT CUI, SAB, STY FROM descriptions WHERE STR LIKE ?'

		# return as list
		arr = []
		for res in self.sqlite.execute(sql, ('%' + name + '%',)):
			arr.append(res)

		return arr



# running this as a script does the database setup/check
if '__main__' == __name__:
	UMLS.check_database()

	# examples
	look = UMLSLookup()
	code = 'C0002962'
	meaning = look.lookup_code_meaning(code)
	print('UMLS code "{0}": {1}'.format(code, meaning))

	name = 'Pulmonary Arterial Hypertension'
	print('Search for "{}" returns:'.format(name))
	codes = look.lookup_code_for_name(name)
	for cd in codes:
		print('{}: {}'.format(cd, look.lookup_code_meaning(cd[0])))