├── .gitignore ├── LICENSE.txt ├── README.md ├── __init__.py ├── csvimporter.py ├── databases ├── rxnorm.sh └── umls.sh ├── docs ├── Makefile ├── conf.py ├── index.rst └── py-umls.rst ├── graphable.py ├── loinc.py ├── rxnorm.py ├── rxnorm_download.py ├── rxnorm_graph.py ├── rxnorm_link.py ├── rxnorm_link_run.py ├── rxnorm_link_run.sh ├── rxnorm_tests.py ├── snomed.py ├── snomed_tests.py ├── sqlite.py └── umls.py /.gitignore: -------------------------------------------------------------------------------- 1 | # ignore database files but not the import scripts 2 | databases/*.db 3 | 4 | # virtualenv 5 | env 6 | 7 | # docs 8 | docs/_build 9 | 10 | # system files 11 | .DS_Store 12 | __pycache__ 13 | 14 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2015 Boston Children's Hospital 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | UMLS for Python 2 | =============== 3 | 4 | These are basic tools to interact with UMLS lexica, namely UMLS, SNOMED and RxNorm, using Python 3 scripts. 5 | For each of the three databases there are scripts (2 Bash and 1 Python) that facilitate import of the downloaded data into a local SQLite 3 database. 
6 | 7 | > You will need a UMLS license to download UMLS lexica. 8 | 9 | For a simple start, run one of the files (`umls.py`, `snomed.py`, `rxnorm.py`) in your Shell and follow the instructions. 10 | The scripts will prompt you to download and install the databases and, when completed, print a simple example lookup. 11 | 12 | There are also utility scripts that offer help for specific use cases, see below. 13 | 14 | Documentation 15 | ------------- 16 | 17 | An [auto-generated documentation](http://chb.github.io/py-umls/) (via Sphinx) is available but not very exhaustive at the moment. 18 | See below for some quick examples. 19 | 20 | Usage 21 | ----- 22 | 23 | More detailed instructions here: 24 | 25 | - [**RxNorm**](https://github.com/chb/py-umls/wiki/RxNorm) 26 | - [**SNOMED-CT**](https://github.com/chb/py-umls/wiki/SNOMED) 27 | 28 | There are `XYLookup` classes in each of the three files which can be used for database lookups (where `XY` stands for `UMLS`, `SNOMED` or `RxNorm`). 29 | The following example code is appended to the end of the respective scripts and will be executed if you run it in the Shell. 30 | You might want to insert `XY.check_databases()` before this code so you will get an exception if the databases haven't been set up. 31 | 32 | look_umls = UMLSLookup() 33 | code_umls = 'C0002962' 34 | meaning_umls = look_umls.lookup_code_meaning(code_umls) 35 | print('UMLS code "{0}": {1}'.format(code_umls, meaning_umls)) 36 | 37 | look_snomed = SNOMEDLookup() 38 | code_snomed = '215350009' 39 | meaning_snomed = look_snomed.lookup_code_meaning(code_snomed) 40 | print('SNOMED code "{0}": {1}'.format(code_snomed, meaning_snomed)) 41 | 42 | look_rxnorm = RxNormLookup() 43 | code_rxnorm = '328406' 44 | meaning_rxnorm = look_rxnorm.lookup_code_meaning(code_rxnorm, preferred=False) 45 | print('RxNorm code "{0}": {1}'.format(code_rxnorm, meaning_rxnorm)) 46 | 47 | You would typically use this module as a submodule in your own project. 
48 | Best add this as a _git submodule_ but that really is up to you. 49 | If you do use this module as a Python module, you can't use the name `py-umls` because it contains a dash, so you must checkout this code to a correctly named directory. 50 | I usually use `umls`. 51 | 52 | License 53 | ------- 54 | 55 | This work is [Apache licensed](LICENSE.txt). 56 | 57 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os.path 3 | abspath = os.path.abspath(os.path.dirname(__file__)) 4 | if abspath not in sys.path: 5 | sys.path.insert(0, abspath) 6 | -------------------------------------------------------------------------------- /csvimporter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Simple CSV importer. 5 | 6 | import re 7 | import csv 8 | import sqlite3 9 | 10 | 11 | class CSVImporter(object): 12 | """ A simple CSV to SQLite importer class. 13 | 14 | Expects a CSV file with a header row, will create a table reflecting the 15 | header row and import all rows. 
16 | """ 17 | _sqlite = None 18 | 19 | def __init__(self, csv_path, tablename='rows'): 20 | self.filepath = csv_path 21 | self.tablename = tablename 22 | 23 | def sqlite_handle(self, dbpath): 24 | if self._sqlite is None: 25 | self._sqlite = sqlite3.connect(dbpath) 26 | return self._sqlite 27 | 28 | def import_to(self, dbpath, csv_format='excel'): 29 | assert self.filepath 30 | assert dbpath 31 | 32 | # SQLite handling 33 | sql_handle = self.sqlite_handle(dbpath) 34 | sql_handle.isolation_level = 'EXCLUSIVE' 35 | sql_cursor = sql_handle.cursor() 36 | create_sql = 'CREATE TABLE {} '.format(self.tablename) 37 | insert_sql = 'INSERT INTO {} '.format(self.tablename) 38 | all_but_alnum = r'\W+' 39 | 40 | # loop rows 41 | with open(self.filepath, 'r') as csv_handle: 42 | reader = csv.reader(csv_handle, quotechar='"', dialect=csv_format) 43 | try: 44 | i = 0 45 | for row in reader: 46 | sql = insert_sql 47 | params = () 48 | 49 | # first row is the header row 50 | if 0 == i: 51 | fields = [] 52 | fields_create = [] 53 | for field in row: 54 | field = re.sub(all_but_alnum, '', field) 55 | fields.append(field) 56 | fields_create.append('{} VARCHAR'.format(field)) 57 | 58 | create_sql += "(\n\t{}\n)".format(",\n\t".join(fields_create)) 59 | sql = create_sql 60 | 61 | insert_sql += '({}) VALUES ({})'.format(', '.join(fields), ', '.join(['?' 
for i in range(len(fields))])) 62 | 63 | # data rows 64 | else: 65 | params = tuple(row) 66 | 67 | # execute SQL statement 68 | try: 69 | sql_cursor.execute(sql, params) 70 | except Exception as e: 71 | sys.exit(u'SQL failed: %s -- %s' % (e, sql)) 72 | i += 1 73 | 74 | # commit to file 75 | sql_handle.commit() 76 | sql_handle.isolation_level = None 77 | 78 | except csv.Error as e: 79 | sys.exit('CSV error on line %d: %s' % (reader.line_num, e)) 80 | 81 | -------------------------------------------------------------------------------- /databases/rxnorm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # create an RxNORM SQLite database (and a relations triple store). 4 | # 5 | 6 | # our SQLite database does not exist 7 | if [ ! -e rxnorm.db ]; then 8 | if [ ! -d "$1" ]; then 9 | echo "Provide the path to the RxNorm directory as first argument when invoking this script. Download the latest version here: http://www.nlm.nih.gov/research/umls/rxnorm/docs/rxnormfiles.html" 10 | exit 1 11 | fi 12 | if [ ! -d "$1/rrf" ]; then 13 | echo "There is no directory named rrf in the directory you provided. Download the latest version here: http://www.nlm.nih.gov/research/umls/rxnorm/docs/rxnormfiles.html" 14 | exit 1 15 | fi 16 | if ! hash sqlite3 &>/dev/null; then 17 | echo "It seems 'sqlite3' is not installed, I will need it. Aborting." 18 | exit 1 19 | fi 20 | 21 | # init the database 22 | cat "$1/scripts/mysql/Table_scripts_mysql_rxn.sql" | sqlite3 rxnorm.db 23 | 24 | # convert RRF files (strip last pipe and remove quote (") characters, those are giving SQLite troubles) 25 | if [ ! 
-e "$1/rrf/RXNREL.pipe" ]; then 26 | current=$(pwd) 27 | cd "$1/rrf" 28 | echo "-> Converting RRF files for SQLite" 29 | for f in *.RRF; do 30 | sed -e 's/.$//' -e 's/"//g' "$f" > "${f%RRF}pipe" 31 | done 32 | cd $current 33 | fi 34 | 35 | # import tables 36 | for f in "$1/rrf/"*.pipe; do 37 | table=$(basename ${f%.pipe}) 38 | echo "-> Importing $table" 39 | sqlite3 rxnorm.db ".import '$f' '$table'" 40 | done 41 | 42 | # create an NDC table 43 | echo "-> Creating extra tables" 44 | # sqlite3 rxnorm.db "CREATE TABLE NDC AS SELECT RXCUI, ATV AS NDC FROM RXNSAT WHERE ATN = 'NDC';" # we do it in 2 steps to create the primary index column 45 | sqlite3 rxnorm.db "CREATE TABLE NDC (RXCUI INT, NDC VARCHAR);" 46 | sqlite3 rxnorm.db "INSERT INTO NDC SELECT RXCUI, ATV FROM RXNSAT WHERE ATN = 'NDC';" 47 | 48 | # create drug class tables 49 | sqlite3 rxnorm.db "CREATE TABLE VA_DRUG_CLASS (RXCUI int, RXCUI_ORIGINAL int, VA varchar);" 50 | sqlite3 rxnorm.db "CREATE TABLE FRIENDLY_CLASS_NAMES (VACODE varchar, FRIENDLY varchar);" 51 | sqlite3 rxnorm.db "CREATE INDEX X_FRIENDLY_CLASS_NAMES_VACODE ON FRIENDLY_CLASS_NAMES (VACODE);" 52 | 53 | # create indices 54 | echo "-> Indexing NDC table" 55 | sqlite3 rxnorm.db "CREATE INDEX X_NDC_RXCUI ON NDC (RXCUI);" 56 | sqlite3 rxnorm.db "CREATE INDEX X_NDC_NDC ON NDC (NDC);" 57 | 58 | echo "-> Indexing RXNSAT table" 59 | sqlite3 rxnorm.db "CREATE INDEX RXNSAT_RXCUI ON RXNSAT (RXCUI);" 60 | sqlite3 rxnorm.db "CREATE INDEX RXNSAT_ATN ON RXNSAT (ATN);" 61 | 62 | echo "-> Indexing RXNREL table" 63 | sqlite3 rxnorm.db "CREATE INDEX X_RXNREL_RXCUI1 ON RXNREL (RXCUI1);" 64 | sqlite3 rxnorm.db "CREATE INDEX X_RXNREL_RXCUI2 ON RXNREL (RXCUI2);" 65 | sqlite3 rxnorm.db "CREATE INDEX X_RXNREL_RXAUI2 ON RXNREL (RXAUI2);" 66 | #sqlite3 rxnorm.db "CREATE INDEX X_RXNREL_RELA ON RXNREL (RELA);" # do NOT do this! 
slows down queries dramatically 67 | 68 | echo "-> Indexing RXNCONSO table" 69 | sqlite3 rxnorm.db "CREATE INDEX X_RXNCONSO_RXCUI ON RXNCONSO (RXCUI);" 70 | sqlite3 rxnorm.db "CREATE INDEX X_RXNCONSO_RXAUI ON RXNCONSO (RXAUI);" 71 | 72 | # How to export from SQLite: export NDC to CSV 73 | # .mode csv 74 | # .header on 75 | # .out va-class.csv 76 | # SELECT RXCUI, NDC FROM NDC; 77 | # SELECT DISTINCT ATV FROM RXNSAT WHERE ATN = 'VA_CLASS_NAME' ORDER BY ATV ASC; 78 | fi 79 | 80 | -------------------------------------------------------------------------------- /databases/umls.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # create a UMLS SQLite database. 4 | # 5 | 6 | # our SQLite database does not exist 7 | if [ ! -e umls.db ]; then 8 | if [ ! -d "$1" ]; then 9 | echo "Provide the path to the UMLS install directory, which is named something like \"2014AA\" and contains a \"META\" directory, as first argument when invoking this script." 10 | echo 11 | echo "Downloading and Extracting UMLS Data" 12 | echo "====================================" 13 | echo 14 | echo "Downloading and extracting UMLS data is a painful process." 15 | echo "Begin by downloading most files for the latest version listed on the left side here: http://www.nlm.nih.gov/research/umls/licensedcontent/umlsknowledgesources.html" 16 | echo "Extract 'mmsys.zip' and place every downloaded file, including 'mmsys.zip', into the extracted directory." 17 | echo "Run the respective 'runXX' script inside the mmsys directory; the MetamorphoSys Java GUI will open." 18 | echo "Click \"Install UMLS\", as source directory select the just extracted mmsys directory and your chosen target directory." 19 | echo "Leave the checkboxes alone and click OK." 20 | echo "Now you must generate a configuration and in order to be able to proceed, save the configuration via a command from the menu bar." 
21 | echo "Then select \"Begin Subset\", also from the menubar, to start the extraction process." 22 | echo "This should extract all the things and put in in the selected directory, which now contains a META directory with all the files we need to proceed." 23 | echo 24 | echo "Once you have done this, run this script again with the correct path as the first argument." 25 | exit 1 26 | fi 27 | if [ ! -d "$1/META" ]; then 28 | echo "There is no directory named META in the install directory you provided." 29 | echo "Point this script to the directory named something like \"2014AA\"." 30 | exit 1 31 | fi 32 | 33 | # convert RRF files (strip last pipe and remove quote (") characters, those are giving SQLite troubles) 34 | if [ ! -e "$1/META/MRDEF.pipe" ]; then 35 | current=$(pwd) 36 | cd "$1/META" 37 | echo "-> Converting RRF files for SQLite" 38 | for f in MRCONSO.RRF MRDEF.RRF MRSTY.RRF; do 39 | sed -e 's/.$//' -e 's/"//g' "$f" > "${f%RRF}pipe" 40 | done 41 | cd $current 42 | fi 43 | 44 | # init the database for MRDEF 45 | # table structure here: http://www.ncbi.nlm.nih.gov/books/NBK9685/ 46 | sqlite3 umls.db "CREATE TABLE MRDEF ( 47 | CUI varchar, 48 | AUI varchar, 49 | ATUI varchar, 50 | SATUI varchar, 51 | SAB varchar, 52 | DEF text, 53 | SUPPRESS varchar, 54 | CVF varchar 55 | )" 56 | 57 | # init the database for MRCONSO 58 | sqlite3 umls.db "CREATE TABLE MRCONSO ( 59 | CUI varchar, 60 | LAT varchar, 61 | TS varchar, 62 | LUI varchar, 63 | STT varchar, 64 | SUI varchar, 65 | ISPREF varchar, 66 | AUI varchar, 67 | SAUI varchar, 68 | SCUI varchar, 69 | SDUI varchar, 70 | SAB varchar, 71 | TTY varchar, 72 | CODE varchar, 73 | STR text, 74 | SRL varchar, 75 | SUPPRESS varchar, 76 | CVF varchar 77 | )" 78 | 79 | # init the database for MRSTY 80 | sqlite3 umls.db "CREATE TABLE MRSTY ( 81 | CUI varchar, 82 | TUI varchar, 83 | STN varchar, 84 | STY text, 85 | ATUI varchar, 86 | CVF varchar 87 | )" 88 | 89 | # import tables 90 | for f in "$1/META/"*.pipe; do 91 | 
table=$(basename ${f%.pipe}) 92 | echo "-> Importing $table" 93 | sqlite3 umls.db ".import '$f' '$table'" 94 | done 95 | 96 | # create indexes 97 | echo "-> Creating indexes" 98 | sqlite3 umls.db "CREATE INDEX X_CUI_MRDEF ON MRDEF (CUI);" 99 | sqlite3 umls.db "CREATE INDEX X_SAB_MRDEF ON MRDEF (SAB);" 100 | sqlite3 umls.db "CREATE INDEX X_CUI_MRCONSO ON MRCONSO (CUI);" 101 | sqlite3 umls.db "CREATE INDEX X_LAT_MRCONSO ON MRCONSO (LAT);" 102 | sqlite3 umls.db "CREATE INDEX X_TS_MRCONSO ON MRCONSO (TS);" 103 | sqlite3 umls.db "CREATE INDEX X_CUI_MRSTY ON MRSTY (CUI);" 104 | sqlite3 umls.db "CREATE INDEX X_TUI_MRSTY ON MRSTY (TUI);" 105 | 106 | # create faster lookup table 107 | echo "-> Creating fast lookup table" 108 | sqlite3 umls.db "CREATE TABLE descriptions AS SELECT CUI, LAT, SAB, TTY, STR FROM MRCONSO WHERE LAT = 'ENG' AND TS = 'P' AND ISPREF = 'Y'" 109 | sqlite3 umls.db "ALTER TABLE descriptions ADD COLUMN STY TEXT" 110 | sqlite3 umls.db "CREATE INDEX X_CUI_desc ON descriptions (CUI)" 111 | sqlite3 umls.db "UPDATE descriptions SET STY = (SELECT GROUP_CONCAT(MRSTY.TUI, '|') FROM MRSTY WHERE MRSTY.CUI = descriptions.CUI GROUP BY MRSTY.CUI)" 112 | else 113 | echo "=> umls.db already exists" 114 | fi 115 | 116 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. 
If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf 
$(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/py-umls.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/py-umls.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/py-umls" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/py-umls" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 
105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 
152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # py-umls documentation build configuration file, created by 4 | # sphinx-quickstart on Fri Apr 18 20:08:31 2014. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
21 | sys.path.insert(0, os.path.abspath('..')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = [ 32 | 'sphinx.ext.autodoc', 33 | 'sphinx.ext.todo', 34 | ] 35 | 36 | # Add any paths that contain templates here, relative to this directory. 37 | templates_path = ['_templates'] 38 | 39 | # The suffix of source filenames. 40 | source_suffix = '.rst' 41 | 42 | # The encoding of source files. 43 | #source_encoding = 'utf-8-sig' 44 | 45 | # The master toctree document. 46 | master_doc = 'index' 47 | 48 | # General information about the project. 49 | project = u'py-umls' 50 | copyright = u'2014, Pascal Pfiffner' 51 | 52 | # The version info for the project you're documenting, acts as replacement for 53 | # |version| and |release|, also used in various other places throughout the 54 | # built documents. 55 | # 56 | # The short X.Y version. 57 | version = '' 58 | # The full version, including alpha/beta/rc tags. 59 | release = '' 60 | 61 | # The language for content autogenerated by Sphinx. Refer to documentation 62 | # for a list of supported languages. 63 | #language = None 64 | 65 | # There are two options for replacing |today|: either, you set today to some 66 | # non-false value, then it is used: 67 | #today = '' 68 | # Else, today_fmt is used as the format for a strftime call. 69 | #today_fmt = '%B %d, %Y' 70 | 71 | # List of patterns, relative to source directory, that match files and 72 | # directories to ignore when looking for source files. 73 | exclude_patterns = ['_build'] 74 | 75 | # The reST default role (used for this markup: `text`) to use for all 76 | # documents. 
77 | #default_role = None 78 | 79 | # If true, '()' will be appended to :func: etc. cross-reference text. 80 | #add_function_parentheses = True 81 | 82 | # If true, the current module name will be prepended to all description 83 | # unit titles (such as .. function::). 84 | #add_module_names = True 85 | 86 | # If true, sectionauthor and moduleauthor directives will be shown in the 87 | # output. They are ignored by default. 88 | #show_authors = False 89 | 90 | # The name of the Pygments (syntax highlighting) style to use. 91 | pygments_style = 'sphinx' 92 | 93 | # A list of ignored prefixes for module index sorting. 94 | #modindex_common_prefix = [] 95 | 96 | # If true, keep warnings as "system message" paragraphs in the built documents. 97 | #keep_warnings = False 98 | 99 | 100 | # -- Options for HTML output ---------------------------------------------- 101 | 102 | # The theme to use for HTML and HTML Help pages. See the documentation for 103 | # a list of builtin themes. 104 | html_theme = 'default' 105 | 106 | # Theme options are theme-specific and customize the look and feel of a theme 107 | # further. For a list of options available for each theme, see the 108 | # documentation. 109 | #html_theme_options = {} 110 | 111 | # Add any paths that contain custom themes here, relative to this directory. 112 | #html_theme_path = [] 113 | 114 | # The name for this set of Sphinx documents. If None, it defaults to 115 | # " v documentation". 116 | #html_title = None 117 | 118 | # A shorter title for the navigation bar. Default is the same as html_title. 119 | #html_short_title = None 120 | 121 | # The name of an image file (relative to this directory) to place at the top 122 | # of the sidebar. 123 | #html_logo = None 124 | 125 | # The name of an image file (within the static path) to use as favicon of the 126 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 127 | # pixels large. 
128 | #html_favicon = None 129 | 130 | # Add any paths that contain custom static files (such as style sheets) here, 131 | # relative to this directory. They are copied after the builtin static files, 132 | # so a file named "default.css" will overwrite the builtin "default.css". 133 | html_static_path = ['_static'] 134 | 135 | # Add any extra paths that contain custom files (such as robots.txt or 136 | # .htaccess) here, relative to this directory. These files are copied 137 | # directly to the root of the documentation. 138 | #html_extra_path = [] 139 | 140 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 141 | # using the given strftime format. 142 | #html_last_updated_fmt = '%b %d, %Y' 143 | 144 | # If true, SmartyPants will be used to convert quotes and dashes to 145 | # typographically correct entities. 146 | #html_use_smartypants = True 147 | 148 | # Custom sidebar templates, maps document names to template names. 149 | #html_sidebars = {} 150 | 151 | # Additional templates that should be rendered to pages, maps page names to 152 | # template names. 153 | #html_additional_pages = {} 154 | 155 | # If false, no module index is generated. 156 | #html_domain_indices = True 157 | 158 | # If false, no index is generated. 159 | #html_use_index = True 160 | 161 | # If true, the index is split into individual pages for each letter. 162 | #html_split_index = False 163 | 164 | # If true, links to the reST sources are added to the pages. 165 | #html_show_sourcelink = True 166 | 167 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 168 | #html_show_sphinx = True 169 | 170 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 171 | #html_show_copyright = True 172 | 173 | # If true, an OpenSearch description file will be output, and all pages will 174 | # contain a tag referring to it. The value of this option must be the 175 | # base URL from which the finished HTML is served. 
176 | #html_use_opensearch = '' 177 | 178 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 179 | #html_file_suffix = None 180 | 181 | # Output file base name for HTML help builder. 182 | htmlhelp_basename = 'py-umlsdoc' 183 | 184 | 185 | # -- Options for LaTeX output --------------------------------------------- 186 | 187 | latex_elements = { 188 | # The paper size ('letterpaper' or 'a4paper'). 189 | #'papersize': 'letterpaper', 190 | 191 | # The font size ('10pt', '11pt' or '12pt'). 192 | #'pointsize': '10pt', 193 | 194 | # Additional stuff for the LaTeX preamble. 195 | #'preamble': '', 196 | } 197 | 198 | # Grouping the document tree into LaTeX files. List of tuples 199 | # (source start file, target name, title, 200 | # author, documentclass [howto, manual, or own class]). 201 | latex_documents = [ 202 | ('index', 'py-umls.tex', u'py-umls Documentation', 203 | u'Pascal Pfiffner', 'manual'), 204 | ] 205 | 206 | # The name of an image file (relative to this directory) to place at the top of 207 | # the title page. 208 | #latex_logo = None 209 | 210 | # For "manual" documents, if this is true, then toplevel headings are parts, 211 | # not chapters. 212 | #latex_use_parts = False 213 | 214 | # If true, show page references after internal links. 215 | #latex_show_pagerefs = False 216 | 217 | # If true, show URL addresses after external links. 218 | #latex_show_urls = False 219 | 220 | # Documents to append as an appendix to all manuals. 221 | #latex_appendices = [] 222 | 223 | # If false, no module index is generated. 224 | #latex_domain_indices = True 225 | 226 | 227 | # -- Options for manual page output --------------------------------------- 228 | 229 | # One entry per manual page. List of tuples 230 | # (source start file, name, description, authors, manual section). 231 | man_pages = [ 232 | ('index', 'py-umls', u'py-umls Documentation', 233 | [u'Pascal Pfiffner'], 1) 234 | ] 235 | 236 | # If true, show URL addresses after external links. 
237 | #man_show_urls = False 238 | 239 | 240 | # -- Options for Texinfo output ------------------------------------------- 241 | 242 | # Grouping the document tree into Texinfo files. List of tuples 243 | # (source start file, target name, title, author, 244 | # dir menu entry, description, category) 245 | texinfo_documents = [ 246 | ('index', 'py-umls', u'py-umls Documentation', 247 | u'Pascal Pfiffner', 'py-umls', 'One line description of project.', 248 | 'Miscellaneous'), 249 | ] 250 | 251 | # Documents to append as an appendix to all manuals. 252 | #texinfo_appendices = [] 253 | 254 | # If false, no module index is generated. 255 | #texinfo_domain_indices = True 256 | 257 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 258 | #texinfo_show_urls = 'footnote' 259 | 260 | # If true, do not generate a @detailmenu in the "Top" node's menu. 261 | #texinfo_no_detailmenu = False 262 | 263 | 264 | # -- Options for Epub output ---------------------------------------------- 265 | 266 | # Bibliographic Dublin Core info. 267 | epub_title = u'py-umls' 268 | epub_author = u'Pascal Pfiffner' 269 | epub_publisher = u'Pascal Pfiffner' 270 | epub_copyright = u'2014, Pascal Pfiffner' 271 | 272 | # The basename for the epub file. It defaults to the project name. 273 | #epub_basename = u'py-umls' 274 | 275 | # The HTML theme for the epub output. Since the default themes are not optimized 276 | # for small screen space, using the same theme for HTML and epub output is 277 | # usually not wise. This defaults to 'epub', a theme designed to save visual 278 | # space. 279 | #epub_theme = 'epub' 280 | 281 | # The language of the text. It defaults to the language option 282 | # or en if the language is not set. 283 | #epub_language = '' 284 | 285 | # The scheme of the identifier. Typical schemes are ISBN or URL. 286 | #epub_scheme = '' 287 | 288 | # The unique identifier of the text. This can be a ISBN number 289 | # or the project homepage. 
290 | #epub_identifier = '' 291 | 292 | # A unique identification for the text. 293 | #epub_uid = '' 294 | 295 | # A tuple containing the cover image and cover page html template filenames. 296 | #epub_cover = () 297 | 298 | # A sequence of (type, uri, title) tuples for the guide element of content.opf. 299 | #epub_guide = () 300 | 301 | # HTML files that should be inserted before the pages created by sphinx. 302 | # The format is a list of tuples containing the path and title. 303 | #epub_pre_files = [] 304 | 305 | # HTML files that should be inserted after the pages created by sphinx. 306 | # The format is a list of tuples containing the path and title. 307 | #epub_post_files = [] 308 | 309 | # A list of files that should not be packed into the epub file. 310 | epub_exclude_files = ['search.html'] 311 | 312 | # The depth of the table of contents in toc.ncx. 313 | #epub_tocdepth = 3 314 | 315 | # Allow duplicate toc entries. 316 | #epub_tocdup = True 317 | 318 | # Choose between 'default' and 'includehidden'. 319 | #epub_tocscope = 'default' 320 | 321 | # Fix unsupported image types using the PIL. 322 | #epub_fix_images = False 323 | 324 | # Scale large images. 325 | #epub_max_image_width = 0 326 | 327 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 328 | #epub_show_urls = 'inline' 329 | 330 | # If false, no index is generated. 331 | #epub_use_index = True 332 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. py-umls documentation master file, created by 2 | sphinx-quickstart on Fri Apr 18 20:08:31 2014. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | py-umls 7 | ======= 8 | 9 | This is the documentation for `py-umls` (https://github.com/chb/py-umls), a simple Python 3 library that helps deal with RxNorm, SNOMED and UMLS resources. 
10 | Development is ongoing, based on the needs of the developer, and documentation is sparse at best. 11 | 12 | This module creates **SQLite** databases from UMLS downloads. 13 | The scripts that perform this task can be found in the `databases` directory. 14 | 15 | Contents: 16 | 17 | .. toctree:: 18 | :maxdepth: 4 19 | 20 | py-umls 21 | 22 | 23 | Index & Search 24 | -------------- 25 | 26 | * :ref:`genindex` 27 | * :ref:`modindex` 28 | * :ref:`search` 29 | 30 | -------------------------------------------------------------------------------- /docs/py-umls.rst: -------------------------------------------------------------------------------- 1 | py-umls package 2 | =============== 3 | 4 | This package contains three modules with classes useful for dealing with **RxNorm**, then a module each for UMLS and SNOMED handling. 5 | 6 | 7 | rxnorm 8 | ------ 9 | 10 | Provides classes that deal with RxNorm. This is very much WiP! 11 | 12 | .. automodule:: rxnorm 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | rxnorm_link 18 | ----------- 19 | 20 | A script used to create JSON documents from most RxNorm concepts and store them into a NoSQL database. This is very much WiP! 21 | 22 | .. automodule:: rxnorm_link 23 | :members: 24 | :undoc-members: 25 | :show-inheritance: 26 | 27 | rxnorm_graph 28 | ------------ 29 | 30 | A useful script to help visualize relationships between RxNorm concepts, starting from a given RXCUI. 31 | Just run this script in your command line and follow the leader. 32 | 33 | .. automodule:: rxnorm_graph 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | umls 39 | ---- 40 | 41 | Module to deal with UMLS lexica. 42 | 43 | .. automodule:: umls 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | 48 | snomed 49 | ------ 50 | 51 | Module to deal with the SNOMED terminology. 52 | 53 | ..
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Graphable objects for fun
#
# 2014-02-18 Created by Pascal Pfiffner

import os
import uuid
import subprocess
import tempfile


class GraphableObject(object):
    """ A node in a Graphviz "dot" graph.

    The optional attributes (`shape`, `style`, `color`, `label`) configure how
    the node is rendered; `dot_representation` produces the node's dot source
    line.
    """

    _name = None          # uniquely identifies the object within the graph
    label = None          # shown in place of the name, if set
    shape = None          # dot "shape" attribute
    style = None          # dot "style" attribute
    color = None          # dot "color" attribute
    announced_via = None  # the object that announced the receiver, if any

    def __init__(self, name, label=None):
        # fall back to a generated unique name so dot identifiers never clash
        self._name = name if name else 'o' + uuid.uuid4().hex
        self.label = label

    @property
    def name(self):
        return self._name if self._name else 'unnamed'

    def inner_dot(self):
        """ Return the "[...]" attribute list for this node, or None if no
        attributes are set. """
        if self.label or self.style or self.color or self.shape:
            inner = []
            if self.shape:
                inner.append("shape={}".format(self.shape))
            if self.style:
                inner.append("style={}".format(self.style))
            if self.color:
                inner.append("color={}".format(self.color))
            if self.label:
                inner.append('label="{}"'.format(self.label))
            return "[{}]".format(','.join(inner))
        return None

    def dot_representation(self):
        """ The complete dot source line for this node. """
        inner = self.inner_dot()
        if inner:
            return "\t{} {};\n".format(self.name, inner)
        return "\t{};\n".format(self.name)

    def announce_to(self, dot_context, via=None):
        """ Announce the receiver to the context.

        Subclasses MUST NOT announce other graphable objects they are holding
        on to here but they MUST announce them in "deliver_to" if appropriate.

        - dot_context The context to announce to
        - via If not-None the other GraphableObject that is responsible for
          announcing the receiver
        """
        self.announced_via = via
        dot_context.announce(self)

    def deliver_to(self, dot_context, is_leaf):
        """ Call the context's "deliver" method.

        This method is guaranteed to only be called once per context. Hence
        subclasses that hold on to other graphable objects MUST ANNOUNCE those
        instances here (but NOT deliver them) but ONLY IF "is_leaf" is not True.

        - dot_context The context to deliver to
        - is_leaf If True means the receiver is intended to be a leaf object
        """
        dot_context.deliver(self)


class GraphableRelation(GraphableObject):
    """ A directed, labeled edge between two GraphableObject instances. """

    relation_from = None  # first GraphableObject instance
    relation_to = None    # second GraphableObject instance

    def __init__(self, rel_from, label, rel_to):
        name = "{}->{}".format(rel_from.name, rel_to.name)
        super().__init__(name, label)
        self.relation_from = rel_from
        self.relation_to = rel_to

    def dot_representation(self):
        """ The dot source line for the edge; empty if there is no target. """
        if self.relation_to:
            return "\t{} -> {} {};\n".format(
                self.relation_from.name,
                self.relation_to.name,
                self.inner_dot() or ''
            )
        return ''

    def deliver_to(self, dot_context, is_leaf):
        self.relation_from.announce_to(dot_context, self)
        self.relation_to.announce_to(dot_context, self)
        super().deliver_to(dot_context, is_leaf)  # deliver after announcing our nodes!


class DotContext(object):
    """ Collects dot source while walking a graph of GraphableObjects.

    `announce` deduplicates objects by name and drives the traversal,
    `deliver` appends an object's dot source to the accumulated output.
    """

    items = None
    source = None
    depth = 0
    max_depth = 8   # there is something fishy still, make this double the tree depth you want
    max_width = 15  # pass to graphable objects, they will decide what to do with this

    def __init__(self, max_depth=None, max_width=None):
        self.items = set()
        self.source = ''
        self.depth = 0
        if max_depth is not None:
            self.max_depth = max_depth
        if max_width is not None:
            self.max_width = max_width

    def announce(self, obj):
        """ Deliver `obj` exactly once; flag it as a leaf once the traversal
        is deeper than `max_depth`. """
        if obj.name not in self.items:
            self.items.add(obj.name)

            self.depth += 1
            obj.deliver_to(self, self.depth > self.max_depth)
            self.depth -= 1

    def deliver(self, obj):
        self.source += obj.dot_representation()

    def get(self):
        """ The accumulated dot source (graph body only, no "digraph" wrapper). """
        return self.source


class GraphvizGraphic(object):
    """ Renders a graph of GraphableObjects by writing dot source to a temp
    file and invoking Graphviz' "dot" executable on it.
    """

    cmd = 'dot'        # the Graphviz executable to invoke
    out_dot = None     # if set, the intermediate dot file is kept at this path
    out_type = 'pdf'   # any output format "dot -T..." supports
    out_file = None    # rendered output path; must be set before rendering
    max_depth = None   # forwarded to DotContext
    max_width = None   # forwarded to DotContext

    def __init__(self, out_file='rxgraph.png'):
        self.out_file = out_file

    def executableCommand(self, infile):
        """ The argument list used to run Graphviz on `infile`. """
        return [
            self.cmd,
            '-T{}'.format(self.out_type),
            infile,
            '-o', self.out_file,  # was `format(self.out_file)`, a no-op str() call
        ]

    def write_dot_graph(self, obj):
        """ Collect dot source for `obj` and render it to `out_file`.

        :param GraphableObject obj: the root object to graph
        :raises Exception: if `out_file` is unset or the dot command fails
        """
        if self.out_file is None:
            raise Exception('Please assign an output filename to "out_file"')

        context = DotContext(max_depth=self.max_depth, max_width=self.max_width)
        obj.announce_to(context)
        source = "digraph G {{\n\tranksep=equally;\n{}}}\n".format(context.get())

        # write the dot source to a temporary file
        filedesc, tmpname = tempfile.mkstemp()
        with os.fdopen(filedesc, 'w') as handle:
            handle.write(source)

        try:
            # execute command
            cmd = self.executableCommand(tmpname)
            ret = subprocess.call(cmd)
        finally:
            # keep or remove the temp file even if `subprocess.call` raised
            # (e.g. FileNotFoundError when "dot" is not installed); the
            # original leaked the temp file in that case
            if self.out_dot:
                os.rename(tmpname, self.out_dot)
            else:
                os.unlink(tmpname)

        # `ret != 0` also catches negative return codes (process killed by a
        # signal on POSIX); the original `ret > 0` missed those
        if ret != 0:
            raise Exception('Failed executing: "{}"'.format(' '.join(cmd)))
36 | """ 37 | import sqlite 38 | import csvimporter 39 | 40 | mapping = { 41 | 'loinc.csv': 'loinc', 42 | 'map_to.csv': 'map_to', 43 | 'source_organization.csv': 'sources' 44 | } 45 | dbpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'databases/loinc.db') 46 | 47 | # import 48 | for csvfile, table in mapping.items(): 49 | print("Importing LOINC table {}".format(csvfile)) 50 | imp = csvimporter.CSVImporter(os.path.join(dirpath, csvfile), table) 51 | imp.import_to(dbpath) 52 | 53 | # index 54 | print("Creating indexes") 55 | sql_handle = sqlite.SQLite(dbpath) 56 | sql_handle.execute('CREATE INDEX x_loinc_num_loinc ON loinc (LOINC_NUM)') 57 | sql_handle.execute('CREATE INDEX x_shortname_loinc ON loinc (SHORTNAME)') 58 | sql_handle.execute('CREATE INDEX x_long_common_name_loinc ON loinc (LONG_COMMON_NAME)') 59 | 60 | 61 | 62 | # running this as a script performs the database setup/check 63 | if '__main__' == __name__: 64 | logging.basicConfig(level=logging.DEBUG) 65 | 66 | # if the database check fails, run import commands 67 | try: 68 | LOINC.check_database() 69 | except Exception as e: 70 | csv_path = sys.argv[1] if 2 == len(sys.argv) else None 71 | if csv_path is not None and os.path.exists(csv_path): 72 | try: 73 | LOINC.import_from_files(csv_path) 74 | except Exception as e: 75 | raise Exception("SNOMED import failed: {}".format(e)) 76 | else: 77 | print("Provide the path to the directory containing the LOINC CSV files as first argument.") 78 | print("Download the LOINC Table File in CSV format (free registration required) here:") 79 | print("http://loinc.org/downloads/loinc") 80 | 81 | # TODO: lookup examples 82 | -------------------------------------------------------------------------------- /rxnorm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Utilities to handle RxNorm 5 | # 6 | # 2014-01-28 Extracted from UMLS.py 7 | 8 | import os.path 9 
class RxNorm (object):
    """ A class for handling RxNorm in an SQLite database and performing a
    handful of RxNorm-related tasks.
    """

    @classmethod
    def check_database(cls):
        """ Ensure the local RxNorm SQLite database is present.

        :raises Exception: pointing at `databases/rxnorm.sh` when
            `databases/rxnorm.db` cannot be found next to this module
        """
        db_file = os.path.join(os.path.dirname(__file__), 'databases/rxnorm.db')
        if os.path.exists(db_file):
            return
        raise Exception("The RxNorm database at {} does not exist. Run the import script `databases/rxnorm.sh`."
            .format(os.path.abspath(db_file)))

    @classmethod
    def ndc_normalize_list(cls, ndc_list):
        """ Normalize every NDC in `ndc_list`, dropping duplicates.

        :returns: A list of normalized NDC strings (order unspecified)
        """
        return list({cls.ndc_normalize(code) for code in ndc_list})

    @classmethod
    def ndc_normalize(cls, ndc):
        """ Normalizes an NDC (National Drug Code) number.

        The pseudo-code published by NIH
        (http://www.nlm.nih.gov/research/umls/rxnorm/NDC_Normalization_Code.rtf)
        first identifies the format (e.g. "6-3-2") and then normalizes based on
        that finding. However since the normalized string is always 5-4-2,
        padded with leading zeroes and removing all dashes afterwards, this
        implementation goes a much simpler route.

        NDCs that only contain one dash are treated as if they were missing the
        package specifier, so they get a "-00" appended before normalization.

        :param str ndc: The NDC to normalize as string
        :returns: A string with the normalized NDC, or `None` if the number
            couldn't be normalized
        """
        if not ndc or len(ndc) > 14:
            return None

        # some NDCs from MTHFDA use '*' where a '0' belongs
        candidate = ndc.replace('*', '0')
        segments = candidate.split('-')

        # a single dash means the package specifier is missing; treat as "-00"
        # (gives a 6-4-2 style code, some of these may no longer exist)
        if 2 == len(segments):
            segments.append('00')

        if 3 == len(segments):
            # pad each segment with leading zeroes, then keep the last 5/4/2
            # characters so over-long segments lose their leading digit(s)
            candidate = '{}{}{}'.format(
                segments[0].rjust(5, '0')[-5:],
                segments[1].rjust(4, '0')[-4:],
                segments[2].rjust(2, '0')[-2:])

        elif 1 == len(segments):
            # "if NDC passed has 12 digits and first char is '0' and it's from
            # VANDF then trim first char"; we cannot tell VANDF membership from
            # the NDC alone, so trim whenever it matches
            if 12 == len(candidate) and candidate.startswith('0'):
                candidate = candidate[1:]

            # without dashes, only an 11-digit code is valid
            elif 11 != len(candidate):
                return None

        # reject anything that still contains non-numeric characters
        return candidate if candidate.isdigit() else None
""" 103 | 104 | sqlite = None 105 | cache_drug_class = False # will be set to true when the prepare_to_cache_classes method gets called 106 | 107 | 108 | def __init__(self): 109 | absolute = os.path.dirname(os.path.realpath(__file__)) 110 | self.sqlite = SQLite.get(os.path.join(absolute, 'databases/rxnorm.db')) 111 | 112 | 113 | # MARK: - "name" lookup 114 | 115 | def lookup_rxcui(self, rxcui, preferred=True): 116 | """ Return a tuple with (str, tty, rxcui, rxaui) or - if "preferred" is 117 | False - a tuple with (preferred-name, list-of-tuples) 118 | """ 119 | if rxcui is None or len(rxcui) < 1: 120 | return None 121 | 122 | # retrieve all matches 123 | sql = 'SELECT str, tty, rxcui, rxaui FROM rxnconso WHERE rxcui = ? AND lat = "ENG"' 124 | 125 | found = [] 126 | for res in self.sqlite.execute(sql, (rxcui,)): 127 | found.append(res) 128 | 129 | if 0 == len(found): 130 | logging.error("RxNormLookup.lookup_rxcui: RxCUI {} not found".format(rxcui)) 131 | return None 132 | 133 | # preferred name 134 | pref_match = None 135 | for tty in ['SBDC', 'SCDC', 'SBD', 'SCD', 'CD', 'SBDF', 'SCDF', 'BN', 'IN', 'PIN', 'MIN']: 136 | for res in found: 137 | if tty == res[1]: 138 | pref_match = res 139 | break 140 | if pref_match is not None: 141 | break 142 | 143 | if preferred: 144 | return pref_match if pref_match is not None else found[0] 145 | 146 | return (pref_match[0] if pref_match is not None else None, found) 147 | 148 | def lookup_rxcui_name(self, rxcui, preferred=True, no_html=True): 149 | """ Return a string or HTML for the meaning of the given code. 150 | If preferred is True (the default), only one match will be returned, 151 | looking for specific TTY and using the "best" one. 
152 | There is currently NO SUPPORT FOR preferred = False 153 | """ 154 | 155 | res = self.lookup_rxcui(rxcui, preferred=True) 156 | if res is None: 157 | return '' 158 | 159 | if no_html: 160 | str_format = "{0} [{1}]" 161 | else: 162 | str_format = "{0} [{1}]" 163 | 164 | return str_format.format(*res) 165 | 166 | 167 | # MARK: - Relations 168 | 169 | def lookup_tty(self, rxcui): 170 | """ Returns a set of TTYs for the given RXCUI. """ 171 | if rxcui is None: 172 | return None 173 | 174 | sql = 'SELECT tty FROM rxnconso WHERE rxcui = ?' 175 | ttys = set() 176 | for res in self.sqlite.execute(sql, (rxcui,)): 177 | ttys.add(res[0]) 178 | 179 | return ttys 180 | 181 | def lookup_related(self, rxcui, relation=None, to_rxcui=None): 182 | """ Returns a set of tuples containing the RXCUI and the actual relation 183 | for the desired relation, or all if the relation is not specified. 184 | 185 | :param str rxcui: The RXCUI for which to look up relations 186 | :param str relation: Optional: the type of the relation, e.g. "has_ingredient" 187 | :param str to_rxcui: An optional second rxcui, to return all relations 188 | between the two given rxcuis. Ignored if `relation` is present. 189 | :returns: A set of tuples, where tuples are (rxcui, rela) 190 | """ 191 | if rxcui is None: 192 | return None 193 | 194 | found = set() 195 | if relation is not None: 196 | sql = "SELECT rxcui1, rela FROM rxnrel WHERE rxcui2 = ? AND rela = ?" 197 | for res in self.sqlite.execute(sql, (rxcui, relation)): 198 | found.add(res) 199 | elif to_rxcui is not None: 200 | sql = "SELECT rxcui1, rela FROM rxnrel WHERE rxcui2 = ? AND rxcui1 = ?" 201 | for res in self.sqlite.execute(sql, (rxcui, to_rxcui)): 202 | found.add(res) 203 | else: 204 | sql = "SELECT rxcui1, rela FROM rxnrel WHERE rxcui2 = ?" 
205 | for res in self.sqlite.execute(sql, (rxcui,)): 206 | found.add(res) 207 | 208 | return found 209 | 210 | 211 | # MARK: - RxCUI 212 | 213 | def rxcui_for_ndc(self, ndc): 214 | """ Find the RXCUI for the given NDC from our NDC-cache-table. 215 | 216 | This method only does exact lookup for now, it should be extended to 217 | use normalized NDC formats. 218 | 219 | :param str ndc: The NDC to look up 220 | :returns: The matching RXCUI as string, or None 221 | """ 222 | if ndc is None: 223 | return None 224 | # TODO: ensure NDC normalization 225 | 226 | rxcuis = {} 227 | sql = "SELECT RXCUI FROM NDC WHERE NDC = ?" 228 | for res in self.sqlite.execute(sql, (ndc,)): 229 | rxcuis[res[0]] = rxcuis.get(res[0], 0) + 1 230 | 231 | rxcui = list(rxcuis.keys())[0] if len(rxcuis) > 0 else None 232 | if len(rxcuis) > 1: 233 | popular = OrderedDict(Counter(rxcuis).most_common()) 234 | rxcui = popular.popitem(False)[0] 235 | 236 | return str(rxcui) if rxcui is not None else None 237 | 238 | def ndc_for_rxcui(self, rxcui): 239 | """ Find the NDC from our NDC-cache-table for the given RXCUI. 240 | """ 241 | if rxcui is None: 242 | return None 243 | 244 | sql = 'SELECT distinct ndc FROM ndc WHERE rxcui = ?' 245 | return [res[0] for res in self.sqlite.execute(sql, (rxcui,))] 246 | 247 | def rxcui_for_name(self, name, limit_tty=None): 248 | """ Tries to find an RXCUI for the concept name. 249 | 250 | Does this by performing a "starts with" against the STR column on 251 | RXNCONSO, then replaces any spaces with wildcards and finally chops off 252 | one word after the other until a match is found. 253 | 254 | This works but is slow and far from perfect. RxNav's ``approxMatch`` is 255 | definitely better, you can use ``rxcui_for_name_approx`` to get an 256 | RXCUI using that service. 
257 | 258 | :param str name: The name to get an RXCUI for 259 | :param list limit_tty: Optional: limit search to a given list of TTYs 260 | :returns: The best matching rxcui, if any, as string 261 | """ 262 | if name is None: 263 | return None 264 | 265 | rxcuis = {} 266 | lim = 'tty IN ("{}") AND'.format('","'.join(limit_tty)) if limit_tty else '' 267 | sql = 'SELECT rxcui, tty FROM rxnconso WHERE {} str LIKE ?'.format(lim) 268 | 269 | # try the full string, allowing wildcard at the trailing end 270 | for res in self.sqlite.execute(sql, (name + '%',)): 271 | rxcuis[res[0]] = rxcuis.get(res[0], 0) + 1 272 | 273 | # nothing yet, replace spaces with '%' 274 | for res in self.sqlite.execute(sql, (name.replace(' ', '%') + '%',)): 275 | rxcuis[res[0]] = rxcuis.get(res[0], 0) + 1 276 | 277 | # still nothing, try chopping off parts from the right 278 | if 0 == len(rxcuis): 279 | parts = name.split() 280 | for x in range(len(parts) - 1): 281 | comp = '%'.join(parts[:-(x+1)]) 282 | for res in self.sqlite.execute(sql, (comp + '%',)): 283 | rxcuis[res[0]] = rxcuis.get(res[0], 0) + 1 284 | if len(rxcuis) > 0: 285 | break 286 | 287 | rxcui = list(rxcuis.keys())[0] if len(rxcuis) > 0 else None 288 | if len(rxcuis) > 1: 289 | popular = OrderedDict(Counter(rxcuis).most_common()) 290 | rxcui = popular.popitem(False)[0] 291 | 292 | return str(rxcui) if rxcui is not None else None 293 | 294 | def rxcui_for_name_approx(self, name): 295 | """ Returns the best ``approxMatch`` RXCUI as found when using RxNav's 296 | service against the provided name. Runs synchronously. 
297 | 298 | :param str name: The name to get an RXCUI for 299 | :returns: The top ranked rxcui, if any, as string 300 | """ 301 | matches = self.rxnav_approx_match(name, nmax=1) 302 | return str(matches[0]) if matches is not None and len(matches) > 0 else None 303 | 304 | def rxnav_approx_match(self, name, nmax=10): 305 | """ Returns the top #nmax ``approximateTerm`` rxcuis as found when using 306 | RxNav's service against the provided name. Runs synchronously. 307 | 308 | :param str name: The name to get an RXCUI for 309 | :param int nmax: The maximum number of unique rxcuis to return, 10 by 310 | default 311 | :returns: The top ranked rxcuis, if any, as a list 312 | """ 313 | if name is None: 314 | return None 315 | 316 | url = 'http://rxnav.nlm.nih.gov/REST/approximateTerm' 317 | r = requests.get(url, params={'term': name, 'option': 1}) # we don't use `maxEntries` as duplicate rxcuis count separately 318 | root = ET.fromstring(r.text) 319 | candidates = root.findall('.//candidate') 320 | rxcuis = [] 321 | for cand in candidates: 322 | rxcui = cand.find('rxcui') 323 | if rxcui is not None and rxcui.text is not None: 324 | #rank = cand.find('rank') # rely on RxNav's order for now 325 | if rxcui.text not in rxcuis: 326 | rxcuis.append(rxcui.text) 327 | 328 | # stop after nmax 329 | if nmax is not None and len(rxcuis) >= nmax: 330 | break 331 | 332 | return rxcuis 333 | 334 | 335 | # MARK: - Drug Class OBSOLETE, WILL BE GONE 336 | 337 | def can_cache(self): 338 | return self.sqlite.hasTable('va_cache') 339 | 340 | def prepare_to_cache_classes(self): 341 | if self.sqlite.create('va_cache', '(rxcui primary key, va varchar)'): 342 | self.cache_drug_class = True 343 | 344 | def va_drug_class(self, rxcui): 345 | """ Returns a list of VA class names for a given RXCUI. EXPERIMENTAL. 
346 | """ 347 | #if not self.cache_drug_class: 348 | # return None 349 | if rxcui is None: 350 | return None 351 | 352 | # check dedicated dable 353 | sql = 'SELECT va FROM va_cache WHERE rxcui = ?' 354 | res = self.sqlite.executeOne(sql, (rxcui,)) 355 | return res[0].split('|') if res else None 356 | 357 | def friendly_class_format(self, va_name): 358 | """ Tries to reformat the VA drug class name so it's suitable for 359 | display. 360 | """ 361 | if va_name is None or 0 == len(va_name): 362 | return None 363 | 364 | # remove identifier 365 | if ']' in va_name: 366 | va_name = va_name[va_name.index(']')+1:] 367 | va_name = va_name.strip() 368 | 369 | # remove appended specificiers 370 | if ',' in va_name and va_name.index(',') > 2: 371 | va_name = va_name[0:va_name.index(',')] 372 | 373 | if '/' in va_name and va_name.index('/') > 2: 374 | va_name = va_name[0:va_name.index('/')] 375 | 376 | # capitalize nicely 377 | va_name = va_name.lower(); 378 | va_name = re.sub(r'(^| )(\w)', lambda match: r'{}{}'.format(match.group(1), match.group(2).upper()), va_name) 379 | 380 | return va_name 381 | 382 | 383 | # MARK: - Bare Metal 384 | 385 | def execute(self, sql, params=()): 386 | """ Execute and return the pointer of an SQLite execute() query. """ 387 | return self.sqlite.execute(sql, params) 388 | 389 | def fetchOne(self, sql, params=()): 390 | """ Execute and return the result of fetchone() on a raw SQL query. """ 391 | return self.sqlite.execute(sql, params).fetchone() 392 | 393 | def fetchAll(self, sql, params=()): 394 | """ Execute and return the result of fetchall() on a raw SQL query. 
""" 395 | return self.sqlite.execute(sql, params).fetchall() 396 | 397 | 398 | class RxNormCUI (GraphableObject): 399 | rxcui = None 400 | _ttys = None 401 | relations = None 402 | rxlookup = RxNormLookup() 403 | 404 | def __init__(self, rxcui, label=None): 405 | super().__init__(rxcui, rxcui) 406 | self.shape = 'box' 407 | self.rxcui = rxcui 408 | 409 | @property 410 | def ttys(self): 411 | return self._ttys 412 | 413 | @ttys.setter 414 | def ttys(self, val): 415 | self._ttys = val 416 | self.update_shape_from_ttys() 417 | 418 | 419 | def find_relations(self, to_rxcui=None, max_width=10): 420 | counted = {} 421 | for rxcui, rela in self.rxlookup.lookup_related(self.rxcui, None, to_rxcui): 422 | if rela in counted: 423 | counted[rela].append(rxcui) 424 | else: 425 | counted[rela] = [rxcui] 426 | 427 | found = [] 428 | for rela, items in sorted(counted.items()): # sort to generate mostly consistent dot files 429 | if len(items) > max_width: 430 | proxy = GraphableObject(None, rela) 431 | rel = GraphableRelation(self, str(len(items)), proxy) 432 | 433 | if self.announced_via: # if our announcer is here, be nice and link back 434 | for rxcui in items: 435 | if rxcui == self.announced_via.rxcui1.rxcui: 436 | via = RxNormCUI(rxcui) 437 | found.append(RxNormConceptRelation(self, rela, via)) 438 | else: 439 | for rxcui in sorted(items): # sort to generate mostly consistent dot files 440 | obj = RxNormCUI(rxcui) 441 | rel = RxNormConceptRelation(self, rela, obj) 442 | found.append(rel) 443 | 444 | return found 445 | 446 | 447 | def deliver_to(self, dot_context, is_leaf): 448 | self.update_self_from_rxcui() 449 | super().deliver_to(dot_context, is_leaf) 450 | 451 | # if we are a leaf, still fetch the relation going back to our announcer 452 | if is_leaf: 453 | if self.relations is None and self.announced_via: 454 | rela = self.find_relations( 455 | to_rxcui=self.announced_via.rxcui1.rxcui, 456 | max_width=dot_context.max_width 457 | ) 458 | if rela: 459 | 
rela[0].announce_to(dot_context) 460 | else: 461 | if self.relations is None: 462 | self.relations = self.find_relations(max_width=dot_context.max_width) 463 | 464 | for rel in self.relations: 465 | rel.announce_to(dot_context) 466 | 467 | 468 | def update_self_from_rxcui(self): 469 | if self.rxcui: 470 | ret = self.rxlookup.lookup_rxcui(self.rxcui, preferred=False) 471 | if ret is not None and len(ret) > 1 and len(ret[1]) > 0: 472 | pref = ret[0] 473 | found = ret[1] 474 | self.ttys = set([res[1] for res in found]) 475 | self.label = _splitted_string(pref if pref else found[0][0]) 476 | self.label += "\n[{} - {}]".format(self.rxcui, ', '.join(sorted(self._ttys))) 477 | 478 | vas = self.rxlookup.va_drug_class(self.rxcui) 479 | if vas: 480 | self.style = 'bold' 481 | self.color = 'violet' 482 | self.label += "\n{}".format(_splitted_string(', '.join(vas))) 483 | 484 | def update_shape_from_ttys(self): 485 | if self._ttys: 486 | if 'BD' in self._ttys or 'BN' in self._ttys: 487 | self.style = 'bold' 488 | elif 'SBD' in [tty[:3] for tty in self._ttys]: 489 | self.shape = 'box,peripheries=2' 490 | elif 'MIN' in self._ttys: 491 | self.shape = 'polygon,sides=5,peripheries=2' 492 | elif 'IN' in self._ttys or 'PIN' in self._ttys: 493 | self.shape = 'polygon,sides=5' 494 | 495 | class RxNormConceptRelation (GraphableRelation): 496 | rxcui1 = None 497 | rxcui2 = None 498 | 499 | def __init__(self, rxcuiobj1, rela, rxcuiobj2): 500 | super().__init__(rxcuiobj1, rela, rxcuiobj2) 501 | self.rxcui1 = rxcuiobj1 502 | self.rxcui2 = rxcuiobj2 503 | 504 | if 'isa' == rela[-3:]: 505 | self.style = 'dashed' 506 | 507 | 508 | def _splitted_string(string, maxlen=60): 509 | if len(string) > maxlen: 510 | at = 0 511 | newstr = '' 512 | for word in string.split(): 513 | if at > maxlen: 514 | newstr += "\n" 515 | at = 0 516 | if at > 0: 517 | newstr += ' ' 518 | at += 1 519 | newstr += word 520 | at += len(word) 521 | return newstr 522 | return string 523 | 524 | 525 | # running this as a 
# running this as a script does the database setup/check and prints a lookup
# for every RXCUI passed on the command line
if '__main__' == __name__:
    RxNorm.check_database()

    import sys
    rxcuis = sys.argv[1:] if len(sys.argv) > 1 else None
    if rxcuis is None:
        print('x> Provide RXCUIs as arguments on the command line')
        sys.exit(0)

    look = RxNormLookup()
    for rxcui in rxcuis:
        print('-----')
        meaning = look.lookup_rxcui_name(rxcui, preferred=False)
        ttys = look.lookup_tty(rxcui)
        related = look.lookup_related(rxcui)

        print('RxCUI "{0}": {1}'.format(rxcui, meaning))
        print('Concept type "{0}": {1}'.format(rxcui, ', '.join(ttys)))
        print('Relationships "{0}":'.format(rxcui))
        for rrxcui, rrela in sorted(related, key=lambda x: x[1]):
            res = look.lookup_rxcui(rrxcui)
            if res is None:
                # previously an unresolvable related RXCUI crashed the loop
                # with a TypeError when unpacking None
                continue
            rname, rtty = res[0], res[1]
            # `' ' * n` is the idiomatic padding (and yields '' for negative
            # n, same as the original join-over-range construction)
            sp1 = ' ' * (17 + len(rxcui) - len(rrela))
            sp2 = ' ' * (9 - len(rrxcui))
            sp3 = ' ' * (6 - len(rtty))
            print('{}{}:{}{}{}{} {}'.format(sp1, rrela, sp2, rrxcui, sp3, rtty, rname))
28 | br["password"] = args.password 29 | zip_request = br.submit() 30 | 31 | try: 32 | bytes = int(zip_request.info().getheader('Content-Length')) 33 | except: 34 | print "Failed to download file. Check your credentials." 35 | sys.exit(1) 36 | 37 | with open(args.file, "wb") as outfile: 38 | while zip_request.tell() < bytes: 39 | outfile.write(zip_request.read(size=CHUNK_SIZE)) 40 | read = zip_request.tell() 41 | print "\rDownload: %.2f%% of %sMB"%( 42 | read * 100.0 / bytes, 43 | bytes / 1000000), 44 | 45 | print("Extracting zip") 46 | with zipfile.ZipFile(args.file) as zf: 47 | zf.extractall() 48 | 49 | if __name__ == "__main__": 50 | parser = argparse.ArgumentParser(description='Download RxNorm Release') 51 | 52 | parser.add_argument('--username', help='UMLS username', required=True) 53 | parser.add_argument('--password', help='UMLS password', required=True) 54 | parser.add_argument( 55 | "--release", 56 | help="specify release version (e.g. '10052015'). Default: latest.", 57 | default=None) 58 | parser.add_argument( 59 | '--file', 60 | help='Where to save .zip download. Default: "rxnorm-download.zip"', 61 | default="rxnorm-download.zip") 62 | 63 | args = parser.parse_args() 64 | download_rxnorm(args) 65 | -------------------------------------------------------------------------------- /rxnorm_graph.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Draw an RxNorm graph for a given RxCUI. 
5 | # You must have "dot" installed (Graphviz) 6 | # 7 | # 2014-02-18 Created by Pascal Pfiffner 8 | 9 | import sys 10 | import subprocess 11 | 12 | from rxnorm import RxNormCUI 13 | from graphable import GraphvizGraphic 14 | 15 | 16 | if '__main__' == __name__: 17 | rxcui = sys.argv[1] if 2 == len(sys.argv) else None 18 | if rxcui is None: 19 | print('x> Provide a RXCUI as first argument') 20 | sys.exit(0) 21 | 22 | rx = RxNormCUI(rxcui) 23 | gv = GraphvizGraphic('rxgraph.pdf') 24 | gv.out_dot = 'rxgraph.dot' 25 | gv.max_depth = 8 26 | gv.max_width = 15 27 | 28 | gv.write_dot_graph(rx) 29 | 30 | print('-> DOT file: {}'.format(gv.out_dot)) 31 | print('-> PNG graph: {}'.format(gv.out_file)) 32 | 33 | subprocess.call(['open', gv.out_file]) 34 | -------------------------------------------------------------------------------- /rxnorm_link.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Precompute interesting RXCUI relationships into a dictionary. Use the script 5 | # `rxnorm_link_run.sh` to store these dictionaries into a JSON database. See 6 | # that script for parameters to change. 7 | # 8 | # 2012-09-28 Created by Josh Mandel 9 | # 2014-02-10 Stolen by Pascal Pfiffner 10 | # 11 | # For profiling: pycallgraph graphviz -- rxnorm_link.py 12 | 13 | import sys 14 | import os.path 15 | sys.path.insert(0, os.path.dirname(__file__)) 16 | 17 | import json 18 | import signal 19 | import logging 20 | from datetime import datetime 21 | 22 | from rxnorm import RxNorm, RxNormLookup 23 | 24 | 25 | def doQ(rxhandle, q, p): 26 | return [x[0] for x in rxhandle.fetchAll(q, p)] 27 | 28 | def toBrandAndGeneric(rxhandle, rxcuis, tty): 29 | ret = set() 30 | for rxcui in rxcuis: 31 | ret.update(doQ(rxhandle, "SELECT rxcui1 from rxnrel where rxcui2=? 
and rela='tradename_of'", (rxcui,))) 32 | return ret 33 | 34 | def toComponents(rxhandle, rxcuis, tty): 35 | ret = set() 36 | 37 | if tty not in ("SBD", "SCD"): 38 | return ret 39 | 40 | for rxcui in rxcuis: 41 | cs = doQ(rxhandle, "SELECT rxcui1 from rxnrel where rxcui2=? and rela='consists_of'", (rxcui,)) 42 | for c in cs: 43 | ret.update(doQ(rxhandle, "SELECT rxcui from rxnconso where rxcui=? and sab='RXNORM' and tty='SCDC'", (c,))) 44 | 45 | return ret 46 | 47 | def toTreatmentIntents(rxhandle, rxcuis, tty): 48 | ret = set() 49 | for rxcui in rxcuis: 50 | ret.update(toTreatmentIntents_helper(rxhandle, rxcui, tty)) 51 | return ret 52 | 53 | def toTreatmentIntents_helper(rxhandle, rxcui, tty): 54 | assert tty=='IN' 55 | ret = [] 56 | rxauis = doQ(rxhandle, "SELECT rxaui from rxnconso where rxcui=? and tty='FN' and sab='NDFRT'", (rxcui,)) 57 | for rxaui in rxauis: 58 | rxauis1 = doQ(rxhandle, "SELECT rxaui1 from rxnrel where rxaui2=? and rela='may_treat'", (rxaui,)) 59 | for rxaui1 in rxauis1: 60 | name = doQ(rxhandle, "SELECT str from rxnconso where rxaui=? and tty='FN' and sab='NDFRT'", (rxaui1,)) 61 | name = map(lambda x: x.replace(" [Disease/Finding]", ""), name) 62 | ret.extend(name) 63 | return ret 64 | 65 | def toMechanism(rxhandle, rxcuis, tty): 66 | ret = set() 67 | for v in rxcuis: 68 | ret.update(toMechanism_helper(rxhandle, v, tty)) 69 | return ret 70 | 71 | def toMechanism_helper(rxhandle, rxcui, tty): 72 | assert tty=='IN' 73 | ret = set() 74 | rxauis = doQ(rxhandle, "SELECT rxaui from rxnconso where rxcui=? and tty='FN' and sab='NDFRT'", (rxcui,)) 75 | for a in rxauis: 76 | a1 = doQ(rxhandle, "SELECT rxaui1 from rxnrel where rxaui2=? and rela='has_mechanism_of_action'", (a,)) 77 | if len(a1) > 0: 78 | moa = doQ(rxhandle, "SELECT str from rxnconso where rxaui=? 
and tty='FN' and sab='NDFRT'", (a1[0],)) 79 | moa = map(lambda x: x.replace(" [MoA]", ""), moa) 80 | ret.update(moa) 81 | return ret 82 | 83 | 84 | def toIngredients(rxhandle, rxcuis, tty): 85 | ret = set() 86 | for v in rxcuis: 87 | ret.update(toIngredients_helper(rxhandle, v, tty)) 88 | return ret 89 | 90 | def toIngredients_helper(rxhandle, rxcui, tty): 91 | if 'IN' == tty: 92 | return [] 93 | 94 | # can lookup ingredient directly 95 | map_direct = { 96 | 'MIN': 'has_part', 97 | 'PIN': 'form_of', 98 | 'BN': 'tradename_of', 99 | 'SCDC': 'has_ingredient', 100 | 'SCDF': 'has_ingredient', 101 | 'SCDG': 'has_ingredient', 102 | } 103 | 104 | if tty in map_direct: 105 | return doQ(rxhandle, "SELECT rxcui1 from rxnrel where rxcui2=? and rela=?", (rxcui, map_direct[tty])) 106 | 107 | # indirect ingredient lookup 108 | map_indirect = { 109 | 'BPCK': ('contains', 'SCD'), 110 | 'GPCK': ('contains', 'SCD'), 111 | 'SBD': ('tradename_of', 'SCD'), 112 | 'SBDC': ('tradename_of', 'SCDC'), 113 | 'SBDF': ('tradename_of', 'SCDF'), 114 | 'SBDG': ('tradename_of', 'SCDG'), 115 | 'SCD': ('consists_of', 'SCDC'), 116 | } 117 | 118 | if tty in map_indirect: 119 | val = map_indirect[tty] 120 | return toIngredients(rxhandle, doQ(rxhandle, "SELECT rxcui1 from rxnrel where rxcui2=? and rela=?", (rxcui, val[0])), val[1]) 121 | 122 | logging.warn('TTY "{}" is not mapped, skipping ingredient lookup'.format(tty)) 123 | return [] 124 | 125 | 126 | def initVA(rxhandle): 127 | """ Initializes the VA drug class cache table and inserts all known drug 128 | classes by looking them up in the RXNSAT table (ATN = "VA_CLASS_NAME"). 
129 | """ 130 | # SELECT DISTINCT tty, COUNT(tty) FROM rxnsat LEFT JOIN rxnconso AS r USING (rxcui) WHERE atn = "VA_CLASS_NAME" GROUP BY tty; 131 | rxhandle.execute('DROP TABLE IF EXISTS va_cache') 132 | rxhandle.execute('''CREATE TABLE va_cache 133 | (rxcui varchar UNIQUE, va text, from_rxcui varchar, rela varchar, level int)''') 134 | rxhandle.execute('''INSERT OR IGNORE INTO va_cache 135 | SELECT rxcui, atv, null, null, 0 FROM rxnsat 136 | WHERE atn = "VA_CLASS_NAME"''') 137 | rxhandle.sqlite.commit() 138 | 139 | def traverseVA(rxhandle, rounds=3, expect=203175): 140 | """ Drug classes are set for a couple of different TTYs, it seems however 141 | most consistently to be defined on CD, SCD and AB TTYs. 142 | We cache the classes in va_cache and loop over rxcuis with known classes, 143 | applying the known classes to certain relationships. 144 | """ 145 | print("-> Starting VA class mapping") 146 | 147 | mapping = { 148 | 'CD': [ 149 | 'has_tradename', # > BD, SBD, ... ; tiny impact on step 2, compensated for in steps 3+ 150 | 'contained_in', # > BPCK; tiny impact in step 2, compansated for in steps 3+ 151 | 'consists_of', # > SCDC; big impact step 2+, starting to be compensated for in steps 5+; NOT IDEAL 152 | #'quantified_form', # > SBD; no impact 153 | ], 154 | 'GPCK': [ 155 | 'has_tradename', # > BPCK; small impact step 3 156 | ], 157 | 158 | 'SBD': [ 159 | 'isa', # > SBDF; big impact step 2+, increasingly important (58% vs 75% coverage after step 5) 160 | 'has_ingredient', # > BN; small impact step 2+ 161 | 'tradename_of', # > SCD; tiny impact step 2, fully compensated by step 4 162 | 'consists_of', # > SBDC; small impact step 4+ 163 | ], 164 | 'SBDF': [ 165 | #'tradename_of', # > SCDF; no impact 166 | 'has_ingredient', # > BN; tiny impact step 2+ 167 | #'inverse_isa', # > SBD; no impact 168 | ], 169 | 'SBDG': [ 170 | 'has_ingredient', # > BN; tiny impact step 2+ 171 | #'tradename_of', # > SCDG; no impact 172 | ], 173 | 'SBDC': [ 174 | 'tradename_of', # > 
SCDC; tiny impact step 3, compensated by step 5 175 | ], 176 | 177 | 'SCD': [ 178 | 'isa', # > SCDF; big impact step 2+, not compensated (59% vs 75% coverage after step 5) 179 | 'has_quantified_form', # > SCD; tiny impact step 2, fully compensated in step 3 180 | 'contained_in', # > GPCK; tiny impact steps 4+ 181 | 'has_tradename', # > SBD; small impact steps 3+ 182 | ], 183 | 'SCDC': [ 184 | 'constitutes', # > SCD; big impact steps 3+ (63% vs 75% coverage after step 5) 185 | 'has_tradename', # > SBDC; impact in step 3, partially compensated in step 4 186 | ], 187 | 'SCDF': [ 188 | 'inverse_isa', # > SCD; large impact steps 3+ 189 | ], 190 | 'SCDG': [ 191 | #'tradename_of', # > SBDG; no impact 192 | ] 193 | } 194 | 195 | found = set() 196 | per_level_sql = 'SELECT rxcui, va FROM va_cache WHERE level = ?' 197 | 198 | for l in range(0,rounds): 199 | i = 0 200 | existing = rxhandle.fetchAll(per_level_sql, (l,)) 201 | num_drugs = len(existing) 202 | this_round = set(); 203 | 204 | # loop all rxcuis that already have a class and walk their relationships 205 | for rxcui, va_imp in existing: 206 | found.add(rxcui) 207 | this_round.add(rxcui) 208 | vas = va_imp.split('|') 209 | seekRelAndStoreSameVAs(rxhandle, rxcui, set(vas), mapping, l) 210 | 211 | # progress report 212 | i += 1 213 | print('--> Step {} {:.1%}'.format(l+1, i / num_drugs), end="\r") 214 | 215 | # commit after every round 216 | rxhandle.sqlite.commit() 217 | print('==> Step {}, found classes for {} of {} drugs, {:.2%} coverage'.format(l+1, len(this_round), expect, len(found) / expect)) 218 | 219 | print('-> VA class mapping complete') 220 | 221 | def seekRelAndStoreSameVAs(rxhandle, rxcui, vas, mapping, at_level=0): 222 | """ For the given RXCUI retrieves all relations, as defined in `mapping`, 223 | and updates those concepts with the drug classes passed in in `vas`. 
224 | """ 225 | assert(rxcui) 226 | assert(len(vas) > 0) 227 | 228 | # get all possible relas by checking the concept's TTY against our mapping 229 | ttys = rxhandle.lookup_tty(rxcui) 230 | desired_relas = set() 231 | for tty in ttys: 232 | if tty in mapping: 233 | desired_relas.update(mapping[tty]) 234 | if 0 == len(desired_relas): 235 | return 236 | 237 | # get all related rxcuis with the possible "rela" value(s) 238 | # Note: I had a "... AND rela IN (...)" in the following statement, but it 239 | # turns out just doing this in Python isn't slower and code is shorter 240 | rel_sql = 'SELECT DISTINCT rxcui1, rela FROM rxnrel WHERE rxcui2 = ?' 241 | for res in rxhandle.fetchAll(rel_sql, [rxcui]): 242 | if res[1] in desired_relas: 243 | storeVAs(rxhandle, res[0], vas, rxcui, res[1], at_level+1) 244 | 245 | def storeVAs(rxhandle, rxcui, vas, from_rxcui, via_rela, level=0): 246 | """ Stores the drug classes `vas` for the given concept id, checking first 247 | if that concept already has classes and updating the set. 248 | """ 249 | assert(rxcui) 250 | assert(len(vas) > 0) 251 | 252 | # do we already have classes? 253 | exist_sql = 'SELECT va FROM va_cache WHERE rxcui = ?' 254 | exist_ret = doQ(rxhandle, exist_sql, [rxcui]) 255 | if exist_ret and len(exist_ret) > 0: 256 | 257 | # bail out if we already have a class (!!!) 258 | return 259 | 260 | # split existing classes, decide if we all have them and if not, update 261 | exist_vas = set(exist_ret[0].split('|')) 262 | if vas <= exist_vas: 263 | return 264 | vas |= exist_vas 265 | 266 | # new, insert 267 | ins_sql = 'INSERT OR REPLACE INTO va_cache (rxcui, va, from_rxcui, rela, level) VALUES (?, ?, ?, ?, ?)' 268 | ins_val = '|'.join(vas) 269 | rxhandle.execute(ins_sql, (rxcui, ins_val, from_rxcui, via_rela, level)) 270 | 271 | def toDrugClasses(rxhandle, rxcui): 272 | sql = 'SELECT va FROM va_cache WHERE rxcui = ?' 
273 | res = rxhandle.fetchOne(sql, (rxcui,)) 274 | return res[0].split('|') if res is not None else [] 275 | 276 | 277 | def runImport(doc_handler=None): 278 | """ Run the actual linking. 279 | 280 | You can provide a :class:`DocHandler` subclass which will handle the JSON 281 | documents, for example store them to MongoDB for the MongoDocHandler. These 282 | classes are defined in `rxnorm_link_run.py` for now. 283 | """ 284 | 285 | # install keyboard interrupt handler 286 | def signal_handler(signal, frame): 287 | print("\nx> Aborted") 288 | sys.exit(0) 289 | signal.signal(signal.SIGINT, signal_handler) 290 | 291 | # prepare RxNorm databases 292 | try: 293 | RxNorm.check_database() 294 | rxhandle = RxNormLookup() 295 | rxhandle.prepare_to_cache_classes() 296 | except Exception as e: 297 | logging.error(e) 298 | sys.exit(1) 299 | 300 | # fetch rxcui's for drug-type concepts (i.e. restrict by TTY) 301 | drug_types = ('SCD', 'SCDC', 'SBDG', 'SBD', 'SBDC', 'BN', 'SBDF', 'SCDG', 'SCDF', 'IN', 'MIN', 'PIN', 'BPCK', 'GPCK') 302 | param = ', '.join(['?' 
for d in drug_types]) 303 | all_sql = "SELECT RXCUI, TTY from RXNCONSO where SAB='RXNORM' and TTY in ({})".format(param) 304 | 305 | all_drugs = rxhandle.fetchAll(all_sql, drug_types) 306 | num_drugs = len(all_drugs) 307 | 308 | # traverse VA classes; starts the VA drug class caching process if needed, 309 | # which runs a minute or two 310 | if rxhandle.can_cache(): 311 | initVA(rxhandle) 312 | traverseVA(rxhandle, rounds=5, expect=num_drugs) 313 | 314 | # loop all concepts 315 | i = 0 316 | w_ti = 0 317 | w_va = 0 318 | w_either = 0 319 | last_report = datetime.now() 320 | print('-> Indexing {} items'.format(num_drugs)) 321 | 322 | for res in all_drugs: 323 | params = [res[0]] 324 | params.extend(drug_types) 325 | label = rxhandle.lookup_rxcui_name(res[0]) # fast (indexed column) 326 | ndc = rxhandle.ndc_for_rxcui(res[0]) # fast (indexed column) 327 | ndc = RxNorm.ndc_normalize_list(ndc) # fast (string permutation) 328 | 329 | # find ingredients, drug classes and more 330 | ingr = toIngredients(rxhandle, [res[0]], res[1]) # rather slow 331 | ti = toTreatmentIntents(rxhandle, ingr, 'IN') # requires "ingr" 332 | va = toDrugClasses(rxhandle, res[0]) # fast, loads from our cached table 333 | gen = toBrandAndGeneric(rxhandle, [res[0]], res[1]) # fast 334 | comp = toComponents(rxhandle, [res[0]], res[1]) # fast 335 | mech = toMechanism(rxhandle, ingr, 'IN') # fast 336 | 337 | # create JSON-ready dictionary (save space by not adding empty properties) 338 | d = { 339 | 'rxcui': res[0], 340 | 'tty': res[1], 341 | 'label': label, 342 | } 343 | if len(ndc) > 0: 344 | d['ndc'] = list(ndc) 345 | 346 | if len(ingr) > 0: 347 | d['ingredients'] = list(ingr) 348 | if len(ti) > 0: 349 | d['treatmentIntents'] = list(ti) 350 | if len(va) > 0: 351 | d['drugClasses'] = list(va) 352 | if len(gen) > 0: 353 | d['generics'] = list(gen) 354 | if len(comp) > 0: 355 | d['components'] = list(comp) 356 | if len(mech) > 0: 357 | d['mechanisms'] = list(mech) 358 | 359 | # count 360 | i += 1 361 
| if len(ti) > 0: 362 | w_ti += 1 363 | if len(va) > 0: 364 | w_va += 1 365 | if len(ti) > 0 or len(va) > 0: 366 | w_either += 1 367 | 368 | # The dictionary "d" at this point contains all the drug's precomputed 369 | # properties, to debug print this: 370 | #print(json.dumps(d, sort_keys=True, indent=2)) 371 | if doc_handler: 372 | doc_handler.addDocument(d) 373 | 374 | # log progress every 2 seconds or so 375 | if (datetime.now() - last_report).seconds > 2: 376 | last_report = datetime.now() 377 | print('--> {:.1%} n: {}, ti: {}, va: {}, either: {}'.format(i / num_drugs, i, w_ti, w_va, w_either), end="\r") 378 | 379 | # loop done, finalize 380 | if doc_handler: 381 | doc_handler.finalize() 382 | 383 | print('--> {:.1%} n: {}, ti: {}, va: {}, either: {}'.format(i / num_drugs, i, w_ti, w_va, w_either)) 384 | print('-> Done') 385 | 386 | 387 | if '__main__' == __name__: 388 | logging.basicConfig(level=logging.INFO) 389 | logging.warn(''' Running linking without document handler, meaning no RxNorm document will be stored. 390 | Adjust and run `rxnorm_link_run.sh` for more control.''') 391 | runImport() 392 | -------------------------------------------------------------------------------- /rxnorm_link_run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Run this script to perform the RxNorm linking process and store the 5 | # documents in a database or flat file. 6 | 7 | import os 8 | import sys 9 | import logging 10 | 11 | from rxnorm_link import runImport 12 | 13 | 14 | class DocHandler(object): 15 | """ Superclass for simple database import. 16 | """ 17 | 18 | def __init__(self): 19 | self.documents = [] 20 | 21 | def addDocument(self, doc): 22 | if doc is not None: 23 | self.documents.append(doc) 24 | 25 | def finalize(self): 26 | pass 27 | 28 | 29 | class DebugDocHandler(DocHandler): 30 | """ Simply logs each new document. 
31 | """ 32 | def addDocument(self, doc): 33 | print(doc) 34 | 35 | def __str__(self): 36 | return "Debug logger" 37 | 38 | 39 | class SQLiteDocHandler(DocHandler): 40 | """ Handles documents for storage in sqlite3 41 | """ 42 | 43 | def __init__(self): 44 | super().__init__() 45 | from sqlite import SQLite 46 | absolute = os.path.dirname(os.path.realpath(__file__)) 47 | db_file = os.environ.get('SQLITE_FILE') 48 | db_file = db_file if db_file else os.path.join(absolute, 'databases/rxnorm.db') 49 | self.db_file = db_file 50 | self.handled = 0 51 | 52 | self.sqlite = SQLite.get(self.db_file) 53 | self.sqlite.execute('DROP TABLE IF EXISTS drug_cache') 54 | 55 | self.sqlite.execute('''CREATE TABLE drug_cache 56 | (rxcui varchar, property text, value text)''') 57 | 58 | self.sqlite.execute('CREATE INDEX i_drug_cache ON drug_cache (rxcui, property)') 59 | 60 | self.sqlite.execute('DROP VIEW IF EXISTS drug_treatments_by_ndc') 61 | self.sqlite.execute('''CREATE VIEW drug_treatments_by_ndc as 62 | select a.value as ndc, b.value as treatment_intent 63 | from drug_cache a join drug_cache b on a.rxcui=b.rxcui 64 | where a.property='ndc' and b.property='treatment_intent' 65 | ''') 66 | 67 | self.sqlite.execute('DROP VIEW IF EXISTS drug_classes_by_ndc') 68 | self.sqlite.execute('''CREATE VIEW drug_classes_by_ndc as 69 | select a.value as ndc, b.value as drug_class 70 | from drug_cache a join drug_cache b on a.rxcui=b.rxcui 71 | where a.property='ndc' and b.property='drug_class' 72 | ''') 73 | 74 | self.sqlite.execute('DROP VIEW IF EXISTS drug_ingredients_by_ndc') 75 | self.sqlite.execute('''CREATE VIEW drug_ingredients_by_ndc as 76 | select a.value as ndc, b.value as drug_ingredient, c.str as ingredient_name 77 | from drug_cache a join drug_cache b on a.rxcui=b.rxcui 78 | join RXNCONSO c on c.rxcui=b.value 79 | where a.property='ndc' and b.property='ingredient' 80 | and c.sab='RXNORM' and c.tty='IN' 81 | ''') 82 | def addDocument(self, doc): 83 | rxcui = doc.get('rxcui', '0') 
84 | fields = { 85 | 'tty': doc.get('tty', None), 86 | 'ndc': doc.get('ndc', None), 87 | 'label': doc.get('label', None), 88 | 'drug_class': doc.get('drugClasses', None), 89 | 'treatment_intent': doc.get('treatmentIntents', None), 90 | 'ingredient': doc.get('ingredients', None) 91 | } 92 | for k, v in fields.items(): 93 | if not v: continue 94 | v = v if isinstance(v, list) else [v] 95 | for vv in v: 96 | self.sqlite.execute( 97 | 'INSERT INTO drug_cache(rxcui, property, value) values(?, ?, ?)', 98 | (rxcui, k, vv)) 99 | self.handled += 1 100 | if (self.handled % 50 == 0): self.sqlite.commit() 101 | 102 | def finalize(self): 103 | self.sqlite.commit() 104 | 105 | def __str__(self): 106 | return "SQLite import {}".format(self.db_file) 107 | 108 | 109 | class MongoDocHandler(DocHandler): 110 | """ Handles documents for storage in MongoDB. 111 | """ 112 | 113 | def __init__(self): 114 | super().__init__() 115 | db_host = os.environ.get('MONGO_HOST') 116 | db_host = db_host if db_host else 'localhost' 117 | db_port = int(os.environ.get('MONGO_PORT')) 118 | db_port = db_port if db_port else 27017 119 | db_name = os.environ.get('MONGO_DB') 120 | db_name = db_name if db_name else 'default' 121 | db_bucket = os.environ.get('MONGO_BUCKET') 122 | db_bucket = db_bucket if db_bucket else 'rxnorm' 123 | 124 | import pymongo # imported here so it's only imported when using Mongo 125 | conn = pymongo.MongoClient(host=db_host, port=db_port) 126 | db = conn[db_name] 127 | 128 | # authenticate 129 | db_user = os.environ.get('MONGO_USER') 130 | db_pass = os.environ.get('MONGO_PASS') 131 | if db_user and db_pass: 132 | db.authenticate(db_user, db_pass) 133 | 134 | self.mng = db[db_bucket] 135 | self.mng.ensure_index('ndc') 136 | self.mng.ensure_index('label', text=pymongo.TEXT) 137 | 138 | def addDocument(self, doc): 139 | lbl = doc.get('label') 140 | if lbl and len(lbl) > 1010: # indexed, cannot be > 1024 in total 141 | doc['fullLabel'] = lbl 142 | doc['label'] = lbl[:1010] 143 | 144 
| super().addDocument(doc) 145 | if len(self.documents) > 50: 146 | self._insertAndClear() 147 | 148 | def finalize(self): 149 | self._insertAndClear() 150 | 151 | def _insertAndClear(self): 152 | if len(self.documents) > 0: 153 | self.mng.insert(self.documents) 154 | self.documents.clear() 155 | 156 | def __str__(self): 157 | return "MongoDB at {}".format(self.mng) 158 | 159 | 160 | class CSVHandler(DocHandler): 161 | """ Handles CSV export. """ 162 | 163 | def __init__(self): 164 | super().__init__() 165 | self.csv_file = 'rxnorm.csv' 166 | self.csv_handle = open(self.csv_file, 'w') 167 | self.csv_handle.write("rxcui,tty,ndc,name,va_classes,treating,ingredients\n") 168 | 169 | def addDocument(self, doc): 170 | self.csv_handle.write('{},"{}","{}","{}","{}","{}","{}"{}'.format( 171 | doc.get('rxcui', '0'), 172 | doc.get('tty', ''), 173 | doc.get('ndc', ''), 174 | doc.get('label', ''), 175 | ';'.join(doc.get('drugClasses') or []), 176 | ';'.join(doc.get('treatmentIntents') or []), 177 | ';'.join(doc.get('ingredients') or []), 178 | "\n" 179 | )) 180 | 181 | def __str__(self): 182 | return 'CSV file "{}"'.format(self.csv_file) 183 | 184 | 185 | def runLinking(ex_type): 186 | """ Create the desired handler and run import. 
187 | """ 188 | handler = DebugDocHandler() 189 | if ex_type is not None and len(ex_type) > 0: 190 | try: 191 | if 'mongo' == ex_type: 192 | handler = MongoDocHandler() 193 | elif 'couch' == ex_type: 194 | # import couchbase 195 | raise Exception('Couchbase not implemented') 196 | elif 'csv' == ex_type: 197 | handler = CSVHandler() 198 | elif 'sqlite' == ex_type: 199 | handler = SQLiteDocHandler() 200 | else: 201 | raise Exception('Unsupported export type: {}'.format(ex_type)) 202 | except Exception as e: 203 | logging.error(e) 204 | sys.exit(1) 205 | 206 | print('-> Processing to {}'.format(handler)) 207 | runImport(doc_handler=handler) 208 | 209 | 210 | if '__main__' == __name__: 211 | logging.basicConfig(level=logging.INFO) 212 | 213 | cmd_arg = sys.argv[1] if len(sys.argv) > 1 else None 214 | ex_type = os.environ.get('EXPORT_TYPE') or cmd_arg 215 | 216 | runLinking(ex_type) 217 | 218 | -------------------------------------------------------------------------------- /rxnorm_link_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # to make it simple we include the variables here instead of creating yet another file 4 | 5 | # export type, supported are: "csv", "mongo", "sqlite" 6 | # if run without setting a type will simply print to console 7 | export EXPORT_TYPE= 8 | 9 | # MongoDB parameters 10 | export MONGO_HOST='localhost' 11 | export MONGO_PORT=27017 12 | export MONGO_USER= 13 | export MONGO_PASS= 14 | export MONGO_DB= 15 | export MONGO_BUCKET='rxnorm' 16 | 17 | # SQLite parameters 18 | export SQLITE_FILE='databases/rxnorm.db' 19 | 20 | # TODO: add a Couchbase version 21 | 22 | # run the setup script with these environment variables 23 | python3 rxnorm_link_run.py 24 | -------------------------------------------------------------------------------- /rxnorm_tests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 
class RxNormTest(unittest.TestCase):
	""" Test :class:`RxNorm`.
	
	Currently only exercises the static NDC normalization, which should
	turn any supported NDC format into the 11-digit 5-4-2 HIPAA form.
	"""
	
	def test_ndc_normalization(self):
		""" Test NDC normalization.
		
		Covers every x-y-z input format the normalizer supports, plus
		12-digit VANDF codes, already-normalized input and invalid input
		(wrong lengths or non-numeric characters), which must yield None.
		"""
		# 6-4-2
		self.assertEqual('00074148614', RxNorm.ndc_normalize('000074-1486-14'))
		self.assertEqual('51227615900', RxNorm.ndc_normalize('051227-6159-**'))
		self.assertEqual('58734000101', RxNorm.ndc_normalize('058734-0001-*1'))
		
		# 6-4-1
		self.assertEqual('00854684102', RxNorm.ndc_normalize('000854-6841-2'))
		
		# 6-4: treat as 6-4-2 with two trailing zeroes
		self.assertEqual('57982011000', RxNorm.ndc_normalize('057982-0110'))
		self.assertEqual('12579005600', RxNorm.ndc_normalize('012579-*056'))
		
		# 6-3-2
		self.assertEqual('57982012312', RxNorm.ndc_normalize('057982-123-12'))
		
		# 6-3-1
		self.assertEqual('57982098709', RxNorm.ndc_normalize('057982-987-9'))
		
		# 5-4-2
		self.assertEqual('17317093201', RxNorm.ndc_normalize('17317-0932-01'))
		
		# 5-4-1
		self.assertEqual('36987315601', RxNorm.ndc_normalize('36987-3156-1'))
		
		# 5-3-2
		self.assertEqual('24730041205', RxNorm.ndc_normalize('24730-412-05'))
		
		# 4-4-2
		self.assertEqual('00268010310', RxNorm.ndc_normalize('0268-0103-10'))
		
		# 12 digit VANDF
		self.assertEqual('03475476541', RxNorm.ndc_normalize('003475476541'))
		
		# normalized already
		self.assertEqual('04458632698', RxNorm.ndc_normalize('04458632698'))
		
		# invalid
		self.assertIsNone(RxNorm.ndc_normalize('0054478962'))
		self.assertIsNone(RxNorm.ndc_normalize('547668531244'))
		self.assertIsNone(RxNorm.ndc_normalize('0054478962796'))
		self.assertIsNone(RxNorm.ndc_normalize('0a79b2-c87-9'))
		self.assertIsNone(RxNorm.ndc_normalize('si-lly-te-st'))
		self.assertIsNone(RxNorm.ndc_normalize('just-a-rand-test-string'))
41 | .format(os.path.abspath(snomed_db))) 42 | 43 | @classmethod 44 | def find_needed_files(cls, snomed_dir): 45 | 46 | # table to file mapping 47 | prefixes = { 48 | 'descriptions': 'sct2_Description_Full-en_', 49 | 'relationships': 'sct2_Relationship_Full_' 50 | } 51 | found = {} 52 | snomed_dir = sys.argv[1] 53 | 54 | # try to find the files 55 | for table, prefix in prefixes.items(): 56 | found_file = _find_files(snomed_dir, prefix) 57 | if found_file is None: 58 | raise Exception('Unable to locate file starting with "{}" in SNOMED directory at {}'.format(prefix, snomed_dir)) 59 | found[table] = found_file 60 | 61 | return found 62 | 63 | @classmethod 64 | def import_from_files(cls, rx_map): 65 | for table, filepath in rx_map.items(): 66 | num_query = 'SELECT COUNT(*) FROM {}'.format(table) 67 | num_existing = cls.sqlite_handle.executeOne(num_query, ())[0] 68 | if num_existing > 0: 69 | continue 70 | 71 | cls.import_csv_into_table(filepath, table) 72 | 73 | @classmethod 74 | def import_csv_into_table(cls, snomed_file, table_name): 75 | """ Import SNOMED CSV into our SQLite database. 76 | The SNOMED CSV files can be parsed by Python's CSV parser with the 77 | "excel-tab" flavor. 
78 | """ 79 | 80 | logging.debug('Importing SNOMED {} into snomed.db...'.format(table_name)) 81 | 82 | # not yet imported, parse tab-separated file and import 83 | with open(snomed_file, encoding='utf-8') as csv_handle: 84 | cls.sqlite_handle.isolation_level = 'EXCLUSIVE' 85 | sql = cls.insert_query_for(table_name) 86 | reader = csv.reader(csv_handle, dialect='excel-tab') 87 | i = 0 88 | try: 89 | for row in reader: 90 | if i > 0: # first row is the header row 91 | 92 | # execute SQL (we just ignore duplicates) 93 | params = cls.insert_tuple_from_csv_row_for(table_name, row) 94 | try: 95 | cls.sqlite_handle.execute(sql, params) 96 | except Exception as e: 97 | sys.exit('Cannot insert {}: {}'.format(params, e)) 98 | i += 1 99 | 100 | # commit to file 101 | cls.sqlite_handle.commit() 102 | cls.did_import(table_name) 103 | cls.sqlite_handle.isolation_level = None 104 | 105 | except csv.Error as e: 106 | cls.sqlite_handle.rollback() 107 | sys.exit('CSV error on line {}: {}'.format(reader.line_num, e)) 108 | 109 | logging.debug('{} concepts parsed'.format(i-1)) 110 | 111 | 112 | @classmethod 113 | def setup_tables(cls): 114 | """ Creates the SQLite tables we need, not the tables we deserve. 
115 | Does nothing if the tables/indexes already exist 116 | """ 117 | if cls.sqlite_handle is None: 118 | cls.sqlite_handle = SQLite.get(cls.database_path()) 119 | 120 | # descriptions 121 | cls.sqlite_handle.create('descriptions', '''( 122 | concept_id INTEGER PRIMARY KEY, 123 | lang TEXT, 124 | term TEXT, 125 | isa VARCHAR, 126 | active INT 127 | )''') 128 | 129 | # relationships 130 | cls.sqlite_handle.create('relationships', '''( 131 | relationship_id INTEGER PRIMARY KEY, 132 | source_id INT, 133 | destination_id INT, 134 | rel_type INT, 135 | rel_text VARCHAR, 136 | active INT 137 | )''') 138 | 139 | @classmethod 140 | def insert_query_for(cls, table_name): 141 | """ Returns the insert query needed for the given table 142 | """ 143 | if 'descriptions' == table_name: 144 | return '''INSERT OR IGNORE INTO descriptions 145 | (concept_id, lang, term, isa, active) 146 | VALUES 147 | (?, ?, ?, ?, ?)''' 148 | if 'relationships' == table_name: 149 | return '''INSERT OR IGNORE INTO relationships 150 | (relationship_id, source_id, destination_id, rel_type, active) 151 | VALUES 152 | (?, ?, ?, ?, ?)''' 153 | return None 154 | 155 | @classmethod 156 | def insert_tuple_from_csv_row_for(cls, table_name, row): 157 | if 'descriptions' == table_name: 158 | isa = '' 159 | if len(row) > 6: 160 | if '900000000000013009' == row[6]: 161 | isa = 'synonym' 162 | elif '900000000000003001' == row[6]: 163 | isa = 'full' 164 | return (int(row[4]), row[5], row[7], isa, int(row[2])) 165 | if 'relationships' == table_name: 166 | return (int(row[0]), int(row[4]), int(row[5]), int(row[7]), int(row[2])) 167 | return None 168 | 169 | @classmethod 170 | def did_import(cls, table_name): 171 | """ Allows us to set hooks after tables have been imported. 172 | 173 | Creates indexes and names `isa` and `finding_site` relationships. 
174 | """ 175 | # index descriptions 176 | if 'descriptions' == table_name: 177 | print("----- DID IMPORT descriptions") 178 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS isa_index ON descriptions (isa)") 179 | 180 | # update and index relationships 181 | if 'relationships' == table_name: 182 | print("----- DID IMPORT relationships") 183 | cls.sqlite_handle.execute("UPDATE relationships SET rel_text = 'isa' WHERE rel_type = 116680003") 184 | cls.sqlite_handle.execute("UPDATE relationships SET rel_text = 'finding_site' WHERE rel_type = 363698007") 185 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS source_index ON relationships (source_id)") 186 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS destination_index ON relationships (destination_id)") 187 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS rel_text_index ON relationships (rel_text)") 188 | 189 | 190 | class SNOMEDLookup(object): 191 | """ SNOMED lookup """ 192 | 193 | sqlite = None 194 | 195 | def __init__(self): 196 | self.sqlite = SQLite.get(SNOMED.database_path()) 197 | 198 | def lookup_code_meaning(self, snomed_id, preferred=True, no_html=True): 199 | """ Returns HTML for all matches of the given SNOMED id. 200 | The "preferred" flag here currently has no function. 201 | """ 202 | if snomed_id is None or len(snomed_id) < 1: 203 | return '' 204 | 205 | sql = 'SELECT term, isa, active FROM descriptions WHERE concept_id = ?' 206 | names = [] 207 | 208 | # loop over results 209 | for res in self.sqlite.execute(sql, (snomed_id,)): 210 | if not no_html and ('synonym' == res[1] or 0 == res[2]): 211 | names.append("{}".format(res[0])) 212 | else: 213 | names.append(res[0]) 214 | 215 | if no_html: 216 | return ", ".join(names) if len(names) > 0 else '' 217 | return "
\n".join(names) if len(names) > 0 else '' 218 | 219 | def lookup_if_isa(self, child_id, parent_id, checked=None): 220 | """ Determines if a child concept is refining a parent concept, i.e. 221 | if there is a (direct or indirect) "is a" (116680003) relationship from 222 | child to parent. 223 | """ 224 | if not child_id or not parent_id: 225 | return False 226 | if checked is not None and child_id in checked: 227 | return False 228 | 229 | parents = self.lookup_parents_of(child_id) 230 | if parent_id in parents: 231 | return True 232 | 233 | chkd = checked or [] 234 | chkd.append(child_id) 235 | for parent in parents: 236 | flag = self.lookup_if_isa(parent, parent_id, chkd) 237 | if flag: 238 | return True 239 | return False 240 | 241 | def lookup_parents_of(self, snomed_id): 242 | """ Returns a list of concept ids that have a direct "is a" (116680003) 243 | relationship with the given id. 244 | """ 245 | ids = [] 246 | if snomed_id: 247 | #sql = 'SELECT destination_id FROM relationships WHERE source_id = ? AND rel_type = 116680003' # Too slow!! 248 | sql = 'SELECT destination_id, rel_text FROM relationships WHERE source_id = ?' 249 | for res in self.sqlite.execute(sql, (snomed_id,)): 250 | if 'isa' == res[1]: 251 | ids.append(str(res[0])) 252 | return ids 253 | 254 | 255 | class SNOMEDConcept(object): 256 | """ Represents a SNOMED concept. 257 | """ 258 | uplooker = SNOMEDLookup() 259 | 260 | def __init__(self, code): 261 | self.code = code 262 | self._term = None 263 | 264 | @property 265 | def term(self): 266 | if self._term is None: 267 | self._term = self.__class__.uplooker.lookup_code_meaning(self.code) 268 | return self._term 269 | 270 | def isa(self, parent_code): 271 | """ Checks whether the receiver is a child of the given code. 272 | The `parent_code` argument can also be a :class:`SNOMEDConcept` 273 | instance. 
274 | 275 | :returns: A bool on whether the receiver is a child of the given 276 | concept 277 | """ 278 | if isinstance(parent_code, SNOMEDConcept): 279 | return self.__class__.uplooker.lookup_if_isa(self.code, parent_code.code) 280 | return self.__class__.uplooker.lookup_if_isa(self.code, parent_code) 281 | 282 | 283 | # find file function 284 | def _find_files(directory, prefix): 285 | for root, dirs, files in os.walk(directory): 286 | for name in files: 287 | if name.startswith(prefix): 288 | return os.path.join(directory, name) 289 | 290 | for name in dirs: 291 | found = _find_files(os.path.join(directory, name), prefix) 292 | if found: 293 | return found 294 | return None 295 | 296 | 297 | # running this as a script does the database setup/check 298 | if '__main__' == __name__: 299 | logging.basicConfig(level=logging.DEBUG) 300 | 301 | # if the database check fails, run import commands 302 | try: 303 | SNOMED.check_database() 304 | except SNOMEDDBNotPresentException as e: 305 | if len(sys.argv) < 2: 306 | print("Provide the path to the extracted SNOMED (RF2) directory as first argument.") 307 | print("Download SNOMED from http://www.nlm.nih.gov/research/umls/licensedcontent/snomedctfiles.html""") 308 | sys.exit(0) 309 | 310 | # import from files 311 | try: 312 | found = SNOMED.find_needed_files(sys.argv[1]) 313 | SNOMED.sqlite_handle = None 314 | SNOMED.setup_tables() 315 | SNOMED.import_from_files(found) 316 | except Exception as e: 317 | print("SNOMED import failed: {}".format(e)) 318 | sys.exit(0) 319 | 320 | # examples 321 | cpt = SNOMEDConcept('215350009') 322 | print('SNOMED code "{0}": {1}'.format(cpt.code, cpt.term)) 323 | 324 | cpt = SNOMEDConcept('315004001') # -> 128462008 -> 363346000 -> 55342001 x> 215350009 325 | for other, expected in [('128462008', True), ('363346000', True), ('55342001', True), ('215350009', False)]: 326 | print('SNOMED code "{0}" refines "{1}": {2}'.format(cpt.code, other, cpt.isa(other))) 327 | assert expected == 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# SNOMED unit testing (header previously said "RxNorm unit testing" —
# copy/paste slip from the sibling test file)
#
# 2014-04-18 Created

import sys
import os.path
thismodule = os.path.abspath(os.path.dirname(__file__))
if thismodule not in sys.path:
	sys.path.insert(0, thismodule)

import unittest
from snomed import *


class SNOMEDLookupTest(unittest.TestCase):
	""" Test SNOMED term lookup and hierarchy traversal via
	:class:`SNOMEDConcept`. Requires the local SNOMED database to be set up.
	"""
	def setUp(self):
		SNOMED.check_database()

	def test_term_lookup(self):
		""" Test term lookup.
		"""
		cpt = SNOMEDConcept('215350009')
		self.assertEqual(cpt.term, 'Accident involving being caught in door of road vehicle NEC, occupant of tram injured (event)')
		cpt = SNOMEDConcept('315004001')
		self.assertEqual(cpt.term, 'Metastasis from malignant tumor of breast')

	def test_hierarchy_isa(self):
		""" Test hierarchical "is a" lookup, both with raw codes and with
		:class:`SNOMEDConcept` instances.
		"""
		cpt = SNOMEDConcept('315004001')		# Metastasis from malignant tumor of breast
		child = SNOMEDConcept('128462008')		# Metastatic neoplasm (disease)
		self.assertTrue(cpt.isa(child.code))
		child = SNOMEDConcept('363346000')		# Malignant neoplastic disease (disorder)
		self.assertTrue(cpt.isa(child))
		child = SNOMEDConcept('55342001')		# Neoplasia
		self.assertTrue(cpt.isa(child.code))
		child = SNOMEDConcept('408643008')		# Infiltrating duct carcinoma of breast
		self.assertFalse(cpt.isa(child.code))
#!/usr/bin/env python3
#
# Simplifying SQLite access
#
# 2012-12-14 Created by Pascal Pfiffner
#


import sqlite3
import threading


# per-thread, per-database cache of SQLite instances, managed by SQLite.get()
SQLITE_INSTANCES = {}


class SQLite(object):
	""" Thin convenience wrapper around an `sqlite3` connection.

	The connection and cursor are opened lazily on first use; use the
	:meth:`get` factory to share instances within a thread.
	"""

	@classmethod
	def get(cls, database):
		""" Use this to get SQLite instances for a given database. Avoids
		creating multiple instances for the same database.

		We keep instances around per thread per database; entries belonging
		to threads that are no longer alive are released on every call.
		"""
		global SQLITE_INSTANCES

		# group per thread
		thread_id = threading.current_thread().ident
		if thread_id not in SQLITE_INSTANCES:
			SQLITE_INSTANCES[thread_id] = {}
		by_thread = SQLITE_INSTANCES[thread_id]

		# group per database
		if database not in by_thread:
			by_thread[database] = cls(database)

		# free up memory for terminated threads
		alive_ids = {t.ident for t in threading.enumerate()}
		SQLITE_INSTANCES = {tid: dbs for tid, dbs in SQLITE_INSTANCES.items() if tid in alive_ids}

		return by_thread[database]


	def __init__(self, database=None):
		""" :param str database: Path to the database file (required) """
		if database is None:
			raise Exception('No database provided')

		self.database = database	# path handed to sqlite3.connect()
		self.handle = None			# sqlite3.Connection, opened lazily
		self.cursor = None			# sqlite3.Cursor, opened lazily


	def execute(self, sql, params=()):
		""" Executes an SQL command and returns the cursor, which can be
		used as an iterator.
		Supply the params as tuple, i.e. (param,) and (param1, param2, ...)
		"""
		if not sql:
			raise Exception('No SQL to execute')
		if not self.cursor:
			self.connect()

		return self.cursor.execute(sql, params)


	def executeInsert(self, sql, params=()):
		""" Executes an SQL command (should be INSERT OR REPLACE) and
		returns the last row id, 0 on failure.
		"""
		if self.execute(sql, params):
			return self.cursor.lastrowid if self.cursor.lastrowid else 0
		return 0


	def executeUpdate(self, sql, params=()):
		""" Executes an SQL command (should be UPDATE) and returns the
		number of affected rows.
		"""
		if self.execute(sql, params):
			return self.cursor.rowcount
		return 0


	def executeOne(self, sql, params):
		""" Returns the first row returned by executing the command, or
		None if there is no result.
		"""
		self.execute(sql, params)
		return self.cursor.fetchone()


	def hasTable(self, table_name):
		""" Returns whether the given table exists. """
		# single quotes for the string literal: double quotes are identifier
		# quotes in SQL and only worked via SQLite's non-standard fallback
		sql = "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?"
		ret = self.executeOne(sql, (table_name,))
		return bool(ret and ret[0] > 0)

	def create(self, table_name, table_structure):
		""" Executes a CREATE TABLE IF NOT EXISTS query with the given structure.
		Input is NOT sanitized, watch it!
		"""
		self.execute('CREATE TABLE IF NOT EXISTS {} {}'.format(table_name, table_structure))
		return True


	def commit(self):
		self.handle.commit()

	def rollback(self):
		self.handle.rollback()


	def connect(self):
		""" Opens connection and cursor, if not yet open. """
		if self.cursor is not None:
			return

		self.handle = sqlite3.connect(self.database)
		self.cursor = self.handle.cursor()

	def close(self):
		""" Closes the connection and drops handle and cursor. """
		if self.cursor is None:
			return

		self.handle.close()
		self.cursor = None
		self.handle = None
# NOTE(review): the `class UMLS (object):` header line and module imports
# (sys, os.path, logging, `from sqlite import SQLite`) sit at the top of this
# file, just before this chunk; the class header is repeated here so the
# block is self-contained.
class UMLS (object):
	""" A class for importing UMLS terminologies into an SQLite database.
	"""

	@classmethod
	def check_database(cls):
		""" Check if our database is in place and if not, prompts to import it.
		Will raise on errors!

		UMLS: (umls.db)
		If missing prompt to use the `umls.sh` script
		"""
		umls_db = os.path.join('databases', 'umls.db')
		if not os.path.exists(umls_db):
			raise Exception("The UMLS database at {} does not exist. Run the import script `databases/umls.sh`."
				.format(os.path.abspath(umls_db)))



class UMLSLookup (object):
	""" UMLS lookup """

	sqlite = None
	did_check_dbs = False
	# source vocabularies reported when `preferred` is set; each entry is
	# pre-quoted for direct interpolation into the SQL "IN (...)" clause
	preferred_sources = ['"SNOMEDCT"', '"MTH"']

	def __init__(self):
		absolute = os.path.dirname(os.path.realpath(__file__))
		self.sqlite = SQLite.get(os.path.join(absolute, 'databases/umls.db'))

	def lookup_code(self, cui, preferred=True):
		""" Return a list with triples that contain:
		- name
		- source
		- semantic type
		by looking it up in our "descriptions" database.
		The "preferred" setting has the effect that only names from SNOMED
		(SNOMEDCT) and the Metathesaurus (MTH) will be reported. A lookup in
		our "descriptions" table is much faster than combing through the
		full MRCONSO table.

		A leading "-" on the CUI marks it as negated; anything after an "@"
		is stripped before the lookup.

		:returns: A list of triples with (name, sab, sty)
		"""
		if cui is None or len(cui) < 1:
			return []

		# lazy UMLS db checking
		if not UMLSLookup.did_check_dbs:
			UMLS.check_database()
			UMLSLookup.did_check_dbs = True

		# take care of negations
		negated = '-' == cui[0]
		if negated:
			cui = cui[1:]

		parts = cui.split('@', 1)
		lookup_cui = parts[0]

		# STR: Name
		# SAB: Abbreviated Source Name
		# STY: Semantic Type
		if preferred:
			sql = 'SELECT STR, SAB, STY FROM descriptions WHERE CUI = ? AND SAB IN ({})'.format(", ".join(UMLSLookup.preferred_sources))
		else:
			sql = 'SELECT STR, SAB, STY FROM descriptions WHERE CUI = ?'

		# return as list
		arr = []
		for res in self.sqlite.execute(sql, (lookup_cui,)):
			if negated:
				# keep the documented triple shape; previously this appended
				# "[NEGATED] {}".format(res[0], res[1], res[2]) — a plain
				# string that silently dropped source and semantic type
				arr.append(("[NEGATED] {}".format(res[0]), res[1], res[2]))
			else:
				arr.append(res)

		return arr


	def lookup_code_meaning(self, cui, preferred=True, no_html=True):
		""" Return a string (an empty string if the cui is null or not found)
		by looking it up in our "descriptions" database.
		The "preferred" setting has the effect that only names from SNOMED
		(SNOMEDCT) and the Metathesaurus (MTH) will be reported. A lookup in
		our "descriptions" table is much faster than combing through the
		full MRCONSO table.
		"""
		names = []
		for res in self.lookup_code(cui, preferred):
			if no_html:
				names.append("{} ({}) [{}]".format(res[0], res[1], res[2]))
			else:
				names.append("{} ({}: {})".format(res[0], res[1], res[2]))

		# NOTE(review): the HTML line-break markup appears stripped from this
		# copy of the file; confirm the non-HTML separator upstream
		comp = ", " if no_html else "\n"
		return comp.join(names) if len(names) > 0 else ''


	def lookup_code_for_name(self, name, preferred=True):
		""" Tries to find a good concept code for the given concept name.

		Uses our indexed `descriptions` table.

		:returns: A list of triples with (cui, sab, sty); empty for empty
			input (previously returned None, which crashed callers that
			iterate the result, such as this module's own __main__ block)
		"""
		if name is None or len(name) < 1:
			return []

		# lazy UMLS db checking
		if not UMLSLookup.did_check_dbs:
			UMLS.check_database()
			UMLSLookup.did_check_dbs = True

		# CUI: Concept-ID
		# STR: Name
		# SAB: Abbreviated Source Name
		# STY: Semantic Type
		if preferred:
			sql = 'SELECT CUI, SAB, STY FROM descriptions WHERE STR LIKE ? AND SAB IN ({})'.format(", ".join(UMLSLookup.preferred_sources))
		else:
			sql = 'SELECT CUI, SAB, STY FROM descriptions WHERE STR LIKE ?'

		# return as list
		arr = []
		for res in self.sqlite.execute(sql, ('%' + name + '%',)):
			arr.append(res)

		return arr



# running this as a script does the database setup/check
if '__main__' == __name__:
	UMLS.check_database()

	# examples
	look = UMLSLookup()
	code = 'C0002962'
	meaning = look.lookup_code_meaning(code)
	print('UMLS code "{0}": {1}'.format(code, meaning))

	name = 'Pulmonary Arterial Hypertension'
	print('Search for "{}" returns:'.format(name))
	codes = look.lookup_code_for_name(name)
	for cd in codes:
		print('{}: {}'.format(cd, look.lookup_code_meaning(cd[0])))