├── __init__.py
├── .gitignore
├── LICENSE.txt
├── rxnorm_link_run.sh
├── rxnorm_graph.py
├── docs
├── index.rst
├── py-umls.rst
├── Makefile
└── conf.py
├── snomed_tests.py
├── rxnorm_download.py
├── rxnorm_tests.py
├── csvimporter.py
├── README.md
├── loinc.py
├── databases
├── rxnorm.sh
└── umls.sh
├── sqlite.py
├── umls.py
├── graphable.py
├── rxnorm_link_run.py
├── snomed.py
├── rxnorm_link.py
└── rxnorm.py
/__init__.py:
--------------------------------------------------------------------------------
import sys
import os.path

# Ensure this package's own directory is on the module search path, so that
# sibling modules (rxnorm, snomed, umls, ...) can be imported as top-level
# modules regardless of where the package is checked out.
abspath = os.path.dirname(os.path.abspath(__file__))
if abspath not in sys.path:
	sys.path.insert(0, abspath)
6 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # ignore database files but not the import scripts
2 | databases/*.db
3 |
4 | # virtualenv
5 | env
6 |
7 | # docs
8 | docs/_build
9 |
10 | # system files
11 | .DS_Store
12 | __pycache__
13 |
14 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright 2015 Boston Children's Hospital
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 |
15 |
--------------------------------------------------------------------------------
/rxnorm_link_run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # to make it simple we include the variables here instead of creating yet another file
4 |
5 | # export type, supported are: "csv", "mongo", "sqlite"
6 | # if run without setting a type will simply print to console
7 | export EXPORT_TYPE=
8 |
9 | # MongoDB parameters
10 | export MONGO_HOST='localhost'
11 | export MONGO_PORT=27017
12 | export MONGO_USER=
13 | export MONGO_PASS=
14 | export MONGO_DB=
15 | export MONGO_BUCKET='rxnorm'
16 |
17 | # SQLite parameters
18 | export SQLITE_FILE='databases/rxnorm.db'
19 |
20 | # TODO: add a Couchbase version
21 |
22 | # run the setup script with these environment variables
23 | python3 rxnorm_link_run.py
24 |
--------------------------------------------------------------------------------
/rxnorm_graph.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Draw an RxNorm graph for a given RxCUI.
5 | # You must have "dot" installed (Graphviz)
6 | #
7 | # 2014-02-18 Created by Pascal Pfiffner
8 |
9 | import sys
10 | import subprocess
11 |
12 | from rxnorm import RxNormCUI
13 | from graphable import GraphvizGraphic
14 |
15 |
if '__main__' == __name__:
	# expect exactly one argument: the RXCUI to draw the graph for
	rxcui = sys.argv[1] if 2 == len(sys.argv) else None
	if rxcui is None:
		print('x> Provide a RXCUI as first argument')
		sys.exit(1)		# usage error: signal failure to the caller (was exit 0)

	rx = RxNormCUI(rxcui)
	gv = GraphvizGraphic('rxgraph.pdf')
	gv.out_dot = 'rxgraph.dot'
	gv.max_depth = 8
	gv.max_width = 15

	gv.write_dot_graph(rx)

	print('-> DOT file: {}'.format(gv.out_dot))
	# out_file is 'rxgraph.pdf', so label it as such (was mislabeled "PNG graph")
	print('-> PDF graph: {}'.format(gv.out_file))

	# 'open' is macOS-specific; on other platforms open the file manually
	subprocess.call(['open', gv.out_file])
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. py-umls documentation master file, created by
2 | sphinx-quickstart on Fri Apr 18 20:08:31 2014.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | py-umls
7 | =======
8 |
9 | This is the documentation for `py-umls` (https://github.com/chb/py-umls), a simple Python 3 library that helps deal with RxNorm, SNOMED and UMLS resources.
10 | Development is ongoing, based on the needs of the developer, and documentation is sparse at best.
11 |
12 | This module creates **SQLite** databases from UMLS downloads.
13 | The scripts that perform this task can be found in the `databases` directory.
14 |
15 | Contents:
16 |
17 | .. toctree::
18 | :maxdepth: 4
19 |
20 | py-umls
21 |
22 |
23 | Index & Search
24 | --------------
25 |
26 | * :ref:`genindex`
27 | * :ref:`modindex`
28 | * :ref:`search`
29 |
30 |
--------------------------------------------------------------------------------
/snomed_tests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # SNOMED unit testing
5 | #
6 | # 2014-04-18 Created
7 |
8 | import sys
9 | import os.path
10 | thismodule = os.path.abspath(os.path.dirname(__file__))
11 | if thismodule not in sys.path:
12 | sys.path.insert(0, thismodule)
13 |
14 | import unittest
15 | from snomed import *
16 |
17 |
class SNOMEDLookupTest(unittest.TestCase):
	""" Test term and hierarchy lookup on :class:`SNOMEDConcept`.

	Requires the local SNOMED SQLite database to have been imported
	(see `snomed.py`), otherwise `setUp` raises.
	"""
	def setUp(self):
		# raises if the SNOMED database has not been set up yet
		SNOMED.check_database()

	def test_term_lookup(self):
		""" Test term lookup.
		"""
		cpt = SNOMEDConcept('215350009')
		self.assertEqual(cpt.term, 'Accident involving being caught in door of road vehicle NEC, occupant of tram injured (event)')
		cpt = SNOMEDConcept('315004001')
		self.assertEqual(cpt.term, 'Metastasis from malignant tumor of breast')

	def test_hierarchy_isa(self):
		""" Test hierarchical lookup.
		"""
		cpt = SNOMEDConcept('315004001')		# Metastasis from malignant tumor of breast
		child = SNOMEDConcept('128462008')		# Metastatic neoplasm (disease)
		self.assertTrue(cpt.isa(child.code))
		child = SNOMEDConcept('363346000')		# Malignant neoplastic disease (disorder)
		# NOTE(review): here `isa` receives the concept object itself, not its
		# code as elsewhere — presumably `isa` accepts both; confirm in snomed.py
		self.assertTrue(cpt.isa(child))
		child = SNOMEDConcept('55342001')		# Neoplasia
		self.assertTrue(cpt.isa(child.code))
		child = SNOMEDConcept('408643008')		# Infiltrating duct carcinoma of breast
		self.assertFalse(cpt.isa(child.code))
44 |
45 |
--------------------------------------------------------------------------------
/docs/py-umls.rst:
--------------------------------------------------------------------------------
1 | py-umls package
2 | ===============
3 |
4 | This package contains three modules with classes useful for dealing with **RxNorm**, plus a module each for UMLS and SNOMED handling.
5 |
6 |
7 | rxnorm
8 | ------
9 |
10 | Provides classes that deal with RxNorm. This is very much WiP!
11 |
12 | .. automodule:: rxnorm
13 | :members:
14 | :undoc-members:
15 | :show-inheritance:
16 |
17 | rxnorm_link
18 | -----------
19 |
20 | A script used to create JSON documents from most RxNorm concepts and store them into a NoSQL database. This is very much WiP!
21 |
22 | .. automodule:: rxnorm_link
23 | :members:
24 | :undoc-members:
25 | :show-inheritance:
26 |
27 | rxnorm_graph
28 | ------------
29 |
30 | A useful script to help visualize relationships between RxNorm concepts, starting from a given RXCUI.
31 | Just run this script in your command line and follow the leader.
32 |
33 | .. automodule:: rxnorm_graph
34 | :members:
35 | :undoc-members:
36 | :show-inheritance:
37 |
38 | umls
39 | ----
40 |
41 | Module to deal with UMLS lexica.
42 |
43 | .. automodule:: umls
44 | :members:
45 | :undoc-members:
46 | :show-inheritance:
47 |
48 | snomed
49 | ------
50 |
51 | Module to deal with the SNOMED terminology.
52 |
53 | .. automodule:: snomed
54 | :members:
55 | :undoc-members:
56 | :show-inheritance:
57 |
58 | graphable
59 | ---------
60 |
61 | Provides classes that can be used to create an interdependency graph.
62 |
63 | .. automodule:: graphable
64 | :members:
65 | :undoc-members:
66 | :show-inheritance:
67 |
68 | sqlite
69 | ------
70 |
71 | Our SQLite connection class.
72 |
73 | .. automodule:: sqlite
74 | :members:
75 | :undoc-members:
76 | :show-inheritance:
77 |
78 |
--------------------------------------------------------------------------------
/rxnorm_download.py:
--------------------------------------------------------------------------------
1 | import mechanize
2 | import zipfile
3 | import re
4 | import sys
5 | import argparse
6 |
7 | DOWNLOADS_URL = "https://www.nlm.nih.gov/research/umls/rxnorm/docs/rxnormfiles.html"
8 | ZIP_URL = "http://download.nlm.nih.gov/umls/kss/rxnorm/RxNorm_full_%s.zip"
9 | CHUNK_SIZE= 1000
10 | LINK_PATTERN = re.compile("download.nlm.nih.gov.*full")
11 |
def download_rxnorm(args):
	""" Download and extract an RxNorm full-release ZIP from the NLM servers.

	Signs in with the UMLS credentials in `args.username`/`args.password`,
	downloads either the release named by `args.release` or the latest one
	linked from the downloads page, saves it to `args.file` and extracts it
	into the current directory. Exits the process on credential failure.
	"""
	br = mechanize.Browser()
	br.set_handle_robots(False)		# the download host disallows robots; we act on the user's behalf

	if args.release:
		url = ZIP_URL % args.release
	else:
		# scrape the latest full-release link from the downloads page
		br.open(DOWNLOADS_URL)
		url = next(br.links(url_regex=LINK_PATTERN)).url

	print("Signing in to download %s" % url)
	br.open(url)

	# first form on the page is the UMLS login form
	br.select_form(nr=0)
	br["username"] = args.username
	br["password"] = args.password
	zip_request = br.submit()

	try:
		# missing header (failed login redirect) raises TypeError/AttributeError
		total_size = int(zip_request.info().getheader('Content-Length'))
	except (AttributeError, TypeError, ValueError):
		print("Failed to download file. Check your credentials.")
		sys.exit(1)

	with open(args.file, "wb") as outfile:
		while zip_request.tell() < total_size:
			outfile.write(zip_request.read(size=CHUNK_SIZE))
			read = zip_request.tell()
			# "\r" + end='' keeps the progress on a single console line
			print("\rDownload: %.2f%% of %sMB" % (
				read * 100.0 / total_size,
				total_size // 1000000), end='')

	print("Extracting zip")
	with zipfile.ZipFile(args.file) as zf:
		zf.extractall()
48 |
49 | if __name__ == "__main__":
50 | parser = argparse.ArgumentParser(description='Download RxNorm Release')
51 |
52 | parser.add_argument('--username', help='UMLS username', required=True)
53 | parser.add_argument('--password', help='UMLS password', required=True)
54 | parser.add_argument(
55 | "--release",
56 | help="specify release version (e.g. '10052015'). Default: latest.",
57 | default=None)
58 | parser.add_argument(
59 | '--file',
60 | help='Where to save .zip download. Default: "rxnorm-download.zip"',
61 | default="rxnorm-download.zip")
62 |
63 | args = parser.parse_args()
64 | download_rxnorm(args)
65 |
--------------------------------------------------------------------------------
/rxnorm_tests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # RxNorm unit testing
5 | #
6 | # 2014-04-18 Created
7 |
8 | import sys
9 | import os.path
10 | thismodule = os.path.abspath(os.path.dirname(__file__))
11 | if thismodule not in sys.path:
12 | sys.path.insert(0, thismodule)
13 |
14 | import unittest
15 | from rxnorm import RxNorm
16 |
17 |
class RxNormTest(unittest.TestCase):
	""" Test :class:`RxNorm`.

	Covers `ndc_normalize`, which maps the various NDC layouts
	(6-4-2, 6-4-1, 6-4, 6-3-2, 6-3-1, 5-4-2, 5-4-1, 5-3-2, 4-4-2 and
	12-digit VANDF) to the normalized 11-digit form, returning None for
	input it cannot normalize.
	"""

	def test_ndc_normalization(self):
		""" Test NDC normalization.
		"""
		# 6-4-2
		self.assertEqual('00074148614', RxNorm.ndc_normalize('000074-1486-14'))
		# asterisk placeholders are treated as zeroes
		self.assertEqual('51227615900', RxNorm.ndc_normalize('051227-6159-**'))
		self.assertEqual('58734000101', RxNorm.ndc_normalize('058734-0001-*1'))

		# 6-4-1
		self.assertEqual('00854684102', RxNorm.ndc_normalize('000854-6841-2'))

		# 6-4: treat as 6-4-2 with two trailing zeroes
		self.assertEqual('57982011000', RxNorm.ndc_normalize('057982-0110'))
		self.assertEqual('12579005600', RxNorm.ndc_normalize('012579-*056'))

		# 6-3-2
		self.assertEqual('57982012312', RxNorm.ndc_normalize('057982-123-12'))

		# 6-3-1
		self.assertEqual('57982098709', RxNorm.ndc_normalize('057982-987-9'))

		# 5-4-2
		self.assertEqual('17317093201', RxNorm.ndc_normalize('17317-0932-01'))

		# 5-4-1
		self.assertEqual('36987315601', RxNorm.ndc_normalize('36987-3156-1'))

		# 5-3-2
		self.assertEqual('24730041205', RxNorm.ndc_normalize('24730-412-05'))

		# 4-4-2
		self.assertEqual('00268010310', RxNorm.ndc_normalize('0268-0103-10'))

		# 12 digit VANDF
		self.assertEqual('03475476541', RxNorm.ndc_normalize('003475476541'))

		# normalized already
		self.assertEqual('04458632698', RxNorm.ndc_normalize('04458632698'))

		# invalid: wrong lengths or non-digit characters yield None
		self.assertIsNone(RxNorm.ndc_normalize('0054478962'))
		self.assertIsNone(RxNorm.ndc_normalize('547668531244'))
		self.assertIsNone(RxNorm.ndc_normalize('0054478962796'))
		self.assertIsNone(RxNorm.ndc_normalize('0a79b2-c87-9'))
		self.assertIsNone(RxNorm.ndc_normalize('si-lly-te-st'))
		self.assertIsNone(RxNorm.ndc_normalize('just-a-rand-test-string'))
68 |
--------------------------------------------------------------------------------
/csvimporter.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Simple CSV importer.
5 |
import csv
import re
import sqlite3
import sys
9 |
10 |
class CSVImporter(object):
	""" A simple CSV to SQLite importer class.

	Expects a CSV file with a header row, will create a table reflecting the
	header row and import all rows.
	"""
	_sqlite = None		# cached sqlite3 connection, created lazily

	def __init__(self, csv_path, tablename='rows'):
		""" `csv_path` is the CSV file to import, `tablename` the SQLite
		table to create and fill. """
		self.filepath = csv_path
		self.tablename = tablename

	def sqlite_handle(self, dbpath):
		""" Return a sqlite3 connection to `dbpath`, reusing one connection
		per importer instance. """
		if self._sqlite is None:
			self._sqlite = sqlite3.connect(dbpath)
		return self._sqlite

	def import_to(self, dbpath, csv_format='excel'):
		""" Import the receiver's CSV file into the SQLite database at
		`dbpath`.

		The first CSV row is used as header: each column becomes a VARCHAR
		column (non-word characters stripped from names). All remaining rows
		are inserted and committed in one transaction. Exits the process on
		SQL or CSV errors.
		"""
		assert self.filepath
		assert dbpath

		# SQLite handling; exclusive lock while we bulk-import
		sql_handle = self.sqlite_handle(dbpath)
		sql_handle.isolation_level = 'EXCLUSIVE'
		sql_cursor = sql_handle.cursor()
		create_sql = 'CREATE TABLE {} '.format(self.tablename)
		insert_sql = 'INSERT INTO {} '.format(self.tablename)
		all_but_alnum = r'\W+'

		# loop rows
		with open(self.filepath, 'r') as csv_handle:
			reader = csv.reader(csv_handle, quotechar='"', dialect=csv_format)
			try:
				for i, row in enumerate(reader):
					sql = insert_sql
					params = ()

					# first row is the header row: derive column names, then
					# prepare both the CREATE and the parameterized INSERT
					if 0 == i:
						fields = [re.sub(all_but_alnum, '', field) for field in row]
						fields_create = ['{} VARCHAR'.format(field) for field in fields]

						create_sql += "(\n\t{}\n)".format(",\n\t".join(fields_create))
						sql = create_sql

						placeholders = ', '.join('?' * len(fields))
						insert_sql += '({}) VALUES ({})'.format(', '.join(fields), placeholders)

					# data rows
					else:
						params = tuple(row)

					# execute SQL statement
					try:
						sql_cursor.execute(sql, params)
					except Exception as e:
						sys.exit(u'SQL failed: %s -- %s' % (e, sql))

				# commit to file
				sql_handle.commit()
				sql_handle.isolation_level = None

			except csv.Error as e:
				sys.exit('CSV error on line %d: %s' % (reader.line_num, e))
81 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | UMLS for Python
2 | ===============
3 |
4 | These are basic tools to interact with UMLS lexica, namely UMLS, SNOMED and RxNorm, using Python 3 scripts.
5 | For each of the three databases there are scripts (2 Bash and 1 Python) that facilitate import of the downloaded data into a local SQLite 3 database.
6 |
7 | > You will need a UMLS license to download UMLS lexica.
8 |
9 | For a simple start, run one of the files (`umls.py`, `snomed.py`, `rxnorm.py`) in your Shell and follow the instructions.
10 | The scripts will prompt you to download and install the databases and, when completed, print a simple example lookup.
11 |
12 | There are also utility scripts that offer help for specific use cases, see below.
13 |
14 | Documentation
15 | -------------
16 |
17 | An [auto-generated documentation](http://chb.github.io/py-umls/) (via Sphinx) is available but not very exhaustive at the moment.
18 | See below for some quick examples.
19 |
20 | Usage
21 | -----
22 |
23 | More detailed instructions here:
24 |
25 | - [**RxNorm**](https://github.com/chb/py-umls/wiki/RxNorm)
26 | - [**SNOMED-CT**](https://github.com/chb/py-umls/wiki/SNOMED)
27 |
28 | There are `XYLookup` classes in each of the three files which can be used for database lookups (where `XY` stands for `UMLS`, `SNOMED` or `RxNorm`).
29 | The following example code is appended to the end of the respective scripts and will be executed if you run it in the Shell.
30 | You might want to insert `XY.check_databases()` before this code so you will get an exception if the databases haven't been set up.
31 |
32 | look_umls = UMLSLookup()
33 | code_umls = 'C0002962'
34 | meaning_umls = look_umls.lookup_code_meaning(code_umls)
35 | print('UMLS code "{0}": {1}'.format(code_umls, meaning_umls))
36 |
37 | look_snomed = SNOMEDLookup()
38 | code_snomed = '215350009'
39 | meaning_snomed = look_snomed.lookup_code_meaning(code_snomed)
40 | print('SNOMED code "{0}": {1}'.format(code_snomed, meaning_snomed))
41 |
42 | look_rxnorm = RxNormLookup()
43 | code_rxnorm = '328406'
44 | meaning_rxnorm = look_rxnorm.lookup_code_meaning(code_rxnorm, preferred=False)
45 | print('RxNorm code "{0}": {1}'.format(code_rxnorm, meaning_rxnorm))
46 |
47 | You would typically use this module as a submodule in your own project.
48 | Best add this as a _git submodule_ but that really is up to you.
49 | If you do use this module as a Python module, you can't use the name `py-umls` because it contains a dash, so you must checkout this code to a correctly named directory.
50 | I usually use `umls`.
51 |
52 | License
53 | -------
54 |
55 | This work is [Apache licensed](LICENSE.txt).
56 |
57 |
--------------------------------------------------------------------------------
/loinc.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # LOINC import and lookup utilities
5 |
6 |
7 | import sys
8 | import os.path
9 | import logging
10 |
11 |
class LOINCLookup(object):
	""" Placeholder for LOINC code lookup functionality; not yet implemented. """
	pass
14 |
15 |
class LOINC(object):
	""" Class that helps with setting up a local LOINC SQLite database.
	"""

	@classmethod
	def database_path(cls):
		""" The absolute filesystem path of the LOINC SQLite database,
		next to this module in `databases/loinc.db`. """
		return os.path.join(os.path.dirname(os.path.abspath(__file__)), 'databases/loinc.db')

	@classmethod
	def check_database(cls):
		""" Check if our database is in place and if not, prompts to create it.
		Will raise on errors!
		"""
		dbpath = cls.database_path()
		if not os.path.exists(dbpath):
			raise Exception("The LOINC database at {} does not exist. Run the script `loinc.py`."
				.format(dbpath))

	@classmethod
	def import_from_files(cls, dirpath):
		""" Imports LOINC from the downloaded CSV files found in `dirpath`
		into the SQLite database, then indexes the lookup columns. """
		# sibling modules of this file; imported lazily so `check_database`
		# works without them
		import sqlite
		import csvimporter

		# CSV file name -> SQLite table name
		mapping = {
			'loinc.csv': 'loinc',
			'map_to.csv': 'map_to',
			'source_organization.csv': 'sources'
		}
		dbpath = cls.database_path()

		# import
		for csvfile, table in mapping.items():
			print("Importing LOINC table {}".format(csvfile))
			imp = csvimporter.CSVImporter(os.path.join(dirpath, csvfile), table)
			imp.import_to(dbpath)

		# index the columns used for lookups
		print("Creating indexes")
		sql_handle = sqlite.SQLite(dbpath)
		sql_handle.execute('CREATE INDEX x_loinc_num_loinc ON loinc (LOINC_NUM)')
		sql_handle.execute('CREATE INDEX x_shortname_loinc ON loinc (SHORTNAME)')
		sql_handle.execute('CREATE INDEX x_long_common_name_loinc ON loinc (LONG_COMMON_NAME)')
59 |
60 |
61 |
# running this as a script performs the database setup/check
if '__main__' == __name__:
	logging.basicConfig(level=logging.DEBUG)

	# if the database check fails, run import commands
	try:
		LOINC.check_database()
	except Exception:
		csv_path = sys.argv[1] if 2 == len(sys.argv) else None
		if csv_path is not None and os.path.exists(csv_path):
			try:
				LOINC.import_from_files(csv_path)
			except Exception as e:
				# this is the LOINC script — was wrongly reported as "SNOMED import failed"
				raise Exception("LOINC import failed: {}".format(e))
		else:
			print("Provide the path to the directory containing the LOINC CSV files as first argument.")
			print("Download the LOINC Table File in CSV format (free registration required) here:")
			print("http://loinc.org/downloads/loinc")

# TODO: lookup examples
--------------------------------------------------------------------------------
/databases/rxnorm.sh:
--------------------------------------------------------------------------------
#!/bin/sh
#
# create an RxNORM SQLite database (and a relations triple store).
#

# our SQLite database does not exist
if [ ! -e rxnorm.db ]; then
	if [ ! -d "$1" ]; then
		echo "Provide the path to the RxNorm directory as first argument when invoking this script. Download the latest version here: http://www.nlm.nih.gov/research/umls/rxnorm/docs/rxnormfiles.html"
		exit 1
	fi
	if [ ! -d "$1/rrf" ]; then
		echo "There is no directory named rrf in the directory you provided. Download the latest version here: http://www.nlm.nih.gov/research/umls/rxnorm/docs/rxnormfiles.html"
		exit 1
	fi
	# POSIX-safe availability check ("&>/dev/null" is a bashism that under
	# /bin/sh would background the command instead of silencing it)
	if ! command -v sqlite3 >/dev/null 2>&1; then
		echo "It seems 'sqlite3' is not installed, I will need it. Aborting."
		exit 1
	fi

	# init the database
	cat "$1/scripts/mysql/Table_scripts_mysql_rxn.sql" | sqlite3 rxnorm.db

	# convert RRF files (strip last pipe and remove quote (") characters, those are giving SQLite troubles)
	if [ ! -e "$1/rrf/RXNREL.pipe" ]; then
		current=$(pwd)
		cd "$1/rrf"
		echo "-> Converting RRF files for SQLite"
		for f in *.RRF; do
			sed -e 's/.$//' -e 's/"//g' "$f" > "${f%RRF}pipe"
		done
		# quote in case the original working directory contains spaces
		cd "$current"
	fi

	# import tables
	for f in "$1/rrf/"*.pipe; do
		table=$(basename "${f%.pipe}")
		echo "-> Importing $table"
		sqlite3 rxnorm.db ".import '$f' '$table'"
	done

	# create an NDC table
	echo "-> Creating extra tables"
	# sqlite3 rxnorm.db "CREATE TABLE NDC AS SELECT RXCUI, ATV AS NDC FROM RXNSAT WHERE ATN = 'NDC';"	# we do it in 2 steps to create the primary index column
	sqlite3 rxnorm.db "CREATE TABLE NDC (RXCUI INT, NDC VARCHAR);"
	sqlite3 rxnorm.db "INSERT INTO NDC SELECT RXCUI, ATV FROM RXNSAT WHERE ATN = 'NDC';"

	# create drug class tables
	sqlite3 rxnorm.db "CREATE TABLE VA_DRUG_CLASS (RXCUI int, RXCUI_ORIGINAL int, VA varchar);"
	sqlite3 rxnorm.db "CREATE TABLE FRIENDLY_CLASS_NAMES (VACODE varchar, FRIENDLY varchar);"
	sqlite3 rxnorm.db "CREATE INDEX X_FRIENDLY_CLASS_NAMES_VACODE ON FRIENDLY_CLASS_NAMES (VACODE);"

	# create indices
	echo "-> Indexing NDC table"
	sqlite3 rxnorm.db "CREATE INDEX X_NDC_RXCUI ON NDC (RXCUI);"
	sqlite3 rxnorm.db "CREATE INDEX X_NDC_NDC ON NDC (NDC);"

	echo "-> Indexing RXNSAT table"
	sqlite3 rxnorm.db "CREATE INDEX RXNSAT_RXCUI ON RXNSAT (RXCUI);"
	sqlite3 rxnorm.db "CREATE INDEX RXNSAT_ATN ON RXNSAT (ATN);"

	echo "-> Indexing RXNREL table"
	sqlite3 rxnorm.db "CREATE INDEX X_RXNREL_RXCUI1 ON RXNREL (RXCUI1);"
	sqlite3 rxnorm.db "CREATE INDEX X_RXNREL_RXCUI2 ON RXNREL (RXCUI2);"
	sqlite3 rxnorm.db "CREATE INDEX X_RXNREL_RXAUI2 ON RXNREL (RXAUI2);"
	#sqlite3 rxnorm.db "CREATE INDEX X_RXNREL_RELA ON RXNREL (RELA);"	# do NOT do this! slows down queries dramatically

	echo "-> Indexing RXNCONSO table"
	sqlite3 rxnorm.db "CREATE INDEX X_RXNCONSO_RXCUI ON RXNCONSO (RXCUI);"
	sqlite3 rxnorm.db "CREATE INDEX X_RXNCONSO_RXAUI ON RXNCONSO (RXAUI);"

	# How to export from SQLite: export NDC to CSV
	# .mode csv
	# .header on
	# .out va-class.csv
	# SELECT RXCUI, NDC FROM NDC;
	# SELECT DISTINCT ATV FROM RXNSAT WHERE ATN = 'VA_CLASS_NAME' ORDER BY ATV ASC;
fi
79 |
80 |
--------------------------------------------------------------------------------
/sqlite.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Simplifying SQLite access
4 | #
5 | # 2012-12-14 Created by Pascal Pfiffner
6 | #
7 |
8 |
9 | import sqlite3
10 | import threading
11 |
12 |
# per-thread, per-database registry of SQLite instances (see SQLite.get)
SQLITE_INSTANCES = {}


class SQLite(object):
	""" SQLite access
	"""

	@classmethod
	def get(cls, database):
		""" Use this to get SQLite instances for a given database. Avoids
		creating multiple instances for the same database.

		We keep instances around per thread per database, maybe there should be
		a way to turn this off. However, here we always release instances for
		threads that are no longer alive. If this is better than just always
		creating a new instance should be tested.
		"""

		global SQLITE_INSTANCES

		# group per thread
		thread_id = threading.current_thread().ident
		if thread_id not in SQLITE_INSTANCES:
			SQLITE_INSTANCES[thread_id] = {}
		by_thread = SQLITE_INSTANCES[thread_id]

		# group per database; use `cls` so subclasses get their own type
		if database not in by_thread:
			by_thread[database] = cls(database)

		# free up memory for terminated threads
		clean = {}
		for alive in threading.enumerate():
			if alive.ident in SQLITE_INSTANCES:
				clean[alive.ident] = SQLITE_INSTANCES[alive.ident]
		SQLITE_INSTANCES = clean

		return by_thread[database]

	def __init__(self, database=None):
		""" `database` is the SQLite file path (or ':memory:'); required. """
		if database is None:
			raise Exception('No database provided')

		self.database = database
		self.handle = None		# sqlite3.Connection, opened lazily on first use
		self.cursor = None		# sqlite3.Cursor, created alongside the handle

	def execute(self, sql, params=()):
		""" Executes an SQL command and returns the cursor.execute, which can
		be used as an iterator.
		Supply the params as tuple, i.e. (param,) and (param1, param2, ...)
		"""
		if not sql:
			raise Exception('No SQL to execute')
		if not self.cursor:
			self.connect()

		return self.cursor.execute(sql, params)

	def executeInsert(self, sql, params=()):
		""" Executes an SQL command (should be INSERT OR REPLACE) and returns
		the last row id, 0 on failure.
		"""
		if self.execute(sql, params):
			return self.cursor.lastrowid if self.cursor.lastrowid else 0

		return 0

	def executeUpdate(self, sql, params=()):
		""" Executes an SQL command (should be UPDATE) and returns the number
		of affected rows.
		"""
		if self.execute(sql, params):
			return self.cursor.rowcount

		return 0

	def executeOne(self, sql, params=()):
		""" Returns the first row returned by executing the command
		"""
		self.execute(sql, params)
		return self.cursor.fetchone()

	def hasTable(self, table_name):
		""" Returns whether the given table exists. """
		# use single quotes for the string literal: double quotes are standard
		# identifier quoting and only work via SQLite's legacy fallback
		sql = "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?"
		ret = self.executeOne(sql, (table_name,))
		return True if ret and ret[0] > 0 else False

	def create(self, table_name, table_structure):
		""" Executes a CREATE TABLE IF NOT EXISTS query with the given structure.
		Input is NOT sanitized, watch it!
		"""
		create_query = 'CREATE TABLE IF NOT EXISTS %s %s' % (table_name, table_structure)
		self.execute(create_query)
		return True

	def commit(self):
		""" Commit the current transaction. """
		self.handle.commit()

	def rollback(self):
		""" Roll back the current transaction. """
		self.handle.rollback()

	def connect(self):
		""" Open the connection and create the cursor, if not yet done. """
		if self.cursor is not None:
			return

		self.handle = sqlite3.connect(self.database)
		self.cursor = self.handle.cursor()

	def close(self):
		""" Close cursor and connection; a later `execute` reconnects. """
		if self.cursor is None:
			return

		self.handle.close()
		self.cursor = None
		self.handle = None
139 |
140 |
--------------------------------------------------------------------------------
/databases/umls.sh:
--------------------------------------------------------------------------------
#!/bin/sh
#
# create a UMLS SQLite database.
#

# our SQLite database does not exist
if [ ! -e umls.db ]; then
	if [ ! -d "$1" ]; then
		echo "Provide the path to the UMLS install directory, which is named something like \"2014AA\" and contains a \"META\" directory, as first argument when invoking this script."
		echo
		echo "Downloading and Extracting UMLS Data"
		echo "===================================="
		echo
		echo "Downloading and extracting UMLS data is a painful process."
		echo "Begin by downloading most files for the latest version listed on the left side here: http://www.nlm.nih.gov/research/umls/licensedcontent/umlsknowledgesources.html"
		echo "Extract 'mmsys.zip' and place every downloaded file, including 'mmsys.zip', into the extracted directory."
		echo "Run the respective 'runXX' script inside the mmsys directory; the MetamorphoSys Java GUI will open."
		echo "Click \"Install UMLS\", as source directory select the just extracted mmsys directory and your chosen target directory."
		echo "Leave the checkboxes alone and click OK."
		echo "Now you must generate a configuration and in order to be able to proceed, save the configuration via a command from the menu bar."
		echo "Then select \"Begin Subset\", also from the menubar, to start the extraction process."
		echo "This should extract all the things and put it in the selected directory, which now contains a META directory with all the files we need to proceed."
		echo
		echo "Once you have done this, run this script again with the correct path as the first argument."
		exit 1
	fi
	if [ ! -d "$1/META" ]; then
		echo "There is no directory named META in the install directory you provided."
		echo "Point this script to the directory named something like \"2014AA\"."
		exit 1
	fi

	# convert RRF files (strip last pipe and remove quote (") characters, those are giving SQLite troubles)
	if [ ! -e "$1/META/MRDEF.pipe" ]; then
		current=$(pwd)
		cd "$1/META"
		echo "-> Converting RRF files for SQLite"
		for f in MRCONSO.RRF MRDEF.RRF MRSTY.RRF; do
			sed -e 's/.$//' -e 's/"//g' "$f" > "${f%RRF}pipe"
		done
		# quote in case the original working directory contains spaces
		cd "$current"
	fi

	# init the database for MRDEF
	# table structure here: http://www.ncbi.nlm.nih.gov/books/NBK9685/
	sqlite3 umls.db "CREATE TABLE MRDEF (
		CUI varchar,
		AUI varchar,
		ATUI varchar,
		SATUI varchar,
		SAB varchar,
		DEF text,
		SUPPRESS varchar,
		CVF varchar
	)"

	# init the database for MRCONSO
	sqlite3 umls.db "CREATE TABLE MRCONSO (
		CUI varchar,
		LAT varchar,
		TS varchar,
		LUI varchar,
		STT varchar,
		SUI varchar,
		ISPREF varchar,
		AUI varchar,
		SAUI varchar,
		SCUI varchar,
		SDUI varchar,
		SAB varchar,
		TTY varchar,
		CODE varchar,
		STR text,
		SRL varchar,
		SUPPRESS varchar,
		CVF varchar
	)"

	# init the database for MRSTY
	sqlite3 umls.db "CREATE TABLE MRSTY (
		CUI varchar,
		TUI varchar,
		STN varchar,
		STY text,
		ATUI varchar,
		CVF varchar
	)"

	# import tables
	for f in "$1/META/"*.pipe; do
		table=$(basename "${f%.pipe}")
		echo "-> Importing $table"
		sqlite3 umls.db ".import '$f' '$table'"
	done

	# create indexes
	echo "-> Creating indexes"
	sqlite3 umls.db "CREATE INDEX X_CUI_MRDEF ON MRDEF (CUI);"
	sqlite3 umls.db "CREATE INDEX X_SAB_MRDEF ON MRDEF (SAB);"
	sqlite3 umls.db "CREATE INDEX X_CUI_MRCONSO ON MRCONSO (CUI);"
	sqlite3 umls.db "CREATE INDEX X_LAT_MRCONSO ON MRCONSO (LAT);"
	sqlite3 umls.db "CREATE INDEX X_TS_MRCONSO ON MRCONSO (TS);"
	sqlite3 umls.db "CREATE INDEX X_CUI_MRSTY ON MRSTY (CUI);"
	sqlite3 umls.db "CREATE INDEX X_TUI_MRSTY ON MRSTY (TUI);"

	# create faster lookup table
	echo "-> Creating fast lookup table"
	sqlite3 umls.db "CREATE TABLE descriptions AS SELECT CUI, LAT, SAB, TTY, STR FROM MRCONSO WHERE LAT = 'ENG' AND TS = 'P' AND ISPREF = 'Y'"
	sqlite3 umls.db "ALTER TABLE descriptions ADD COLUMN STY TEXT"
	sqlite3 umls.db "CREATE INDEX X_CUI_desc ON descriptions (CUI)"
	sqlite3 umls.db "UPDATE descriptions SET STY = (SELECT GROUP_CONCAT(MRSTY.TUI, '|') FROM MRSTY WHERE MRSTY.CUI = descriptions.CUI GROUP BY MRSTY.CUI)"
else
	echo "=> umls.db already exists"
fi
115 |
116 |
--------------------------------------------------------------------------------
/umls.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Utilities to handle UMLS
5 | #
6 | # 2013-01-01 Created by Pascal Pfiffner
7 | # 2014-01-20 Extracted and converted to Python 3
8 | #
9 |
10 |
11 | import sys
12 | import os.path
13 | import logging
14 |
15 | from sqlite import SQLite # for py-umls standalone
16 |
17 |
class UMLS (object):
    """Helpers around the locally imported UMLS SQLite database."""

    @classmethod
    def check_database(cls):
        """Verify that the UMLS database has been imported.

        Looks for ``databases/umls.db`` relative to the working directory.
        If the file is missing, raises an Exception pointing at the
        `umls.sh` import script; returns None otherwise.
        """
        umls_db = os.path.join('databases', 'umls.db')
        if os.path.exists(umls_db):
            return
        raise Exception("The UMLS database at {} does not exist. Run the import script `databases/umls.sh`."
            .format(os.path.abspath(umls_db)))
35 |
36 |
37 |
class UMLSLookup (object):
    """Fast concept lookups against the pre-built "descriptions" table of
    the local UMLS SQLite database.
    """

    sqlite = None
    did_check_dbs = False        # class-wide flag: database presence checked once, lazily
    # Restrict "preferred" lookups to SNOMED CT and the Metathesaurus
    preferred_sources = ['"SNOMEDCT"', '"MTH"']

    def __init__(self):
        absolute = os.path.dirname(os.path.realpath(__file__))
        self.sqlite = SQLite.get(os.path.join(absolute, 'databases/umls.db'))

    def lookup_code(self, cui, preferred=True):
        """ Return a list with triples that contain:
        - name
        - source
        - semantic type
        by looking it up in our "descriptions" database.
        The "preferred" setting has the effect that only names from SNOMED
        (SNOMEDCT) and the Metathesaurus (MTH) will be reported. A lookup in
        our "descriptions" table is much faster than combing through the full
        MRCONSO table.

        A leading "-" negates the CUI; an optional "@..." suffix is ignored.

        :returns: A list of triples with (name, sab, sty)
        """
        if cui is None or len(cui) < 1:
            return []

        # lazy UMLS db checking
        if not UMLSLookup.did_check_dbs:
            UMLS.check_database()
            UMLSLookup.did_check_dbs = True

        # take care of negations
        negated = '-' == cui[0]
        if negated:
            cui = cui[1:]

        parts = cui.split('@', 1)
        lookup_cui = parts[0]

        # STR: Name
        # SAB: Abbreviated Source Name
        # STY: Semantic Type
        if preferred:
            sql = 'SELECT STR, SAB, STY FROM descriptions WHERE CUI = ? AND SAB IN ({})'.format(", ".join(UMLSLookup.preferred_sources))
        else:
            sql = 'SELECT STR, SAB, STY FROM descriptions WHERE CUI = ?'

        # return as list
        arr = []
        for res in self.sqlite.execute(sql, (lookup_cui,)):
            if negated:
                # BUGFIX: preserve the (name, sab, sty) triple shape. The old
                # code appended a single string, which broke callers (such as
                # lookup_code_meaning) that index res[1] and res[2].
                arr.append(("[NEGATED] {}".format(res[0]), res[1], res[2]))
            else:
                arr.append(res)

        return arr


    def lookup_code_meaning(self, cui, preferred=True, no_html=True):
        """ Return a string (an empty string if the cui is null or not found)
        by looking it up in our "descriptions" database.
        The "preferred" setting has the effect that only names from SNOMED
        (SNOMEDCT) and the Metathesaurus (MTH) will be reported. A lookup in
        our "descriptions" table is much faster than combing through the full
        MRCONSO table.
        """
        names = []
        for res in self.lookup_code(cui, preferred):
            if no_html:
                names.append("{} ({}) [{}]".format(res[0], res[1], res[2]))
            else:
                names.append("{} ({}: {})".format(res[0], res[1], res[2]))

        # BUGFIX: the HTML separator literal had been mangled; restore the
        # "<br/>" line break used when HTML output is requested
        comp = ", " if no_html else "<br/>\n"
        return comp.join(names) if len(names) > 0 else ''


    def lookup_code_for_name(self, name, preferred=True):
        """ Tries to find a good concept code for the given concept name.

        Uses our indexed `descriptions` table.

        :returns: A list of triples with (cui, sab, sty)
        """
        if name is None or len(name) < 1:
            # BUGFIX: return an empty list (was None) so callers can always
            # iterate the result, matching the documented return type
            return []

        # lazy UMLS db checking
        if not UMLSLookup.did_check_dbs:
            UMLS.check_database()
            UMLSLookup.did_check_dbs = True

        # CUI: Concept-ID
        # STR: Name
        # SAB: Abbreviated Source Name
        # STY: Semantic Type
        if preferred:
            sql = 'SELECT CUI, SAB, STY FROM descriptions WHERE STR LIKE ? AND SAB IN ({})'.format(", ".join(UMLSLookup.preferred_sources))
        else:
            sql = 'SELECT CUI, SAB, STY FROM descriptions WHERE STR LIKE ?'

        # return as list
        arr = []
        for res in self.sqlite.execute(sql, ('%' + name + '%',)):
            arr.append(res)

        return arr
146 |
147 |
148 |
# When run as a script: verify the database, then demo a couple of lookups.
if __name__ == '__main__':
    UMLS.check_database()

    # example: code -> meaning
    look = UMLSLookup()
    code = 'C0002962'
    meaning = look.lookup_code_meaning(code)
    print('UMLS code "{0}": {1}'.format(code, meaning))

    # example: name -> candidate codes
    name = 'Pulmonary Arterial Hypertension'
    print('Search for "{}" returns:'.format(name))
    for cd in look.lookup_code_for_name(name):
        print('{}: {}'.format(cd, look.lookup_code_meaning(cd[0])))
164 |
--------------------------------------------------------------------------------
/graphable.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Graphable objects for fun
5 | #
6 | # 2014-02-18 Created by Pascal Pfiffner
7 |
8 | import os
9 | import uuid
10 | import subprocess
11 | import tempfile
12 |
13 |
class GraphableObject (object):
    """A node that can render itself as a Graphviz DOT statement."""

    _name = None            # unique identifier of the object in the graph
    label = None            # display label used in place of the name
    shape = None            # DOT "shape" attribute
    style = None            # DOT "style" attribute
    color = None            # DOT "color" attribute
    announced_via = None    # the object that announced us, if any

    def __init__(self, name, label=None):
        # fall back to a random, unique node name when none is given
        self._name = name or 'o' + uuid.uuid4().hex
        self.label = label

    @property
    def name(self):
        return self._name or 'unnamed'

    def inner_dot(self):
        """Return the bracketed DOT attribute list, or None if no attribute
        (shape/style/color/label) is set."""
        attrs = []
        if self.shape:
            attrs.append("shape={}".format(self.shape))
        if self.style:
            attrs.append("style={}".format(self.style))
        if self.color:
            attrs.append("color={}".format(self.color))
        if self.label:
            attrs.append('label="{}"'.format(self.label))
        return "[{}]".format(','.join(attrs)) if attrs else None

    def dot_representation(self):
        """One DOT statement line for this node."""
        attrs = self.inner_dot()
        if attrs is None:
            return "\t{};\n".format(self.name)
        return "\t{} {};\n".format(self.name, attrs)

    def announce_to(self, dot_context, via=None):
        """ Announce the receiver to the context.

        Subclasses MUST NOT announce other graphable objects they are holding
        on to here but they MUST announce them in "deliver_to" if appropriate.

        - dot_context The context to announce to
        - via If not-None the other GraphableObject that is responsible for
        announcing the receiver
        """
        self.announced_via = via
        dot_context.announce(self)

    def deliver_to(self, dot_context, is_leaf):
        """ Call the context's "deliver" method.

        This method is guaranteed to only be called once per context. Hence
        subclasses that hold on to other graphable objects MUST ANNOUNCE those
        instances here (but NOT deliver them) but ONLY IF "is_leaf" is not True.
        - dot_context The context to deliver to
        - is_leaf If True means the receiver is intended to be a leaf object
        """
        dot_context.deliver(self)
74 |
class GraphableRelation (GraphableObject):
    """A directed, labeled edge between two GraphableObject instances."""

    relation_from = None    # source GraphableObject
    relation_to = None      # target GraphableObject

    def __init__(self, rel_from, label, rel_to):
        # the edge's own name is derived from both endpoint names
        super().__init__("{}->{}".format(rel_from.name, rel_to.name), label)
        self.relation_from = rel_from
        self.relation_to = rel_to

    def dot_representation(self):
        """One DOT edge statement, or an empty string without a target."""
        if not self.relation_to:
            return ''
        return "\t{} -> {} {};\n".format(
            self.relation_from.name,
            self.relation_to.name,
            self.inner_dot() or ''
        )

    def deliver_to(self, dot_context, is_leaf):
        # announce both endpoints first, then deliver ourselves
        self.relation_from.announce_to(dot_context, self)
        self.relation_to.announce_to(dot_context, self)
        super().deliver_to(dot_context, is_leaf)
99 |
class DotContext (object):
    """Collects DOT statements from announced objects, deduplicating by name
    and limiting recursion depth."""

    items = None
    source = None
    depth = 0
    max_depth = 8    # there is something fishy still, make this double the tree depth you want
    max_width = 15   # pass to graphable objects, they will decide what to do with this

    def __init__(self, max_depth=None, max_width=None):
        self.items = set()
        self.source = ''
        self.depth = 0
        if max_depth is not None:
            self.max_depth = max_depth
        if max_width is not None:
            self.max_width = max_width

    def announce(self, obj):
        # an already-seen name is ignored: this both deduplicates output and
        # breaks infinite recursion on cyclic graphs
        if obj.name in self.items:
            return
        self.items.add(obj.name)

        self.depth += 1
        obj.deliver_to(self, self.depth > self.max_depth)
        self.depth -= 1

    def deliver(self, obj):
        self.source += obj.dot_representation()

    def get(self):
        """Return the accumulated DOT source."""
        return self.source
130 |
class GraphvizGraphic (object):
    """Renders a GraphableObject tree to an image via the Graphviz `dot`
    command-line tool."""

    cmd = 'dot'           # Graphviz executable to invoke
    out_dot = None        # if set, the intermediate DOT file is kept at this path
    out_type = 'pdf'      # Graphviz output format, passed as -T<type>
    out_file = None       # rendered output path
    max_depth = None      # forwarded to DotContext
    max_width = None      # forwarded to DotContext

    def __init__(self, out_file='rxgraph.png'):
        self.out_file = out_file

    def executableCommand(self, infile):
        """Return the argv list used to run Graphviz on `infile`."""
        return [
            self.cmd,
            '-T{}'.format(self.out_type),
            infile,
            '-o', str(self.out_file),
        ]

    def write_dot_graph(self, obj):
        """Announce `obj` into a DotContext, write the DOT source to a temp
        file and run Graphviz on it.

        :raises: Exception if no output file is set or Graphviz fails
        """
        if self.out_file is None:
            raise Exception('Please assign an output filename to "out_file"')

        context = DotContext(max_depth=self.max_depth, max_width=self.max_width)
        obj.announce_to(context)
        source = """digraph G {{
	ranksep=equally;\n{}}}\n""".format(context.get())

        # write to a temporary file
        filedesc, tmpname = tempfile.mkstemp()
        with os.fdopen(filedesc, 'w') as handle:
            handle.write(source)

        # execute command
        cmd = self.executableCommand(tmpname)
        ret = subprocess.call(cmd)

        # keep or discard the intermediate DOT file
        if self.out_dot:
            os.rename(tmpname, self.out_dot)
        else:
            os.unlink(tmpname)

        # BUGFIX: subprocess.call returns a negative code when the child is
        # killed by a signal; treat any non-zero status as failure (was: > 0)
        if ret != 0:
            raise Exception('Failed executing: "{}"'.format(' '.join(cmd)))
176 |
--------------------------------------------------------------------------------
/rxnorm_link_run.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Run this script to perform the RxNorm linking process and store the
5 | # documents in a database or flat file.
6 |
7 | import os
8 | import sys
9 | import logging
10 |
11 | from rxnorm_link import runImport
12 |
13 |
class DocHandler(object):
    """ Superclass for simple database import.

    Collects documents in memory; subclasses override `addDocument` and/or
    `finalize` to persist them.
    """

    def __init__(self):
        self.documents = []

    def addDocument(self, doc):
        """Queue a document; None documents are silently ignored."""
        if doc is None:
            return
        self.documents.append(doc)

    def finalize(self):
        """Hook for subclasses to flush or commit; no-op here."""
        pass
27 |
28 |
class DebugDocHandler(DocHandler):
    """Prints every incoming document instead of storing it."""

    def addDocument(self, doc):
        print(doc)

    def __str__(self):
        return "Debug logger"
37 |
38 |
class SQLiteDocHandler(DocHandler):
    """Stores linked RxNorm documents in a local SQLite database.

    Each document is exploded into (rxcui, property, value) rows in the
    `drug_cache` table; convenience views join those rows up by NDC.
    The database path can be overridden via the SQLITE_FILE env variable.
    """

    def __init__(self):
        super().__init__()
        from sqlite import SQLite
        base_dir = os.path.dirname(os.path.realpath(__file__))
        self.db_file = os.environ.get('SQLITE_FILE') or os.path.join(base_dir, 'databases/rxnorm.db')
        self.handled = 0

        # (re-)create the cache table and its lookup index
        self.sqlite = SQLite.get(self.db_file)
        self.sqlite.execute('DROP TABLE IF EXISTS drug_cache')
        self.sqlite.execute('''CREATE TABLE drug_cache
            (rxcui varchar, property text, value text)''')
        self.sqlite.execute('CREATE INDEX i_drug_cache ON drug_cache (rxcui, property)')

        # view: NDC -> treatment intent
        self.sqlite.execute('DROP VIEW IF EXISTS drug_treatments_by_ndc')
        self.sqlite.execute('''CREATE VIEW drug_treatments_by_ndc as
            select a.value as ndc, b.value as treatment_intent
            from drug_cache a join drug_cache b on a.rxcui=b.rxcui
            where a.property='ndc' and b.property='treatment_intent'
            ''')

        # view: NDC -> drug class
        self.sqlite.execute('DROP VIEW IF EXISTS drug_classes_by_ndc')
        self.sqlite.execute('''CREATE VIEW drug_classes_by_ndc as
            select a.value as ndc, b.value as drug_class
            from drug_cache a join drug_cache b on a.rxcui=b.rxcui
            where a.property='ndc' and b.property='drug_class'
            ''')

        # view: NDC -> ingredient (named via RXNCONSO)
        self.sqlite.execute('DROP VIEW IF EXISTS drug_ingredients_by_ndc')
        self.sqlite.execute('''CREATE VIEW drug_ingredients_by_ndc as
            select a.value as ndc, b.value as drug_ingredient, c.str as ingredient_name
            from drug_cache a join drug_cache b on a.rxcui=b.rxcui
            join RXNCONSO c on c.rxcui=b.value
            where a.property='ndc' and b.property='ingredient'
            and c.sab='RXNORM' and c.tty='IN'
            ''')

    def addDocument(self, doc):
        """Explode one document into (rxcui, property, value) rows."""
        rxcui = doc.get('rxcui', '0')
        fields = {
            'tty': doc.get('tty', None),
            'ndc': doc.get('ndc', None),
            'label': doc.get('label', None),
            'drug_class': doc.get('drugClasses', None),
            'treatment_intent': doc.get('treatmentIntents', None),
            'ingredient': doc.get('ingredients', None)
        }
        for prop, val in fields.items():
            if not val:
                continue
            # normalize scalars to one-element lists
            values = val if isinstance(val, list) else [val]
            for item in values:
                self.sqlite.execute(
                    'INSERT INTO drug_cache(rxcui, property, value) values(?, ?, ?)',
                    (rxcui, prop, item))
        self.handled += 1
        # commit in batches of 50 documents
        if self.handled % 50 == 0:
            self.sqlite.commit()

    def finalize(self):
        self.sqlite.commit()

    def __str__(self):
        return "SQLite import {}".format(self.db_file)
107 |
108 |
class MongoDocHandler(DocHandler):
    """ Handles documents for storage in MongoDB.

    Connection parameters come from the environment: MONGO_HOST, MONGO_PORT,
    MONGO_DB, MONGO_BUCKET and optionally MONGO_USER/MONGO_PASS.
    """

    def __init__(self):
        super().__init__()
        db_host = os.environ.get('MONGO_HOST') or 'localhost'
        # BUGFIX: int(os.environ.get('MONGO_PORT')) raised a TypeError when
        # the variable was unset (int(None)); the old fallback on the next
        # line was unreachable. Apply the default before converting.
        db_port = int(os.environ.get('MONGO_PORT') or 27017)
        db_name = os.environ.get('MONGO_DB') or 'default'
        db_bucket = os.environ.get('MONGO_BUCKET') or 'rxnorm'

        import pymongo  # imported here so it's only imported when using Mongo
        conn = pymongo.MongoClient(host=db_host, port=db_port)
        db = conn[db_name]

        # authenticate only when both credentials are provided
        db_user = os.environ.get('MONGO_USER')
        db_pass = os.environ.get('MONGO_PASS')
        if db_user and db_pass:
            db.authenticate(db_user, db_pass)

        self.mng = db[db_bucket]
        self.mng.ensure_index('ndc')
        self.mng.ensure_index('label', text=pymongo.TEXT)

    def addDocument(self, doc):
        """Queue a document, truncating over-long labels, and flush in
        batches of ~50."""
        lbl = doc.get('label')
        if lbl and len(lbl) > 1010:  # indexed, cannot be > 1024 in total
            doc['fullLabel'] = lbl
            doc['label'] = lbl[:1010]

        super().addDocument(doc)
        if len(self.documents) > 50:
            self._insertAndClear()

    def finalize(self):
        self._insertAndClear()

    def _insertAndClear(self):
        # bulk-insert queued documents, then empty the queue
        if len(self.documents) > 0:
            self.mng.insert(self.documents)
            self.documents.clear()

    def __str__(self):
        return "MongoDB at {}".format(self.mng)
158 |
159 |
class CSVHandler(DocHandler):
    """ Handles CSV export to "rxnorm.csv" in the working directory. """

    def __init__(self):
        super().__init__()
        self.csv_file = 'rxnorm.csv'
        self.csv_handle = open(self.csv_file, 'w')
        self.csv_handle.write("rxcui,tty,ndc,name,va_classes,treating,ingredients\n")

    def addDocument(self, doc):
        # NOTE(review): values are quoted but not CSV-escaped; a label
        # containing a double quote will produce a malformed row. Kept as-is
        # to preserve the established output format.
        self.csv_handle.write('{},"{}","{}","{}","{}","{}","{}"{}'.format(
            doc.get('rxcui', '0'),
            doc.get('tty', ''),
            doc.get('ndc', ''),
            doc.get('label', ''),
            ';'.join(doc.get('drugClasses') or []),
            ';'.join(doc.get('treatmentIntents') or []),
            ';'.join(doc.get('ingredients') or []),
            "\n"
        ))

    def finalize(self):
        # BUGFIX: the file handle was never flushed or closed, risking data
        # loss on interpreter exit; close it when the import is done.
        if not self.csv_handle.closed:
            self.csv_handle.close()

    def __str__(self):
        return 'CSV file "{}"'.format(self.csv_file)
183 |
184 |
def runLinking(ex_type):
    """ Create the handler matching `ex_type` and run the import.

    Falls back to a debug handler that only prints documents. Exits the
    process with status 1 when the requested handler cannot be set up or
    the type is unknown.
    """
    handler = DebugDocHandler()
    if ex_type:
        try:
            if ex_type == 'mongo':
                handler = MongoDocHandler()
            elif ex_type == 'couch':
                # import couchbase
                raise Exception('Couchbase not implemented')
            elif ex_type == 'csv':
                handler = CSVHandler()
            elif ex_type == 'sqlite':
                handler = SQLiteDocHandler()
            else:
                raise Exception('Unsupported export type: {}'.format(ex_type))
        except Exception as e:
            logging.error(e)
            sys.exit(1)

    print('-> Processing to {}'.format(handler))
    runImport(doc_handler=handler)
208 |
209 |
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    # export type: environment variable wins over first CLI argument
    cmd_arg = sys.argv[1] if len(sys.argv) > 1 else None
    runLinking(os.environ.get('EXPORT_TYPE') or cmd_arg)
217 |
218 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
# Makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = _build

# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif

# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .

.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext

# BUGFIX: the "<target>" placeholders had been stripped from the banner
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  xml        to make Docutils-native XML files"
	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"

clean:
	rm -rf $(BUILDDIR)/*

html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	".hhp project file in $(BUILDDIR)/htmlhelp."

qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/py-umls.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/py-umls.qhc"

devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/py-umls"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/py-umls"
	@echo "# devhelp"

epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	"(use \`make latexpdf' here to do that automatically)."

latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	"(use \`make info' here to do that automatically)."

info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	make -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	"or in $(BUILDDIR)/linkcheck/output.txt."

doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	"results in $(BUILDDIR)/doctest/output.txt."

xml:
	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
	@echo
	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."

pseudoxml:
	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
	@echo
	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
--------------------------------------------------------------------------------
/snomed.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # SNOMED import utilities, extracted from umls.py
5 | #
6 | # 2014-01-20 Created by Pascal Pfiffner
7 | #
8 |
9 | import sys
10 | import os
11 | import csv
12 | import logging
13 |
14 | from sqlite import SQLite # for py-umls standalone
15 |
16 |
class SNOMEDDBNotPresentException(Exception):
    """Raised when databases/snomed.db has not been created yet."""
19 |
class SNOMED(object):
    """ A class for importing SNOMED CT release files into an SQLite database.
    """
    sqlite_handle = None    # shared SQLite connection, created in setup_tables()

    @classmethod
    def database_path(cls):
        """Absolute path to databases/snomed.db next to this module."""
        absolute = os.path.dirname(os.path.realpath(__file__))
        return os.path.join(absolute, 'databases/snomed.db')

    @classmethod
    def check_database(cls):
        """ Check if our database is in place and if not, prompts to create it.
        Will raise on errors!

        SNOMED: (snomed.db)
        Read SNOMED CT from tab-separated files and create an SQLite database.
        """
        snomed_db = cls.database_path()
        if not os.path.exists(snomed_db):
            raise SNOMEDDBNotPresentException("The SNOMED database at {} does not exist. Run the script `snomed.py`."
                .format(os.path.abspath(snomed_db)))

    @classmethod
    def find_needed_files(cls, snomed_dir):
        """Locate the SNOMED release files we import inside `snomed_dir`.

        :param snomed_dir: Directory containing the SNOMED CT release files
        :returns: A dict mapping our table names to found file paths
        :raises: Exception if a needed file cannot be found
        """
        # table to file mapping
        prefixes = {
            'descriptions': 'sct2_Description_Full-en_',
            'relationships': 'sct2_Relationship_Full_'
        }
        found = {}
        # BUGFIX: the `snomed_dir` argument was previously overwritten with
        # sys.argv[1], ignoring whatever directory the caller passed in and
        # crashing when no command-line argument was present.

        # try to find the files
        for table, prefix in prefixes.items():
            found_file = _find_files(snomed_dir, prefix)
            if found_file is None:
                raise Exception('Unable to locate file starting with "{}" in SNOMED directory at {}'.format(prefix, snomed_dir))
            found[table] = found_file

        return found

    @classmethod
    def import_from_files(cls, rx_map):
        """Import each file in `rx_map` (table name -> path), skipping tables
        that already contain rows."""
        for table, filepath in rx_map.items():
            num_query = 'SELECT COUNT(*) FROM {}'.format(table)
            num_existing = cls.sqlite_handle.executeOne(num_query, ())[0]
            if num_existing > 0:
                continue

            cls.import_csv_into_table(filepath, table)

    @classmethod
    def import_csv_into_table(cls, snomed_file, table_name):
        """ Import SNOMED CSV into our SQLite database.
        The SNOMED CSV files can be parsed by Python's CSV parser with the
        "excel-tab" flavor.

        Exits the process on insert or CSV parse errors.
        """

        logging.debug('Importing SNOMED {} into snomed.db...'.format(table_name))

        # not yet imported, parse tab-separated file and import
        with open(snomed_file, encoding='utf-8') as csv_handle:
            cls.sqlite_handle.isolation_level = 'EXCLUSIVE'
            sql = cls.insert_query_for(table_name)
            reader = csv.reader(csv_handle, dialect='excel-tab')
            i = 0
            try:
                for row in reader:
                    if i > 0:  # first row is the header row

                        # execute SQL (we just ignore duplicates)
                        params = cls.insert_tuple_from_csv_row_for(table_name, row)
                        try:
                            cls.sqlite_handle.execute(sql, params)
                        except Exception as e:
                            sys.exit('Cannot insert {}: {}'.format(params, e))
                    i += 1

                # commit to file
                cls.sqlite_handle.commit()
                cls.did_import(table_name)
                cls.sqlite_handle.isolation_level = None

            except csv.Error as e:
                cls.sqlite_handle.rollback()
                sys.exit('CSV error on line {}: {}'.format(reader.line_num, e))

        logging.debug('{} concepts parsed'.format(i-1))


    @classmethod
    def setup_tables(cls):
        """ Creates the SQLite tables we need, not the tables we deserve.
        Does nothing if the tables/indexes already exist
        """
        if cls.sqlite_handle is None:
            cls.sqlite_handle = SQLite.get(cls.database_path())

        # descriptions
        cls.sqlite_handle.create('descriptions', '''(
            concept_id INTEGER PRIMARY KEY,
            lang TEXT,
            term TEXT,
            isa VARCHAR,
            active INT
            )''')

        # relationships
        cls.sqlite_handle.create('relationships', '''(
            relationship_id INTEGER PRIMARY KEY,
            source_id INT,
            destination_id INT,
            rel_type INT,
            rel_text VARCHAR,
            active INT
            )''')

    @classmethod
    def insert_query_for(cls, table_name):
        """ Returns the insert query needed for the given table, or None for
        unknown tables.
        """
        if 'descriptions' == table_name:
            return '''INSERT OR IGNORE INTO descriptions
                (concept_id, lang, term, isa, active)
                VALUES
                (?, ?, ?, ?, ?)'''
        if 'relationships' == table_name:
            return '''INSERT OR IGNORE INTO relationships
                (relationship_id, source_id, destination_id, rel_type, active)
                VALUES
                (?, ?, ?, ?, ?)'''
        return None

    @classmethod
    def insert_tuple_from_csv_row_for(cls, table_name, row):
        """Map one release-file CSV row onto the parameter tuple for the
        table's insert query; None for unknown tables."""
        if 'descriptions' == table_name:
            # column 6 holds the description type id
            isa = ''
            if len(row) > 6:
                if '900000000000013009' == row[6]:
                    isa = 'synonym'
                elif '900000000000003001' == row[6]:
                    isa = 'full'
            return (int(row[4]), row[5], row[7], isa, int(row[2]))
        if 'relationships' == table_name:
            return (int(row[0]), int(row[4]), int(row[5]), int(row[7]), int(row[2]))
        return None

    @classmethod
    def did_import(cls, table_name):
        """ Allows us to set hooks after tables have been imported.

        Creates indexes and names `isa` and `finding_site` relationships.
        """
        # index descriptions
        if 'descriptions' == table_name:
            print("----- DID IMPORT descriptions")
            cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS isa_index ON descriptions (isa)")

        # update and index relationships
        if 'relationships' == table_name:
            print("----- DID IMPORT relationships")
            cls.sqlite_handle.execute("UPDATE relationships SET rel_text = 'isa' WHERE rel_type = 116680003")
            cls.sqlite_handle.execute("UPDATE relationships SET rel_text = 'finding_site' WHERE rel_type = 363698007")
            cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS source_index ON relationships (source_id)")
            cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS destination_index ON relationships (destination_id)")
            cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS rel_text_index ON relationships (rel_text)")
189 |
class SNOMEDLookup(object):
	""" SNOMED term lookup against the databases imported by `SNOMED`. """
	
	# SQLite handle, opened on instantiation
	sqlite = None
	
	def __init__(self):
		self.sqlite = SQLite.get(SNOMED.database_path())
	
	def lookup_code_meaning(self, snomed_id, preferred=True, no_html=True):
		""" Returns all term matches for the given SNOMED id.
		The "preferred" flag here currently has no function.
		
		:param str snomed_id: The SNOMED concept id to look up
		:param bool preferred: Currently unused
		:param bool no_html: If True, returns a plain comma-separated string;
			otherwise returns HTML with synonyms and inactive terms dimmed
		:returns: A string; empty when there are no matches
		"""
		if snomed_id is None or len(snomed_id) < 1:
			return ''
		
		sql = 'SELECT term, isa, active FROM descriptions WHERE concept_id = ?'
		names = []
		
		# loop over results
		for res in self.sqlite.execute(sql, (snomed_id,)):
			if not no_html and ('synonym' == res[1] or 0 == res[2]):
				# dim synonyms and inactive terms in HTML output.
				# NOTE(review): the markup had been lost in this copy of the
				# file (the string literal was left unterminated); tags
				# reconstructed here — confirm desired styling
				names.append("<span style=\"color:#888;\">{}</span>".format(res[0]))
			else:
				names.append(res[0])
		
		if no_html:
			return ", ".join(names) if len(names) > 0 else ''
		return "<br/>\n".join(names) if len(names) > 0 else ''
	
	def lookup_if_isa(self, child_id, parent_id, checked=None):
		""" Determines if a child concept is refining a parent concept, i.e.
		if there is a (direct or indirect) "is a" (116680003) relationship from
		child to parent.
		
		:param checked: Internal accumulator of already-visited child ids,
			guards against relationship cycles
		:returns: A bool
		"""
		if not child_id or not parent_id:
			return False
		if checked is not None and child_id in checked:
			return False
		
		parents = self.lookup_parents_of(child_id)
		if parent_id in parents:
			return True
		
		# walk up the hierarchy, remembering where we've been
		chkd = checked or []
		chkd.append(child_id)
		for parent in parents:
			flag = self.lookup_if_isa(parent, parent_id, chkd)
			if flag:
				return True
		return False
	
	def lookup_parents_of(self, snomed_id):
		""" Returns a list of concept ids that have a direct "is a" (116680003)
		relationship with the given id.
		"""
		ids = []
		if snomed_id:
			# filter on `rel_text` in Python: filtering on rel_type in SQL
			# proved too slow (see `did_import`, which names and indexes
			# `rel_text` for exactly this purpose)
			sql = 'SELECT destination_id, rel_text FROM relationships WHERE source_id = ?'
			for res in self.sqlite.execute(sql, (snomed_id,)):
				if 'isa' == res[1]:
					ids.append(str(res[0]))
		return ids
253 |
254 |
class SNOMEDConcept(object):
	""" Represents a SNOMED concept.
	"""
	
	# shared lookup instance, created once when the class is defined
	uplooker = SNOMEDLookup()
	
	def __init__(self, code):
		self.code = code
		self._term = None
	
	@property
	def term(self):
		""" The term for the receiver's code; looked up lazily and cached. """
		if self._term is None:
			self._term = self.__class__.uplooker.lookup_code_meaning(self.code)
		return self._term
	
	def isa(self, parent_code):
		""" Checks whether the receiver is a child of the given code.
		The `parent_code` argument can also be a :class:`SNOMEDConcept`
		instance.
		
		:returns: A bool on whether the receiver is a child of the given
			concept
		"""
		other = parent_code.code if isinstance(parent_code, SNOMEDConcept) else parent_code
		return self.__class__.uplooker.lookup_if_isa(self.code, other)
281 |
282 |
283 | # find file function
284 | def _find_files(directory, prefix):
285 | for root, dirs, files in os.walk(directory):
286 | for name in files:
287 | if name.startswith(prefix):
288 | return os.path.join(directory, name)
289 |
290 | for name in dirs:
291 | found = _find_files(os.path.join(directory, name), prefix)
292 | if found:
293 | return found
294 | return None
295 |
296 |
# running this as a script does the database setup/check
if __name__ == '__main__':
	logging.basicConfig(level=logging.DEBUG)
	
	# when the database check fails, try to import from RF2 files
	try:
		SNOMED.check_database()
	except SNOMEDDBNotPresentException as err:
		if len(sys.argv) < 2:
			print("Provide the path to the extracted SNOMED (RF2) directory as first argument.")
			print("Download SNOMED from http://www.nlm.nih.gov/research/umls/licensedcontent/snomedctfiles.html")
			sys.exit(0)
		
		# import from files
		try:
			rf2_files = SNOMED.find_needed_files(sys.argv[1])
			SNOMED.sqlite_handle = None
			SNOMED.setup_tables()
			SNOMED.import_from_files(rf2_files)
		except Exception as err:
			print("SNOMED import failed: {}".format(err))
			sys.exit(0)
	
	# examples
	concept = SNOMEDConcept('215350009')
	print('SNOMED code "{0}": {1}'.format(concept.code, concept.term))
	
	concept = SNOMEDConcept('315004001')	# -> 128462008 -> 363346000 -> 55342001 x> 215350009
	for candidate, should_match in [('128462008', True), ('363346000', True), ('55342001', True), ('215350009', False)]:
		print('SNOMED code "{0}" refines "{1}": {2}'.format(concept.code, candidate, concept.isa(candidate)))
		assert should_match == concept.isa(candidate), '"{0}" refines "{1}" should return {2} or the database hasn’t been set up properly'.format(concept.code, candidate, 'True' if should_match else 'False')
329 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # py-umls documentation build configuration file, created by
4 | # sphinx-quickstart on Fri Apr 18 20:08:31 2014.
5 | #
6 | # This file is execfile()d with the current directory set to its
7 | # containing dir.
8 | #
9 | # Note that not all possible configuration values are present in this
10 | # autogenerated file.
11 | #
12 | # All configuration values have a default; values that are commented out
13 | # serve to show the default.
14 |
15 | import sys
16 | import os
17 |
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
# (Here: the package root one level up, so autodoc can import the modules.)
sys.path.insert(0, os.path.abspath('..'))
22 |
23 | # -- General configuration ------------------------------------------------
24 |
25 | # If your documentation needs a minimal Sphinx version, state it here.
26 | #needs_sphinx = '1.0'
27 |
28 | # Add any Sphinx extension module names here, as strings. They can be
29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
30 | # ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.todo',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = u'py-umls'
copyright = u'2014, Pascal Pfiffner'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = ''
# The full version, including alpha/beta/rc tags.
release = ''
# NOTE(review): version/release are left empty — presumably the project is
# unversioned; confirm before cutting a release.
60 |
61 | # The language for content autogenerated by Sphinx. Refer to documentation
62 | # for a list of supported languages.
63 | #language = None
64 |
65 | # There are two options for replacing |today|: either, you set today to some
66 | # non-false value, then it is used:
67 | #today = ''
68 | # Else, today_fmt is used as the format for a strftime call.
69 | #today_fmt = '%B %d, %Y'
70 |
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# ("_build" holds generated output; it is also git-ignored.)
exclude_patterns = ['_build']
74 |
75 | # The reST default role (used for this markup: `text`) to use for all
76 | # documents.
77 | #default_role = None
78 |
79 | # If true, '()' will be appended to :func: etc. cross-reference text.
80 | #add_function_parentheses = True
81 |
82 | # If true, the current module name will be prepended to all description
83 | # unit titles (such as .. function::).
84 | #add_module_names = True
85 |
86 | # If true, sectionauthor and moduleauthor directives will be shown in the
87 | # output. They are ignored by default.
88 | #show_authors = False
89 |
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built documents.
#keep_warnings = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
# (Sphinx's builtin default theme; no theme customization is done below.)
html_theme = 'default'
105 |
106 | # Theme options are theme-specific and customize the look and feel of a theme
107 | # further. For a list of options available for each theme, see the
108 | # documentation.
109 | #html_theme_options = {}
110 |
111 | # Add any paths that contain custom themes here, relative to this directory.
112 | #html_theme_path = []
113 |
114 | # The name for this set of Sphinx documents. If None, it defaults to
115 | # " v documentation".
116 | #html_title = None
117 |
118 | # A shorter title for the navigation bar. Default is the same as html_title.
119 | #html_short_title = None
120 |
121 | # The name of an image file (relative to this directory) to place at the top
122 | # of the sidebar.
123 | #html_logo = None
124 |
125 | # The name of an image file (within the static path) to use as favicon of the
126 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
127 | # pixels large.
128 | #html_favicon = None
129 |
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# NOTE: Sphinx warns during the build if "_static" does not exist.
html_static_path = ['_static']
134 |
135 | # Add any extra paths that contain custom files (such as robots.txt or
136 | # .htaccess) here, relative to this directory. These files are copied
137 | # directly to the root of the documentation.
138 | #html_extra_path = []
139 |
140 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
141 | # using the given strftime format.
142 | #html_last_updated_fmt = '%b %d, %Y'
143 |
144 | # If true, SmartyPants will be used to convert quotes and dashes to
145 | # typographically correct entities.
146 | #html_use_smartypants = True
147 |
148 | # Custom sidebar templates, maps document names to template names.
149 | #html_sidebars = {}
150 |
151 | # Additional templates that should be rendered to pages, maps page names to
152 | # template names.
153 | #html_additional_pages = {}
154 |
155 | # If false, no module index is generated.
156 | #html_domain_indices = True
157 |
158 | # If false, no index is generated.
159 | #html_use_index = True
160 |
161 | # If true, the index is split into individual pages for each letter.
162 | #html_split_index = False
163 |
164 | # If true, links to the reST sources are added to the pages.
165 | #html_show_sourcelink = True
166 |
167 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
168 | #html_show_sphinx = True
169 |
170 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
171 | #html_show_copyright = True
172 |
173 | # If true, an OpenSearch description file will be output, and all pages will
174 | # contain a tag referring to it. The value of this option must be the
175 | # base URL from which the finished HTML is served.
176 | #html_use_opensearch = ''
177 |
178 | # This is the file name suffix for HTML files (e.g. ".xhtml").
179 | #html_file_suffix = None
180 |
# Output file base name for HTML help builder.
# (Project name + "doc", the sphinx-quickstart default.)
htmlhelp_basename = 'py-umlsdoc'
183 |
184 |
185 | # -- Options for LaTeX output ---------------------------------------------
186 |
# All LaTeX settings below are the sphinx-quickstart defaults.
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
  ('index', 'py-umls.tex', u'py-umls Documentation',
   u'Pascal Pfiffner', 'manual'),
]
205 |
206 | # The name of an image file (relative to this directory) to place at the top of
207 | # the title page.
208 | #latex_logo = None
209 |
210 | # For "manual" documents, if this is true, then toplevel headings are parts,
211 | # not chapters.
212 | #latex_use_parts = False
213 |
214 | # If true, show page references after internal links.
215 | #latex_show_pagerefs = False
216 |
217 | # If true, show URL addresses after external links.
218 | #latex_show_urls = False
219 |
220 | # Documents to append as an appendix to all manuals.
221 | #latex_appendices = []
222 |
223 | # If false, no module index is generated.
224 | #latex_domain_indices = True
225 |
226 |
227 | # -- Options for manual page output ---------------------------------------
228 |
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
# (Section 1: user commands.)
man_pages = [
    ('index', 'py-umls', u'py-umls Documentation',
     [u'Pascal Pfiffner'], 1)
]
235 |
236 | # If true, show URL addresses after external links.
237 | #man_show_urls = False
238 |
239 |
240 | # -- Options for Texinfo output -------------------------------------------
241 |
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
# NOTE(review): the description below is still the quickstart placeholder.
texinfo_documents = [
  ('index', 'py-umls', u'py-umls Documentation',
   u'Pascal Pfiffner', 'py-umls', 'One line description of project.',
   'Miscellaneous'),
]
250 |
251 | # Documents to append as an appendix to all manuals.
252 | #texinfo_appendices = []
253 |
254 | # If false, no module index is generated.
255 | #texinfo_domain_indices = True
256 |
257 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
258 | #texinfo_show_urls = 'footnote'
259 |
260 | # If true, do not generate a @detailmenu in the "Top" node's menu.
261 | #texinfo_no_detailmenu = False
262 |
263 |
264 | # -- Options for Epub output ----------------------------------------------
265 |
# Bibliographic Dublin Core info.
# (Mirrors the project/author/copyright values from the general section.)
epub_title = u'py-umls'
epub_author = u'Pascal Pfiffner'
epub_publisher = u'Pascal Pfiffner'
epub_copyright = u'2014, Pascal Pfiffner'
271 |
272 | # The basename for the epub file. It defaults to the project name.
273 | #epub_basename = u'py-umls'
274 |
275 | # The HTML theme for the epub output. Since the default themes are not optimized
276 | # for small screen space, using the same theme for HTML and epub output is
277 | # usually not wise. This defaults to 'epub', a theme designed to save visual
278 | # space.
279 | #epub_theme = 'epub'
280 |
281 | # The language of the text. It defaults to the language option
282 | # or en if the language is not set.
283 | #epub_language = ''
284 |
285 | # The scheme of the identifier. Typical schemes are ISBN or URL.
286 | #epub_scheme = ''
287 |
288 | # The unique identifier of the text. This can be a ISBN number
289 | # or the project homepage.
290 | #epub_identifier = ''
291 |
292 | # A unique identification for the text.
293 | #epub_uid = ''
294 |
295 | # A tuple containing the cover image and cover page html template filenames.
296 | #epub_cover = ()
297 |
298 | # A sequence of (type, uri, title) tuples for the guide element of content.opf.
299 | #epub_guide = ()
300 |
301 | # HTML files that should be inserted before the pages created by sphinx.
302 | # The format is a list of tuples containing the path and title.
303 | #epub_pre_files = []
304 |
305 | # HTML files shat should be inserted after the pages created by sphinx.
306 | # The format is a list of tuples containing the path and title.
307 | #epub_post_files = []
308 |
# A list of files that should not be packed into the epub file.
# (Client-side search makes no sense in an epub reader.)
epub_exclude_files = ['search.html']
311 |
312 | # The depth of the table of contents in toc.ncx.
313 | #epub_tocdepth = 3
314 |
315 | # Allow duplicate toc entries.
316 | #epub_tocdup = True
317 |
318 | # Choose between 'default' and 'includehidden'.
319 | #epub_tocscope = 'default'
320 |
321 | # Fix unsupported image types using the PIL.
322 | #epub_fix_images = False
323 |
324 | # Scale large images.
325 | #epub_max_image_width = 0
326 |
327 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
328 | #epub_show_urls = 'inline'
329 |
330 | # If false, no index is generated.
331 | #epub_use_index = True
332 |
--------------------------------------------------------------------------------
/rxnorm_link.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Precompute interesting RXCUI relationships into a dictionary. Use the script
5 | # `rxnorm_link_run.sh` to store these dictionaries into a JSON database. See
6 | # that script for parameters to change.
7 | #
8 | # 2012-09-28 Created by Josh Mandel
9 | # 2014-02-10 Stolen by Pascal Pfiffner
10 | #
11 | # For profiling: pycallgraph graphviz -- rxnorm_link.py
12 |
13 | import sys
14 | import os.path
15 | sys.path.insert(0, os.path.dirname(__file__))
16 |
17 | import json
18 | import signal
19 | import logging
20 | from datetime import datetime
21 |
22 | from rxnorm import RxNorm, RxNormLookup
23 |
24 |
def doQ(rxhandle, q, p):
	""" Executes query `q` with parameters `p` on the given handle and
	returns the first column of every result row as a list. """
	values = []
	for row in rxhandle.fetchAll(q, p):
		values.append(row[0])
	return values
27 |
def toBrandAndGeneric(rxhandle, rxcuis, tty):
	""" Returns the set of rxcuis related to the given rxcuis via a
	'tradename_of' relationship. `tty` is currently unused. """
	related = set()
	query = "SELECT rxcui1 from rxnrel where rxcui2=? and rela='tradename_of'"
	for cui in rxcuis:
		related.update(doQ(rxhandle, query, (cui,)))
	return related
33 |
def toComponents(rxhandle, rxcuis, tty):
	""" Returns the set of SCDC component rxcuis for SBD/SCD concepts, an
	empty set for all other TTYs. """
	components = set()
	
	# only branded and clinical drugs have components
	if tty not in ("SBD", "SCD"):
		return components
	
	consists_sql = "SELECT rxcui1 from rxnrel where rxcui2=? and rela='consists_of'"
	scdc_sql = "SELECT rxcui from rxnconso where rxcui=? and sab='RXNORM' and tty='SCDC'"
	for cui in rxcuis:
		for part in doQ(rxhandle, consists_sql, (cui,)):
			components.update(doQ(rxhandle, scdc_sql, (part,)))
	
	return components
46 |
def toTreatmentIntents(rxhandle, rxcuis, tty):
	""" Collects NDF-RT "may_treat" treatment intents for all given rxcuis. """
	intents = set()
	for cui in rxcuis:
		intents.update(toTreatmentIntents_helper(rxhandle, cui, tty))
	return intents
52 |
def toTreatmentIntents_helper(rxhandle, rxcui, tty):
	""" Looks up NDF-RT "may_treat" targets for a single ingredient rxcui and
	returns their names, stripped of the " [Disease/Finding]" suffix. """
	assert tty=='IN'
	found = []
	ndfrt_sql = "SELECT rxaui from rxnconso where rxcui=? and tty='FN' and sab='NDFRT'"
	treat_sql = "SELECT rxaui1 from rxnrel where rxaui2=? and rela='may_treat'"
	name_sql = "SELECT str from rxnconso where rxaui=? and tty='FN' and sab='NDFRT'"
	for rxaui in doQ(rxhandle, ndfrt_sql, (rxaui,)) if False else doQ(rxhandle, ndfrt_sql, (rxcui,)):
		for treated in doQ(rxhandle, treat_sql, (rxaui,)):
			names = doQ(rxhandle, name_sql, (treated,))
			found.extend(n.replace(" [Disease/Finding]", "") for n in names)
	return found
64 |
def toMechanism(rxhandle, rxcuis, tty):
	""" Collects NDF-RT mechanisms of action for all given rxcuis. """
	mechanisms = set()
	for cui in rxcuis:
		mechanisms.update(toMechanism_helper(rxhandle, cui, tty))
	return mechanisms
70 |
def toMechanism_helper(rxhandle, rxcui, tty):
	""" Looks up the NDF-RT "has_mechanism_of_action" target for a single
	ingredient rxcui; returns the names without the " [MoA]" suffix. """
	assert tty=='IN'
	mechanisms = set()
	ndfrt_sql = "SELECT rxaui from rxnconso where rxcui=? and tty='FN' and sab='NDFRT'"
	moa_sql = "SELECT rxaui1 from rxnrel where rxaui2=? and rela='has_mechanism_of_action'"
	name_sql = "SELECT str from rxnconso where rxaui=? and tty='FN' and sab='NDFRT'"
	for rxaui in doQ(rxhandle, ndfrt_sql, (rxcui,)):
		targets = doQ(rxhandle, moa_sql, (rxaui,))
		if len(targets) > 0:
			# only the first target is resolved to a name
			names = doQ(rxhandle, name_sql, (targets[0],))
			mechanisms.update(n.replace(" [MoA]", "") for n in names)
	return mechanisms
82 |
83 |
def toIngredients(rxhandle, rxcuis, tty):
	""" Resolves all given rxcuis of type `tty` to their ingredient rxcuis. """
	ingredients = set()
	for cui in rxcuis:
		ingredients.update(toIngredients_helper(rxhandle, cui, tty))
	return ingredients
89 |
def toIngredients_helper(rxhandle, rxcui, tty):
	""" Resolves a single rxcui to its ingredient rxcuis, depending on TTY.
	
	Ingredients ("IN") cannot be resolved any further; some TTYs relate to
	their ingredient directly, the remainder are resolved recursively via an
	intermediate concept type.
	
	:param str rxcui: The concept to resolve
	:param str tty: The concept's TTY
	:returns: A list (possibly empty) of ingredient rxcuis
	"""
	if 'IN' == tty:
		return []
	
	# these TTYs have a direct relationship to their ingredient
	map_direct = {
		'MIN': 'has_part',
		'PIN': 'form_of',
		'BN': 'tradename_of',
		'SCDC': 'has_ingredient',
		'SCDF': 'has_ingredient',
		'SCDG': 'has_ingredient',
	}
	
	if tty in map_direct:
		return doQ(rxhandle, "SELECT rxcui1 from rxnrel where rxcui2=? and rela=?", (rxcui, map_direct[tty]))
	
	# indirect lookup: (rela to follow, TTY of the intermediate concept)
	map_indirect = {
		'BPCK': ('contains', 'SCD'),
		'GPCK': ('contains', 'SCD'),
		'SBD': ('tradename_of', 'SCD'),
		'SBDC': ('tradename_of', 'SCDC'),
		'SBDF': ('tradename_of', 'SCDF'),
		'SBDG': ('tradename_of', 'SCDG'),
		'SCD': ('consists_of', 'SCDC'),
	}
	
	if tty in map_indirect:
		rela, via_tty = map_indirect[tty]
		related = doQ(rxhandle, "SELECT rxcui1 from rxnrel where rxcui2=? and rela=?", (rxcui, rela))
		return toIngredients(rxhandle, related, via_tty)
	
	# `logging.warn` is a deprecated alias of `logging.warning`
	logging.warning('TTY "{}" is not mapped, skipping ingredient lookup'.format(tty))
	return []
124 |
125 |
def initVA(rxhandle):
	""" Initializes the VA drug class cache table and inserts all known drug
	classes by looking them up in the RXNSAT table (ATN = "VA_CLASS_NAME").
	
	:param rxhandle: An RxNormLookup-like handle used for all statements
	"""
	# SELECT DISTINCT tty, COUNT(tty) FROM rxnsat LEFT JOIN rxnconso AS r USING (rxcui) WHERE atn = "VA_CLASS_NAME" GROUP BY tty;
	# rebuild the cache from scratch on every run
	rxhandle.execute('DROP TABLE IF EXISTS va_cache')
	rxhandle.execute('''CREATE TABLE va_cache
		(rxcui varchar UNIQUE, va text, from_rxcui varchar, rela varchar, level int)''')
	# seed the level-0 entries with the classes declared in RXNSAT
	rxhandle.execute('''INSERT OR IGNORE INTO va_cache
		SELECT rxcui, atv, null, null, 0 FROM rxnsat
		WHERE atn = "VA_CLASS_NAME"''')
	rxhandle.sqlite.commit()
138 |
def traverseVA(rxhandle, rounds=3, expect=203175):
	""" Drug classes are set for a couple of different TTYs, it seems however
	most consistently to be defined on CD, SCD and AB TTYs.
	We cache the classes in va_cache and loop over rxcuis with known classes,
	applying the known classes to certain relationships.
	
	:param rxhandle: An RxNormLookup-like handle used for all queries
	:param int rounds: Number of propagation passes over the cache
	:param int expect: Total drug count, used only as the denominator of the
		coverage report (assumed > 0)
	"""
	print("-> Starting VA class mapping")
	
	# which relationships to follow, keyed by the source concept's TTY
	mapping = {
		'CD': [
			'has_tradename',	# > BD, SBD, ... ; tiny impact on step 2, compensated for in steps 3+
			'contained_in',	# > BPCK; tiny impact in step 2, compansated for in steps 3+
			'consists_of',	# > SCDC; big impact step 2+, starting to be compensated for in steps 5+; NOT IDEAL
			#'quantified_form',	# > SBD; no impact
		],
		'GPCK': [
			'has_tradename',	# > BPCK; small impact step 3
		],
		
		'SBD': [
			'isa',	# > SBDF; big impact step 2+, increasingly important (58% vs 75% coverage after step 5)
			'has_ingredient',	# > BN; small impact step 2+
			'tradename_of',	# > SCD; tiny impact step 2, fully compensated by step 4
			'consists_of',	# > SBDC; small impact step 4+
		],
		'SBDF': [
			#'tradename_of',	# > SCDF; no impact
			'has_ingredient',	# > BN; tiny impact step 2+
			#'inverse_isa',	# > SBD; no impact
		],
		'SBDG': [
			'has_ingredient',	# > BN; tiny impact step 2+
			#'tradename_of',	# > SCDG; no impact
		],
		'SBDC': [
			'tradename_of',	# > SCDC; tiny impact step 3, compensated by step 5
		],
		
		'SCD': [
			'isa',	# > SCDF; big impact step 2+, not compensated (59% vs 75% coverage after step 5)
			'has_quantified_form',	# > SCD; tiny impact step 2, fully compensated in step 3
			'contained_in',	# > GPCK; tiny impact steps 4+
			'has_tradename',	# > SBD; small impact steps 3+
		],
		'SCDC': [
			'constitutes',	# > SCD; big impact steps 3+ (63% vs 75% coverage after step 5)
			'has_tradename',	# > SBDC; impact in step 3, partially compensated in step 4
		],
		'SCDF': [
			'inverse_isa',	# > SCD; large impact steps 3+
		],
		'SCDG': [
			#'tradename_of',	# > SBDG; no impact
		]
	}
	
	found = set()
	per_level_sql = 'SELECT rxcui, va FROM va_cache WHERE level = ?'
	
	# each round processes only the entries written in the previous round
	for l in range(0,rounds):
		i = 0
		existing = rxhandle.fetchAll(per_level_sql, (l,))
		num_drugs = len(existing)
		this_round = set();
		
		# loop all rxcuis that already have a class and walk their relationships
		for rxcui, va_imp in existing:
			found.add(rxcui)
			this_round.add(rxcui)
			vas = va_imp.split('|')
			seekRelAndStoreSameVAs(rxhandle, rxcui, set(vas), mapping, l)
			
			# progress report
			i += 1
			print('--> Step {} {:.1%}'.format(l+1, i / num_drugs), end="\r")
		
		# commit after every round
		rxhandle.sqlite.commit()
		print('==> Step {}, found classes for {} of {} drugs, {:.2%} coverage'.format(l+1, len(this_round), expect, len(found) / expect))
	
	print('-> VA class mapping complete')
220 |
def seekRelAndStoreSameVAs(rxhandle, rxcui, vas, mapping, at_level=0):
	""" For the given RXCUI retrieves all relations, as defined in `mapping`,
	and updates those concepts with the drug classes passed in in `vas`.
	"""
	assert(rxcui)
	assert(len(vas) > 0)
	
	# collect the "rela" values worth following, based on the concept's TTYs
	wanted_relas = set()
	for tty in rxhandle.lookup_tty(rxcui):
		wanted_relas.update(mapping.get(tty, []))
	if 0 == len(wanted_relas):
		return
	
	# fetch all related rxcuis and filter by "rela" here in Python; doing the
	# filtering with "... AND rela IN (...)" in SQL was no faster and longer
	rel_sql = 'SELECT DISTINCT rxcui1, rela FROM rxnrel WHERE rxcui2 = ?'
	for related_cui, rela in rxhandle.fetchAll(rel_sql, [rxcui]):
		if rela in wanted_relas:
			storeVAs(rxhandle, related_cui, vas, rxcui, rela, at_level+1)
244 |
def storeVAs(rxhandle, rxcui, vas, from_rxcui, via_rela, level=0):
	""" Stores the drug classes `vas` for the given concept id, but only if
	that concept does not have any cached classes yet (first writer wins).
	
	:param rxhandle: An RxNormLookup-like handle used for all statements
	:param str rxcui: The concept to store classes for
	:param set vas: The drug class names to store
	:param str from_rxcui: The concept the classes were propagated from
	:param str via_rela: The relationship used for the propagation
	:param int level: The propagation depth at which this write happens
	"""
	assert(rxcui)
	assert(len(vas) > 0)
	
	# bail out if we already have classes for this concept. This short-
	# circuit is deliberate; the merge logic that used to sit below it was
	# unreachable and has been removed.
	exist_sql = 'SELECT va FROM va_cache WHERE rxcui = ?'
	exist_ret = doQ(rxhandle, exist_sql, [rxcui])
	if exist_ret and len(exist_ret) > 0:
		return
	
	# new, insert
	ins_sql = 'INSERT OR REPLACE INTO va_cache (rxcui, va, from_rxcui, rela, level) VALUES (?, ?, ?, ?, ?)'
	ins_val = '|'.join(vas)
	rxhandle.execute(ins_sql, (rxcui, ins_val, from_rxcui, via_rela, level))
270 |
def toDrugClasses(rxhandle, rxcui):
	""" Returns the cached VA drug class names for the given rxcui, an empty
	list if none have been cached. """
	row = rxhandle.fetchOne('SELECT va FROM va_cache WHERE rxcui = ?', (rxcui,))
	if row is None:
		return []
	return row[0].split('|')
275 |
276 |
def runImport(doc_handler=None):
	""" Run the actual linking.
	
	You can provide a :class:`DocHandler` subclass which will handle the JSON
	documents, for example store them to MongoDB for the MongoDocHandler. These
	classes are defined in `rxnorm_link_run.py` for now.
	
	:param doc_handler: Optional handler that receives one dictionary per
		drug via `addDocument()`; `finalize()` is called after the loop
	"""
	
	# install keyboard interrupt handler
	def signal_handler(signal, frame):
		print("\nx> Aborted")
		sys.exit(0)
	signal.signal(signal.SIGINT, signal_handler)
	
	# prepare RxNorm databases
	try:
		RxNorm.check_database()
		rxhandle = RxNormLookup()
		rxhandle.prepare_to_cache_classes()
	except Exception as e:
		logging.error(e)
		sys.exit(1)
	
	# fetch rxcui's for drug-type concepts (i.e. restrict by TTY)
	drug_types = ('SCD', 'SCDC', 'SBDG', 'SBD', 'SBDC', 'BN', 'SBDF', 'SCDG', 'SCDF', 'IN', 'MIN', 'PIN', 'BPCK', 'GPCK')
	param = ', '.join(['?' for d in drug_types])
	all_sql = "SELECT RXCUI, TTY from RXNCONSO where SAB='RXNORM' and TTY in ({})".format(param)
	
	all_drugs = rxhandle.fetchAll(all_sql, drug_types)
	num_drugs = len(all_drugs)
	
	# traverse VA classes; starts the VA drug class caching process if needed,
	# which runs a minute or two
	if rxhandle.can_cache():
		initVA(rxhandle)
		traverseVA(rxhandle, rounds=5, expect=num_drugs)
	
	# loop all concepts
	i = 0
	w_ti = 0		# concepts with at least one treatment intent
	w_va = 0		# concepts with at least one VA drug class
	w_either = 0	# concepts with either of the two
	last_report = datetime.now()
	print('-> Indexing {} items'.format(num_drugs))
	
	for res in all_drugs:
		# NOTE(review): `params` is assembled but never used below
		params = [res[0]]
		params.extend(drug_types)
		label = rxhandle.lookup_rxcui_name(res[0]) # fast (indexed column)
		ndc = rxhandle.ndc_for_rxcui(res[0]) # fast (indexed column)
		ndc = RxNorm.ndc_normalize_list(ndc) # fast (string permutation)
		
		# find ingredients, drug classes and more
		ingr = toIngredients(rxhandle, [res[0]], res[1]) # rather slow
		ti = toTreatmentIntents(rxhandle, ingr, 'IN') # requires "ingr"
		va = toDrugClasses(rxhandle, res[0]) # fast, loads from our cached table
		gen = toBrandAndGeneric(rxhandle, [res[0]], res[1]) # fast
		comp = toComponents(rxhandle, [res[0]], res[1]) # fast
		mech = toMechanism(rxhandle, ingr, 'IN') # fast
		
		# create JSON-ready dictionary (save space by not adding empty properties)
		d = {
			'rxcui': res[0],
			'tty': res[1],
			'label': label,
		}
		if len(ndc) > 0:
			d['ndc'] = list(ndc)
		
		if len(ingr) > 0:
			d['ingredients'] = list(ingr)
		if len(ti) > 0:
			d['treatmentIntents'] = list(ti)
		if len(va) > 0:
			d['drugClasses'] = list(va)
		if len(gen) > 0:
			d['generics'] = list(gen)
		if len(comp) > 0:
			d['components'] = list(comp)
		if len(mech) > 0:
			d['mechanisms'] = list(mech)
		
		# count
		i += 1
		if len(ti) > 0:
			w_ti += 1
		if len(va) > 0:
			w_va += 1
		if len(ti) > 0 or len(va) > 0:
			w_either += 1
		
		# The dictionary "d" at this point contains all the drug's precomputed
		# properties, to debug print this:
		#print(json.dumps(d, sort_keys=True, indent=2))
		if doc_handler:
			doc_handler.addDocument(d)
		
		# log progress every 2 seconds or so
		if (datetime.now() - last_report).seconds > 2:
			last_report = datetime.now()
			print('--> {:.1%} n: {}, ti: {}, va: {}, either: {}'.format(i / num_drugs, i, w_ti, w_va, w_either), end="\r")
	
	# loop done, finalize
	# NOTE(review): the final report divides by num_drugs — assumes at least
	# one drug-type concept was found
	if doc_handler:
		doc_handler.finalize()
	
	print('--> {:.1%} n: {}, ti: {}, va: {}, either: {}'.format(i / num_drugs, i, w_ti, w_va, w_either))
	print('-> Done')
385 |
386 |
if '__main__' == __name__:
	logging.basicConfig(level=logging.INFO)
	# `logging.warn` is a deprecated alias of `logging.warning`
	logging.warning(''' Running linking without document handler, meaning no RxNorm document will be stored.
		Adjust and run `rxnorm_link_run.sh` for more control.''')
	runImport()
392 |
--------------------------------------------------------------------------------
/rxnorm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Utilities to handle RxNorm
5 | #
6 | # 2014-01-28 Extracted from UMLS.py
7 |
8 | import os.path
9 | import logging
10 | import re
11 | import requests
12 | import xml.etree.ElementTree as ET
13 |
14 | from collections import Counter, OrderedDict
15 | from sqlite import SQLite
16 | from graphable import GraphableObject, GraphableRelation
17 |
18 |
class RxNorm (object):
	""" Utilities for working with an RxNorm SQLite database: a presence
	check for the database file and NDC (National Drug Code) normalization.
	"""
	
	@classmethod
	def check_database(cls):
		""" Ensure the RxNorm database file is in place.
		
		Looks for `databases/rxnorm.db` next to this module and raises an
		Exception pointing at the `databases/rxnorm.sh` import script if the
		file is missing.
		"""
		rxnorm_db = os.path.join(os.path.dirname(__file__), 'databases/rxnorm.db')
		if not os.path.exists(rxnorm_db):
			raise Exception("The RxNorm database at {} does not exist. Run the import script `databases/rxnorm.sh`."
				.format(os.path.abspath(rxnorm_db)))
	
	@classmethod
	def ndc_normalize_list(cls, ndc_list):
		""" Normalize every NDC in the given list, returning the unique
		results (order is not guaranteed; unnormalizable codes yield `None`
		entries).
		"""
		return list({cls.ndc_normalize(ndc) for ndc in ndc_list})
	
	@classmethod
	def ndc_normalize(cls, ndc):
		""" Normalizes an NDC (National Drug Code) number.
		
		The pseudo-code published by NIH
		(http://www.nlm.nih.gov/research/umls/rxnorm/NDC_Normalization_Code.rtf)
		first identifies the format (e.g. "6-3-2") and then normalizes based on
		that finding. However since the normalized string is always 5-4-2,
		padded with leading zeroes and removing all dashes afterwards, this
		implementation goes a much simpler route.
		
		NDCs that only contain one dash are treated as if they were missing the
		package specifier, so they get a "-00" appended before normalization.
		
		:param str ndc: The NDC to normalize as string
		:returns: A string with the normalized NDC, or `None` if the number
			couldn't be normalized
		"""
		if not ndc or len(ndc) > 14:
			return None
		
		# some NDCs from MTHFDA use '*' where a '0' is meant
		norm = ndc.replace('*', '0')
		parts = norm.split('-')
		
		# a single dash is likely a 6-4 code missing the package specifier;
		# append "-00" to make it a regular three-part code
		if len(parts) == 2:
			parts.append('00')
		
		if len(parts) == 3:
			# pad each segment with leading zeroes and keep the trailing
			# 5, 4 and 2 characters respectively, yielding the 5-4-2 form
			norm = parts[0].zfill(5)[-5:] + parts[1].zfill(4)[-4:] + parts[2].zfill(2)[-2:]
		
		elif len(parts) == 1:
			# "if NDC passed has 12 digits and first char is '0' and it's from
			# VANDF then trim first char". We do NOT check if it's from the VA
			# as this would require more information than just the NDC
			if len(norm) == 12 and norm.startswith('0'):
				norm = norm[1:]
			
			# without dashes, only an 11-digit code is acceptable
			elif len(norm) != 11:
				return None
		
		# anything still containing non-digits (incl. codes with 3+ dashes) is invalid
		return norm if norm.isdigit() else None
99 |
100 |
class RxNormLookup (object):
	""" Class for RxNorm lookup.
	
	Wraps an SQLite RxNorm database (`databases/rxnorm.db`) and offers name,
	TTY, relation, NDC and drug-class lookups, plus raw SQL access.
	"""
	
	# class-level defaults; `sqlite` is replaced per-instance in __init__
	sqlite = None
	cache_drug_class = False		# will be set to true when the prepare_to_cache_classes method gets called
	
	
	def __init__(self):
		# open the RxNorm database that sits next to this module
		absolute = os.path.dirname(os.path.realpath(__file__))
		self.sqlite = SQLite.get(os.path.join(absolute, 'databases/rxnorm.db'))
	
	
	# MARK: - "name" lookup
	
	def lookup_rxcui(self, rxcui, preferred=True):
		""" Return a tuple with (str, tty, rxcui, rxaui) or - if "preferred" is
		False - a tuple with (preferred-name, list-of-tuples)
		
		Returns None if `rxcui` is empty or yields no English RXNCONSO rows.
		"""
		if rxcui is None or len(rxcui) < 1:
			return None
		
		# retrieve all matches
		# NOTE(review): SQLite treats "ENG" (double quotes) as a string only
		# because no column of that name exists — single quotes would be safer
		sql = 'SELECT str, tty, rxcui, rxaui FROM rxnconso WHERE rxcui = ? AND lat = "ENG"'
		
		found = []
		for res in self.sqlite.execute(sql, (rxcui,)):
			found.append(res)
		
		if 0 == len(found):
			logging.error("RxNormLookup.lookup_rxcui: RxCUI {} not found".format(rxcui))
			return None
		
		# preferred name: first row whose TTY appears earliest in this
		# priority list (branded/clinical drug components first)
		pref_match = None
		for tty in ['SBDC', 'SCDC', 'SBD', 'SCD', 'CD', 'SBDF', 'SCDF', 'BN', 'IN', 'PIN', 'MIN']:
			for res in found:
				if tty == res[1]:
					pref_match = res
					break
			if pref_match is not None:
				break
		
		if preferred:
			# fall back to the first row if no prioritized TTY matched
			return pref_match if pref_match is not None else found[0]
		
		return (pref_match[0] if pref_match is not None else None, found)
	
	def lookup_rxcui_name(self, rxcui, preferred=True, no_html=True):
		""" Return a string or HTML for the meaning of the given code.
		If preferred is True (the default), only one match will be returned,
		looking for specific TTY and using the "best" one.
		There is currently NO SUPPORT FOR preferred = False
		
		NOTE(review): the `preferred` argument is ignored — lookup_rxcui is
		always called with preferred=True. Both `no_html` branches currently
		use the identical format string; the HTML variant looks unfinished.
		"""
		
		res = self.lookup_rxcui(rxcui, preferred=True)
		if res is None:
			return ''
		
		if no_html:
			str_format = "{0} [{1}]"
		else:
			str_format = "{0} [{1}]"
		
		# res is (str, tty, rxcui, rxaui); only the first two are formatted
		return str_format.format(*res)
	
	
	# MARK: - Relations
	
	def lookup_tty(self, rxcui):
		""" Returns a set of TTYs for the given RXCUI.
		
		:param str rxcui: The RXCUI to look up; None returns None
		:returns: A (possibly empty) set of TTY strings, or None
		"""
		if rxcui is None:
			return None
		
		sql = 'SELECT tty FROM rxnconso WHERE rxcui = ?'
		ttys = set()
		for res in self.sqlite.execute(sql, (rxcui,)):
			ttys.add(res[0])
		
		return ttys
	
	def lookup_related(self, rxcui, relation=None, to_rxcui=None):
		""" Returns a set of tuples containing the RXCUI and the actual relation
		for the desired relation, or all if the relation is not specified.
		
		:param str rxcui: The RXCUI for which to look up relations
		:param str relation: Optional: the type of the relation, e.g. "has_ingredient"
		:param str to_rxcui: An optional second rxcui, to return all relations
			between the two given rxcuis. Ignored if `relation` is present.
		:returns: A set of tuples, where tuples are (rxcui, rela)
		"""
		if rxcui is None:
			return None
		
		found = set()
		# three mutually exclusive query shapes: by relation name, by target
		# concept, or unrestricted (all relations of `rxcui`)
		if relation is not None:
			sql = "SELECT rxcui1, rela FROM rxnrel WHERE rxcui2 = ? AND rela = ?"
			for res in self.sqlite.execute(sql, (rxcui, relation)):
				found.add(res)
		elif to_rxcui is not None:
			sql = "SELECT rxcui1, rela FROM rxnrel WHERE rxcui2 = ? AND rxcui1 = ?"
			for res in self.sqlite.execute(sql, (rxcui, to_rxcui)):
				found.add(res)
		else:
			sql = "SELECT rxcui1, rela FROM rxnrel WHERE rxcui2 = ?"
			for res in self.sqlite.execute(sql, (rxcui,)):
				found.add(res)
		
		return found
	
	
	# MARK: - RxCUI
	
	def rxcui_for_ndc(self, ndc):
		""" Find the RXCUI for the given NDC from our NDC-cache-table.
		
		This method only does exact lookup for now, it should be extended to
		use normalized NDC formats.
		
		:param str ndc: The NDC to look up
		:returns: The matching RXCUI as string, or None
		"""
		if ndc is None:
			return None
		# TODO: ensure NDC normalization
		
		# count how often each RXCUI is mapped to this NDC
		rxcuis = {}
		sql = "SELECT RXCUI FROM NDC WHERE NDC = ?"
		for res in self.sqlite.execute(sql, (ndc,)):
			rxcuis[res[0]] = rxcuis.get(res[0], 0) + 1
		
		# on multiple hits, pick the most frequently mapped RXCUI
		rxcui = list(rxcuis.keys())[0] if len(rxcuis) > 0 else None
		if len(rxcuis) > 1:
			popular = OrderedDict(Counter(rxcuis).most_common())
			rxcui = popular.popitem(False)[0]
		
		return str(rxcui) if rxcui is not None else None
	
	def ndc_for_rxcui(self, rxcui):
		""" Find the NDC from our NDC-cache-table for the given RXCUI.
		
		:returns: A list of distinct NDC strings (may be empty), or None if
			`rxcui` is None
		"""
		if rxcui is None:
			return None
		
		sql = 'SELECT distinct ndc FROM ndc WHERE rxcui = ?'
		return [res[0] for res in self.sqlite.execute(sql, (rxcui,))]
	
	def rxcui_for_name(self, name, limit_tty=None):
		""" Tries to find an RXCUI for the concept name.
		
		Does this by performing a "starts with" against the STR column on
		RXNCONSO, then replaces any spaces with wildcards and finally chops off
		one word after the other until a match is found.
		
		This works but is slow and far from perfect. RxNav's ``approxMatch`` is
		definitely better, you can use ``rxcui_for_name_approx`` to get an
		RXCUI using that service.
		
		:param str name: The name to get an RXCUI for
		:param list limit_tty: Optional: limit search to a given list of TTYs
		:returns: The best matching rxcui, if any, as string
		"""
		if name is None:
			return None
		
		rxcuis = {}
		# NOTE(review): limit_tty values are interpolated into the SQL string;
		# fine for internal callers but not safe for untrusted input
		lim = 'tty IN ("{}") AND'.format('","'.join(limit_tty)) if limit_tty else ''
		sql = 'SELECT rxcui, tty FROM rxnconso WHERE {} str LIKE ?'.format(lim)
		
		# try the full string, allowing wildcard at the trailing end
		for res in self.sqlite.execute(sql, (name + '%',)):
			rxcuis[res[0]] = rxcuis.get(res[0], 0) + 1
		
		# also query with spaces replaced by '%'
		# NOTE(review): unlike the chop-off step below, this runs even when
		# the first query already matched — confirm whether a
		# `if 0 == len(rxcuis)` guard was intended here
		for res in self.sqlite.execute(sql, (name.replace(' ', '%') + '%',)):
			rxcuis[res[0]] = rxcuis.get(res[0], 0) + 1
		
		# still nothing, try chopping off parts from the right
		if 0 == len(rxcuis):
			parts = name.split()
			for x in range(len(parts) - 1):
				comp = '%'.join(parts[:-(x+1)])
				for res in self.sqlite.execute(sql, (comp + '%',)):
					rxcuis[res[0]] = rxcuis.get(res[0], 0) + 1
				if len(rxcuis) > 0:
					break
		
		# on multiple hits, pick the most frequently matched RXCUI
		rxcui = list(rxcuis.keys())[0] if len(rxcuis) > 0 else None
		if len(rxcuis) > 1:
			popular = OrderedDict(Counter(rxcuis).most_common())
			rxcui = popular.popitem(False)[0]
		
		return str(rxcui) if rxcui is not None else None
	
	def rxcui_for_name_approx(self, name):
		""" Returns the best ``approxMatch`` RXCUI as found when using RxNav's
		service against the provided name. Runs synchronously.
		
		:param str name: The name to get an RXCUI for
		:returns: The top ranked rxcui, if any, as string
		"""
		matches = self.rxnav_approx_match(name, nmax=1)
		return str(matches[0]) if matches is not None and len(matches) > 0 else None
	
	def rxnav_approx_match(self, name, nmax=10):
		""" Returns the top #nmax ``approximateTerm`` rxcuis as found when using
		RxNav's service against the provided name. Runs synchronously.
		
		:param str name: The name to get an RXCUI for
		:param int nmax: The maximum number of unique rxcuis to return, 10 by
			default
		:returns: The top ranked rxcuis, if any, as a list
		"""
		if name is None:
			return None
		
		# query RxNav and parse its XML response
		url = 'http://rxnav.nlm.nih.gov/REST/approximateTerm'
		r = requests.get(url, params={'term': name, 'option': 1})		# we don't use `maxEntries` as duplicate rxcuis count separately
		root = ET.fromstring(r.text)
		candidates = root.findall('.//candidate')
		rxcuis = []
		for cand in candidates:
			rxcui = cand.find('rxcui')
			if rxcui is not None and rxcui.text is not None:
				#rank = cand.find('rank')		# rely on RxNav's order for now
				if rxcui.text not in rxcuis:		# keep unique, preserve order
					rxcuis.append(rxcui.text)
				
				# stop after nmax
				if nmax is not None and len(rxcuis) >= nmax:
					break
		
		return rxcuis
	
	
	# MARK: - Drug Class OBSOLETE, WILL BE GONE
	
	def can_cache(self):
		""" Whether the `va_cache` table exists in the database. """
		return self.sqlite.hasTable('va_cache')
	
	def prepare_to_cache_classes(self):
		""" Create the `va_cache` table if needed and flag caching as on. """
		if self.sqlite.create('va_cache', '(rxcui primary key, va varchar)'):
			self.cache_drug_class = True
	
	def va_drug_class(self, rxcui):
		""" Returns a list of VA class names for a given RXCUI. EXPERIMENTAL.
		
		:returns: A list of VA class name strings, or None when there is no
			cache entry (or `rxcui` is None)
		"""
		#if not self.cache_drug_class:
		#	return None
		if rxcui is None:
			return None
		
		# check dedicated table; classes are stored pipe-separated
		sql = 'SELECT va FROM va_cache WHERE rxcui = ?'
		res = self.sqlite.executeOne(sql, (rxcui,))
		return res[0].split('|') if res else None
	
	def friendly_class_format(self, va_name):
		""" Tries to reformat the VA drug class name so it's suitable for
		display.
		
		:param str va_name: A raw VA class name, e.g. "[AB100] PENICILLINS"
		:returns: A title-cased, trimmed name, or None for empty input
		"""
		if va_name is None or 0 == len(va_name):
			return None
		
		# remove identifier (everything up to and including "]")
		if ']' in va_name:
			va_name = va_name[va_name.index(']')+1:]
			va_name = va_name.strip()
		
		# remove appended specifiers (text after "," or "/", but only if the
		# separator is not within the first few characters)
		if ',' in va_name and va_name.index(',') > 2:
			va_name = va_name[0:va_name.index(',')]
		
		if '/' in va_name and va_name.index('/') > 2:
			va_name = va_name[0:va_name.index('/')]
		
		# capitalize nicely: lowercase everything, then upcase word starts
		va_name = va_name.lower();
		va_name = re.sub(r'(^| )(\w)', lambda match: r'{}{}'.format(match.group(1), match.group(2).upper()), va_name)
		
		return va_name
	
	
	# MARK: - Bare Metal
	
	def execute(self, sql, params=()):
		""" Execute and return the pointer of an SQLite execute() query. """
		return self.sqlite.execute(sql, params)
	
	def fetchOne(self, sql, params=()):
		""" Execute and return the result of fetchone() on a raw SQL query. """
		return self.sqlite.execute(sql, params).fetchone()
	
	def fetchAll(self, sql, params=()):
		""" Execute and return the result of fetchall() on a raw SQL query. """
		return self.sqlite.execute(sql, params).fetchall()
396 |
397 |
class RxNormCUI (GraphableObject):
	""" A graphable RxNorm concept: renders itself and its RXNREL relations
	into a dot context, styling the node by TTY and VA drug class.
	"""
	
	rxcui = None		# the concept's RXCUI as string
	_ttys = None		# set of TTYs, populated via the `ttys` property
	relations = None	# cached list of relations, filled in deliver_to()
	# NOTE(review): class-level attribute — one shared lookup (and one SQLite
	# connection) is created at import time for all instances
	rxlookup = RxNormLookup()
	
	def __init__(self, rxcui, label=None):
		# NOTE(review): `label` is accepted but unused; the rxcui doubles as label
		super().__init__(rxcui, rxcui)
		self.shape = 'box'
		self.rxcui = rxcui
	
	@property
	def ttys(self):
		return self._ttys
	
	@ttys.setter
	def ttys(self, val):
		# keep the node's shape in sync with its term types
		self._ttys = val
		self.update_shape_from_ttys()
	
	
	def find_relations(self, to_rxcui=None, max_width=10):
		""" Look up this concept's relations, grouped by relation name.
		
		:param str to_rxcui: Optional: restrict to relations towards this RXCUI
		:param int max_width: Relation groups with more members than this are
			not expanded individually
		:returns: A list of RxNormConceptRelation objects
		"""
		counted = {}
		for rxcui, rela in self.rxlookup.lookup_related(self.rxcui, None, to_rxcui):
			if rela in counted:
				counted[rela].append(rxcui)
			else:
				counted[rela] = [rxcui]
		
		found = []
		for rela, items in sorted(counted.items()):		# sort to generate mostly consistent dot files
			if len(items) > max_width:
				# summarize overly wide groups with a proxy node labeled
				# with the relation name and the member count
				# NOTE(review): `rel` is created but never appended to
				# `found`, so the proxy relation is dropped — confirm whether
				# a `found.append(rel)` is missing here
				proxy = GraphableObject(None, rela)
				rel = GraphableRelation(self, str(len(items)), proxy)
				
				if self.announced_via:		# if our announcer is here, be nice and link back
					for rxcui in items:
						if rxcui == self.announced_via.rxcui1.rxcui:
							via = RxNormCUI(rxcui)
							found.append(RxNormConceptRelation(self, rela, via))
			else:
				for rxcui in sorted(items):		# sort to generate mostly consistent dot files
					obj = RxNormCUI(rxcui)
					rel = RxNormConceptRelation(self, rela, obj)
					found.append(rel)
		
		return found
	
	
	def deliver_to(self, dot_context, is_leaf):
		""" Render this node into the dot context; non-leaf nodes also
		announce all their relations.
		"""
		self.update_self_from_rxcui()
		super().deliver_to(dot_context, is_leaf)
		
		# if we are a leaf, still fetch the relation going back to our announcer
		if is_leaf:
			if self.relations is None and self.announced_via:
				rela = self.find_relations(
					to_rxcui=self.announced_via.rxcui1.rxcui,
					max_width=dot_context.max_width
				)
				if rela:
					rela[0].announce_to(dot_context)
		else:
			if self.relations is None:
				self.relations = self.find_relations(max_width=dot_context.max_width)
			
			for rel in self.relations:
				rel.announce_to(dot_context)
	
	
	def update_self_from_rxcui(self):
		""" Fill label, TTYs and VA-class styling from database lookups. """
		if self.rxcui:
			ret = self.rxlookup.lookup_rxcui(self.rxcui, preferred=False)
			if ret is not None and len(ret) > 1 and len(ret[1]) > 0:
				pref = ret[0]
				found = ret[1]
				self.ttys = set([res[1] for res in found])
				# label: wrapped name plus "[rxcui - TTY, TTY, ...]"
				self.label = _splitted_string(pref if pref else found[0][0])
				self.label += "\n[{} - {}]".format(self.rxcui, ', '.join(sorted(self._ttys)))
			
			# highlight concepts that carry VA drug classes
			vas = self.rxlookup.va_drug_class(self.rxcui)
			if vas:
				self.style = 'bold'
				self.color = 'violet'
				self.label += "\n{}".format(_splitted_string(', '.join(vas)))
	
	def update_shape_from_ttys(self):
		""" Map term types to dot node shapes (branded bold, SBD* double
		box, MIN/IN/PIN pentagons). Order matters: first match wins.
		"""
		if self._ttys:
			if 'BD' in self._ttys or 'BN' in self._ttys:
				self.style = 'bold'
			elif 'SBD' in [tty[:3] for tty in self._ttys]:
				self.shape = 'box,peripheries=2'
			elif 'MIN' in self._ttys:
				self.shape = 'polygon,sides=5,peripheries=2'
			elif 'IN' in self._ttys or 'PIN' in self._ttys:
				self.shape = 'polygon,sides=5'
494 |
class RxNormConceptRelation (GraphableRelation):
	""" A graphable relation between two RxNorm concepts; "isa"-style
	relations are drawn with a dashed edge.
	"""
	
	rxcui1 = None		# originating RxNormCUI object
	rxcui2 = None		# target RxNormCUI object
	
	def __init__(self, rxcuiobj1, rela, rxcuiobj2):
		super().__init__(rxcuiobj1, rela, rxcuiobj2)
		self.rxcui1 = rxcuiobj1
		self.rxcui2 = rxcuiobj2
		
		# relations like "isa" or "tradename_of/isa" get a dashed edge
		if rela.endswith('isa'):
			self.style = 'dashed'
506 |
507 |
508 | def _splitted_string(string, maxlen=60):
509 | if len(string) > maxlen:
510 | at = 0
511 | newstr = ''
512 | for word in string.split():
513 | if at > maxlen:
514 | newstr += "\n"
515 | at = 0
516 | if at > 0:
517 | newstr += ' '
518 | at += 1
519 | newstr += word
520 | at += len(word)
521 | return newstr
522 | return string
523 |
524 |
# running this as a script does the database setup/check and prints name,
# term types and relationships for each RXCUI given on the command line
if __name__ == '__main__':
	RxNorm.check_database()
	
	import sys
	args = sys.argv[1:]
	if not args:
		print('x> Provide RXCUIs as arguments on the command line')
		sys.exit(0)
	
	look = RxNormLookup()
	for rxcui in args:
		print('-----')
		meaning = look.lookup_rxcui_name(rxcui, preferred=False)
		ttys = look.lookup_tty(rxcui)
		related = look.lookup_related(rxcui)
		
		print('RxCUI "{0}": {1}'.format(rxcui, meaning))
		print('Concept type "{0}": {1}'.format(rxcui, ', '.join(ttys)))
		print('Relationships "{0}":'.format(rxcui))
		# sort by relation name; pad columns so the output lines up
		for rel_cui, rel_name in sorted(related, key=lambda pair: pair[1]):
			rname, rtty, _, _ = look.lookup_rxcui(rel_cui)
			pad_rela = ' ' * (17 + len(rxcui) - len(rel_name))
			pad_cui = ' ' * (9 - len(rel_cui))
			pad_tty = ' ' * (6 - len(rtty))
			print('{}{}:{}{}{}{} {}'.format(pad_rela, rel_name, pad_cui, rel_cui, pad_tty, rtty, rname))
551 |
--------------------------------------------------------------------------------