├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── dist └── umls-graph-0.0.2.tar.gz ├── docs ├── README.md └── images │ ├── umls-mysql-import.png │ ├── umls-mysql.png │ └── umls-neo4j.png ├── setup.cfg ├── setup.py ├── src ├── umls_graph.egg-info │ ├── PKG-INFO │ ├── SOURCES.txt │ ├── dependency_links.txt │ ├── requires.txt │ └── top_level.txt └── umls_graph │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── dataset.cpython-36.pyc │ ├── umls_ambiglui.cpython-36.pyc │ ├── umls_ambigsui.cpython-36.pyc │ ├── umls_mrconso.cpython-36.pyc │ ├── umls_mrconso_concept.cpython-36.pyc │ ├── umls_mrconso_string.cpython-36.pyc │ ├── umls_mrconso_term.cpython-36.pyc │ ├── umls_mrdef.cpython-36.pyc │ ├── umls_mrrel.cpython-36.pyc │ ├── umls_mrsat.cpython-36.pyc │ ├── umls_mrsmap.cpython-36.pyc │ ├── umls_mrsty.cpython-36.pyc │ └── umls_srdef.cpython-36.pyc │ ├── dataset.py │ ├── umls_ambiglui.py │ ├── umls_ambigsui.py │ ├── umls_mrconso.py │ ├── umls_mrconso_concept.py │ ├── umls_mrconso_string.py │ ├── umls_mrconso_term.py │ ├── umls_mrdef.py │ ├── umls_mrrel.py │ ├── umls_mrsat.py │ ├── umls_mrsmap.py │ ├── umls_mrsty.py │ └── umls_srdef.py └── tests └── __init__.py /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 The Python Packaging Authority (PyPA) 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include pyproject.toml 2 | 3 | # Include the README 4 | include *.md 5 | 6 | # Include the license file 7 | include LICENSE.txt 8 | 9 | # Include setup.py 10 | include setup.py 11 | 12 | # Include the data files 13 | # recursive-include data * 14 | # recursive-include examples * 15 | recursive-include src * 16 | 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # UMLS-Graph 2 | 3 | Build a medical knowledge graph based on the Unified Medical Language System (UMLS) 4 | 5 | ## Requisite 6 | 7 | Install MySQL Server 5.6 and import the UMLS data into a MySQL database. Please refer to the [UMLS](https://www.nlm.nih.gov/research/umls/index.html) website for instructions on installing the UMLS database. 
8 | 9 | ## Installation 10 | 11 | ```pip 12 | pip install umls-graph 13 | ``` 14 | 15 | ## Let Codes Speak 16 | 17 | ```python 18 | from umls_graph.dataset import make_umls_all 19 | 20 | # MySQL database information 21 | mysql_info = {} 22 | mysql_info["database"] = "umls" 23 | mysql_info["username"] = "root" 24 | mysql_info["password"] = "{not gonna tell you}" 25 | mysql_info["hostname"] = "localhost" 26 | 27 | # read all UMLS tables and save them as CSV-formatted files in a specific folder 28 | make_umls_all(mysql_info=mysql_info,save_folder="umls_datasets") 29 | 30 | ``` 31 | 32 | ## License 33 | The `umls-graph` project is provided by [Donghua Chen](https://github.com/dhchenx/umls-graph). 34 | 35 | NOTE: This project DOES NOT provide the UMLS data for download because of license restrictions. In addition, the processed data have not been verified in actual clinical use. You are responsible for your own use of any UMLS data. -------------------------------------------------------------------------------- /dist/umls-graph-0.0.2.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhchenx/umls-graph/9bf415c727f77e7859aaac522d1996d7bb2b6f9c/dist/umls-graph-0.0.2.tar.gz -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Import UMLS into UMKG 2 | 3 | This document describes the principles and steps for importing the core data structures of UMLS into the UMKG database. 4 | 5 | ## Step 1: Install the UMLS database 6 | 7 | First, follow the UMLS guidelines to set up a MySQL 5.6 server, which makes it convenient to fetch UMLS concepts, strings, atoms, and their relationships. 8 | 9 | The steps of loading UMLS knowledge sources: 10 | 1. Download UMLS's 2020AB release files from the following [link](https://www.nlm.nih.gov/research/umls/licensedcontent/umlsknowledgesources.html). 11 | 2. 
Unzip the UMLS release files and then also unzip `mmsys.zip` into the current folder. 12 | 3. click `run64.bat` to run MetamorphoSys for customizing your UMLS subset and browse the details of UMLS concepts. 13 | 4. Then we turn to the subset folder exported by MetamorphoSys and check if there is mysql scripts created in the `META` folder. If so, we are ready to import such scripts and data into MySQL 5.6 server. 14 | 15 | The steps of setting up MySQL 5.6 for storing UMLS knowledge sources: 16 | 1. Download [MySQL 5.6](https://dev.mysql.com/downloads/mysql/5.6.html) from Oracle website and install MySQL server by default settings of MySQL installation. Then we get the MySQL 5.6 running. 17 | 2. For convenience of browsing data, we also install [MySQL WorkBench](https://dev.mysql.com/downloads/workbench/) to have GUI for operating MySQL database's data. 18 | 3. Before running this script file, we need to create a schema or database inside the MySQL database server. Turn to the `META` folder that contains mysql script file `populate_mysql_db.bat` and change MySQL login information (db user,db password, db name) inside the `bat` file. 19 | 4. Then, we run the `populate_mysql_db.bat` and wait for its completion. 20 | 5. Finally, we got entire datasets of UMLS stored in MySQL 5.6 server so later we can easily fetch necessary contents from MySql server. 21 | 22 | ![MySql for UMLS](images/umls-mysql.png) 23 | 24 | ## Step 2: Run scripts to obtain UMLS data structures 25 | 26 | Then we write Python scripts to obtain UMLS core structures and store the fetched data into CSV format of importing graph databases. 27 | 28 | In this case, we ignore the procedure of setting up Python environment, which can be easily found online. 29 | 30 | With proper settings of Python environment and the installed PyCharm, we can simply write Python scripts to obtain key information from the UMLS database running on MySQL server. 
31 | 32 | Here is an example of accessing the UMLS database: 33 | 34 | ```python 35 | #!/usr/bin/python3 36 | import pymysql 37 | 38 | db = pymysql.connect("localhost", "[YOUR DB USERNAME]", "[YOUR DB PASSWORD]", "umls2020") 39 | cursor = db.cursor() 40 | sql = "SELECT * from mrconso limit 1,100" 41 | cursor.execute(sql) 42 | results = cursor.fetchall() 43 | 44 | # obtain a list of column names 45 | cols=cursor.description 46 | list_cols = [] 47 | for i in range(len(cols)): 48 | list_cols.append(cols[i][0]) 49 | print(list_cols) 50 | 51 | for row in results: 52 | # ID 53 | AU = row[list_cols.index('AUI')] 54 | # Relationships 55 | CUI = row[list_cols.index('CUI')] 56 | LUI = row[list_cols.index('LUI')] 57 | SUI = row[list_cols.index('SUI')] 58 | # Properties 59 | STR = row[list_cols.index('STR')] 60 | # print results 61 | print(CUI+"\t"+LUI+"\t"+SUI+"\t"+STR) 62 | # close connection 63 | db.close() 64 | ``` 65 | 66 | UMLS uses complicated data structures to represent concepts, terms, strings, atoms, and their relationships. For a better understanding of these concepts and structures, I highly recommend reading the [UMLS Reference Manual](https://www.ncbi.nlm.nih.gov/books/NBK9676/). 67 | 68 | The key UMLS structures to obtain are as follows: 69 | 70 | ### (1) Concepts, Terms, Strings, and Atoms 71 | 72 | We use the `mrconso` table to obtain the following information. 73 | 74 | 1. we retrieve distinct CUIs for representing ```Concept``` nodes. 75 | 2. we retrieve distinct LUIs for representing ```Term``` nodes. 76 | 3. we retrieve distinct SUIs for representing ```String``` nodes. 77 | 4. we retrieve distinct AUIs and their properties for representing ```Atom``` nodes. 78 | 79 | Thus, we get the nodes CSV file. 80 | 81 | ### (2) Relationships between Concepts, Terms, Strings, and Atoms 82 | 83 | We use the `mrrel` table to obtain the relationships between the above entities and the properties of those relationships. 
84 | 85 | Source node ID is obtained from the CUI1 or AUI1 fields in the table; 86 | Target node ID is obtained from the CUI2 or AUI2 fields in the table. 87 | 88 | Then we create a relationship between CUI1/AUI1 and CUI2/AUI2 and attach the remaining fields to it as relationship properties. 89 | 90 | Thus, we get the relationship CSV file. 91 | 92 | ### (3) Mapping Relationships 93 | 94 | To do... 95 | 96 | ### (4) Definitions 97 | 98 | To do... 99 | 100 | ### (5) Attributes 101 | 102 | To do... 103 | 104 | After we follow the aforementioned steps to obtain the necessary information, we are ready to import the UMLS data into the Neo4j database. 105 | 106 | 107 | ## Step 3: Import UMLS data into UMKG 108 | 109 | After preparing all datasets from UMLS, we run the `neo4j-admin import` command to batch-import the UMLS data into the UMKG database we created in Neo4j Desktop. 110 | 111 | The `neo4j-admin import` command is useful for importing large amounts of graph data into the database. 112 | 113 | Please follow the steps below: 114 | 1. Move the created datasets into the `import` folder in the Neo4j database root folder. 115 | 2. Open the Windows Command Prompt (cmd.exe) and change the working directory to the database root folder, which is the parent folder of the `bin` folder containing the `neo4j-admin.bat` file. 116 | 3. Enter the example import command as follows: 117 | 118 | ``` 119 | "bin/neo4j-admin.bat" import --nodes "import/umls_aui_nodes.csv" --nodes "import/umls_cui_nodes.csv" --nodes "import/umls_lui_nodes.csv" --nodes "import/umls_sui_nodes.csv" --relationships "import/umls_rels.csv" --database UMLS.db 120 | ``` 121 | 122 | In the above command, `--nodes` specifies the location of a node table, and `--relationships` specifies the location of a relationship table. You can pass multiple node or relationship tables to import various types of nodes and relationships. 
123 | 124 | ![MySql for UMLS](images/umls-mysql-import.png) 125 | 126 | For more information, see the [Neo4j Manual](https://neo4j.com/graphacademy/training-intro-40/19-using-neo4j-admin-tool-import/). 127 | 128 | If everything has been prepared correctly, executing the command loads the UMLS data into the Neo4j database. 129 | 130 | ## Step 4: Query test 131 | 132 | After we finish importing the UMLS data, we can use simple query statements to examine whether the imported data are correctly stored and organized. 133 | 134 | ![MySql for UMLS](images/umls-neo4j.png) 135 | 136 | -------------------------------------------------------------------------------- /docs/images/umls-mysql-import.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhchenx/umls-graph/9bf415c727f77e7859aaac522d1996d7bb2b6f9c/docs/images/umls-mysql-import.png -------------------------------------------------------------------------------- /docs/images/umls-mysql.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhchenx/umls-graph/9bf415c727f77e7859aaac522d1996d7bb2b6f9c/docs/images/umls-mysql.png -------------------------------------------------------------------------------- /docs/images/umls-neo4j.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhchenx/umls-graph/9bf415c727f77e7859aaac522d1996d7bb2b6f9c/docs/images/umls-neo4j.png -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | # This includes the license file(s) in the wheel. 
3 | # https://wheel.readthedocs.io/en/stable/user_guide.html#including-license-files-in-the-generated-wheel-file 4 | license_files = LICENSE.txt 5 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """A setuptools based setup module. 2 | 3 | Medical Knowledge Graph Based on Unified Medical Language System (UMLS) 4 | 5 | """ 6 | 7 | # Always prefer setuptools over distutils 8 | from setuptools import setup, find_packages 9 | import pathlib 10 | 11 | here = pathlib.Path(__file__).parent.resolve() 12 | 13 | # Get the long description from the README file 14 | long_description = (here / 'README.md').read_text(encoding='utf-8') 15 | 16 | # Arguments marked as "Required" below must be included for upload to PyPI. 17 | # Fields marked as "Optional" may be commented out. 18 | 19 | setup( 20 | # This is the name of your project. The first time you publish this 21 | # package, this name will be registered for you. It will determine how 22 | # users can install this project, e.g.: 23 | # 24 | # $ pip install sampleproject 25 | # 26 | # And where it will live on PyPI: https://pypi.org/project/sampleproject/ 27 | # 28 | # There are some restrictions on what makes a valid project name 29 | # specification here: 30 | # https://packaging.python.org/specifications/core-metadata/#name 31 | name='umls-graph', # Required 32 | 33 | # Versions should comply with PEP 440: 34 | # https://www.python.org/dev/peps/pep-0440/ 35 | # 36 | # For a discussion on single-sourcing the version across setup.py and the 37 | # project code, see 38 | # https://packaging.python.org/en/latest/single_source_version.html 39 | version='0.0.2', # Required 40 | 41 | # This is a one-line description or tagline of what your project does. 
This 42 | # corresponds to the "Summary" metadata field: 43 | # https://packaging.python.org/specifications/core-metadata/#summary 44 | description='Build medical knowledge graph based on Unified Medical Language System (UMLS)', # Optional 45 | 46 | # This is an optional longer description of your project that represents 47 | # the body of text which users will see when they visit PyPI. 48 | # 49 | # Often, this is the same as your README, so you can just read it in from 50 | # that file directly (as we have already done above) 51 | # 52 | # This field corresponds to the "Description" metadata field: 53 | # https://packaging.python.org/specifications/core-metadata/#description-optional 54 | long_description=long_description, # Optional 55 | 56 | # Denotes that our long_description is in Markdown; valid values are 57 | # text/plain, text/x-rst, and text/markdown 58 | # 59 | # Optional if long_description is written in reStructuredText (rst) but 60 | # required for plain-text or Markdown; if unspecified, "applications should 61 | # attempt to render [the long_description] as text/x-rst; charset=UTF-8 and 62 | # fall back to text/plain if it is not valid rst" (see link below) 63 | # 64 | # This field corresponds to the "Description-Content-Type" metadata field: 65 | # https://packaging.python.org/specifications/core-metadata/#description-content-type-optional 66 | long_description_content_type='text/markdown', # Optional (see note above) 67 | 68 | # This should be a valid link to your project's main homepage. 69 | # 70 | # This field corresponds to the "Home-Page" metadata field: 71 | # https://packaging.python.org/specifications/core-metadata/#home-page-optional 72 | url='https://github.com/dhchenx/umls-graph', # Optional 73 | 74 | # This should be your name or the name of the organization which owns the 75 | # project. 76 | author='Donghua Chen', # Optional 77 | 78 | # This should be a valid email address corresponding to the author listed 79 | # above. 
80 | author_email='douglaschan@126.com', # Optional 81 | 82 | # Classifiers help users find your project by categorizing it. 83 | # 84 | # For a list of valid classifiers, see https://pypi.org/classifiers/ 85 | classifiers=[ # Optional 86 | # How mature is this project? Common values are 87 | # 3 - Alpha 88 | # 4 - Beta 89 | # 5 - Production/Stable 90 | 'Development Status :: 3 - Alpha', 91 | 92 | # Indicate who your project is intended for 93 | 'Intended Audience :: Developers', 94 | 'Topic :: Software Development :: Build Tools', 95 | 96 | # Pick your license as you wish 97 | 'License :: OSI Approved :: MIT License', 98 | 99 | # Specify the Python versions you support here. In particular, ensure 100 | # that you indicate you support Python 3. These classifiers are *not* 101 | # checked by 'pip install'. See instead 'python_requires' below. 102 | 'Programming Language :: Python :: 3', 103 | 'Programming Language :: Python :: 3.6', 104 | 'Programming Language :: Python :: 3.7', 105 | 'Programming Language :: Python :: 3.8', 106 | 'Programming Language :: Python :: 3.9', 107 | 'Programming Language :: Python :: 3 :: Only', 108 | ], 109 | 110 | # This field adds keywords for your project which will appear on the 111 | # project page. What does your project relate to? 112 | # 113 | # Note that this is a list of additional keywords, separated 114 | # by commas, to be used to assist searching for the distribution in a 115 | # larger catalog. 116 | keywords="unified medical language system, UMLS, knowledge graph, medical knowledge, medical concept, neo4j", # Optional 117 | 118 | # When your source code is in a subdirectory under the project root, e.g. 119 | # `src/`, it is necessary to specify the `package_dir` argument. 120 | package_dir={'': 'src'}, # Optional 121 | 122 | # You can just specify package directories manually here if your project is 123 | # simple. Or you can use find_packages(). 
124 | # 125 | # Alternatively, if you just want to distribute a single Python file, use 126 | # the `py_modules` argument instead as follows, which will expect a file 127 | # called `my_module.py` to exist: 128 | # 129 | # py_modules=["my_module"], 130 | # 131 | packages=find_packages(where='src'), # Required 132 | 133 | # Specify which Python versions you support. In contrast to the 134 | # 'Programming Language' classifiers above, 'pip install' will check this 135 | # and refuse to install the project if the version does not match. See 136 | # https://packaging.python.org/guides/distributing-packages-using-setuptools/#python-requires 137 | python_requires='>=3.6, <4', 138 | 139 | # This field lists other packages that your project depends on to run. 140 | # Any package you put here will be installed by pip when your project is 141 | # installed, so they must be valid existing projects. 142 | # 143 | # For an analysis of "install_requires" vs pip's requirements files see: 144 | # https://packaging.python.org/en/latest/requirements.html 145 | install_requires=[ 146 | "pymysql", 147 | # "bs4" 148 | ], # Optional 149 | 150 | # List additional groups of dependencies here (e.g. development 151 | # dependencies). Users will be able to install these using the "extras" 152 | # syntax, for example: 153 | # 154 | # $ pip install sampleproject[dev] 155 | # 156 | # Similar to `install_requires` above, these must be valid existing 157 | # projects. 158 | extras_require={ # Optional 159 | 'dev': ['check-manifest'], 160 | 'test': ['coverage'], 161 | }, 162 | 163 | # If there are data files included in your packages that need to be 164 | # installed, specify them here. 165 | include_package_data=True, 166 | package_data={ # Optional 167 | #'sample': ['package_data.dat'], 168 | }, 169 | 170 | # Although 'package_data' is the preferred approach, in some case you may 171 | # need to place data files outside of your packages. 
See: 172 | # http://docs.python.org/distutils/setupscript.html#installing-additional-files 173 | # 174 | # In this case, 'data_file' will be installed into '/my_data' 175 | data_files=[ 176 | # ('speech-test-data', ['data/english.wav']) 177 | ], # Optional 178 | 179 | # To provide executable scripts, use entry points in preference to the 180 | # "scripts" keyword. Entry points provide cross-platform support and allow 181 | # `pip` to create the appropriate form of executable for the target 182 | # platform. 183 | # 184 | # For example, the following would provide a command called `sample` which 185 | # executes the function `main` from this package when invoked: 186 | entry_points={ # Optional 187 | #'console_scripts': [ 188 | # 'sample=sample:main', 189 | #], 190 | }, 191 | 192 | # List additional URLs that are relevant to your project as a dict. 193 | # 194 | # This field corresponds to the "Project-URL" metadata fields: 195 | # https://packaging.python.org/specifications/core-metadata/#project-url-multiple-use 196 | # 197 | # Examples listed include a pattern for specifying where the package tracks 198 | # issues, where the source is hosted, where to say thanks to the package 199 | # maintainers, and where to support the project financially. The key is 200 | # what's used to render the link text on PyPI. 
201 | 202 | project_urls={ # Optional 203 | 'Bug Reports': 'https://github.com/dhchenx/umls-graph/issues' 204 | }, 205 | 206 | ) 207 | 208 | -------------------------------------------------------------------------------- /src/umls_graph.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: umls-graph 3 | Version: 0.0.2 4 | Summary: Build medical knowledge graph based on Unified Medical Language System (UMLS) 5 | Home-page: https://github.com/dhchenx/umls-graph 6 | Author: Donghua Chen 7 | Author-email: douglaschan@126.com 8 | License: UNKNOWN 9 | Project-URL: Bug Reports, https://github.com/dhchenx/umls-graph/issues 10 | Keywords: unified medical language system,UMLS,knowledge graph,medical knowledge,medical concept,neo4j 11 | Platform: UNKNOWN 12 | Classifier: Development Status :: 3 - Alpha 13 | Classifier: Intended Audience :: Developers 14 | Classifier: Topic :: Software Development :: Build Tools 15 | Classifier: License :: OSI Approved :: MIT License 16 | Classifier: Programming Language :: Python :: 3 17 | Classifier: Programming Language :: Python :: 3.6 18 | Classifier: Programming Language :: Python :: 3.7 19 | Classifier: Programming Language :: Python :: 3.8 20 | Classifier: Programming Language :: Python :: 3.9 21 | Classifier: Programming Language :: Python :: 3 :: Only 22 | Requires-Python: >=3.6, <4 23 | Description-Content-Type: text/markdown 24 | Provides-Extra: dev 25 | Provides-Extra: test 26 | License-File: LICENSE.txt 27 | 28 | # UMLS-Graph 29 | 30 | Build a medical knowledge graph based on Unified Language Medical System (UMLS) 31 | 32 | ## Requisite 33 | 34 | Install MySQL Server 5.6 and import UMLS data into MySQL database. Please refer to [UMLS](https://www.nlm.nih.gov/research/umls/index.html) websites on how to install the UMLS database. 
35 | 36 | ## Installation 37 | 38 | ```pip 39 | pip install umls-graph 40 | ``` 41 | 42 | ## Let Codes Speak 43 | 44 | ```python 45 | from umls_graph.dataset import make_umls_all 46 | 47 | # MySQL database information 48 | mysql_info = {} 49 | mysql_info["database"] = "umls" 50 | mysql_info["username"] = "root" 51 | mysql_info["password"] = "{not gonna tell you}" 52 | mysql_info["hostname"] = "localhost" 53 | 54 | # read all UMLS table and save them to csv formatted files in a specific folder 55 | make_umls_all(mysql_info=mysql_info,save_folder="umls_datasets") 56 | 57 | ``` 58 | 59 | ## License 60 | The `umls-graph` project is provided by [Donghua Chen](https://github.com/dhchenx/umls-graph). 61 | 62 | NOTE: This project DOES NOT provide the UMLS data download due to the license issue. In addition, the processed data are not verified in actual clinical use. Please be response for any UMLS data use. 63 | 64 | -------------------------------------------------------------------------------- /src/umls_graph.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | LICENSE.txt 2 | MANIFEST.in 3 | README.md 4 | setup.cfg 5 | setup.py 6 | src/umls_graph/__init__.py 7 | src/umls_graph/dataset.py 8 | src/umls_graph/umls_ambiglui.py 9 | src/umls_graph/umls_ambigsui.py 10 | src/umls_graph/umls_mrconso.py 11 | src/umls_graph/umls_mrconso_concept.py 12 | src/umls_graph/umls_mrconso_string.py 13 | src/umls_graph/umls_mrconso_term.py 14 | src/umls_graph/umls_mrdef.py 15 | src/umls_graph/umls_mrrel.py 16 | src/umls_graph/umls_mrsat.py 17 | src/umls_graph/umls_mrsmap.py 18 | src/umls_graph/umls_mrsty.py 19 | src/umls_graph/umls_srdef.py 20 | src/umls_graph.egg-info/PKG-INFO 21 | src/umls_graph.egg-info/SOURCES.txt 22 | src/umls_graph.egg-info/dependency_links.txt 23 | src/umls_graph.egg-info/requires.txt 24 | src/umls_graph.egg-info/top_level.txt 25 | src/umls_graph/__pycache__/__init__.cpython-36.pyc 26 | 
src/umls_graph/__pycache__/dataset.cpython-36.pyc 27 | src/umls_graph/__pycache__/umls_ambiglui.cpython-36.pyc 28 | src/umls_graph/__pycache__/umls_ambigsui.cpython-36.pyc 29 | src/umls_graph/__pycache__/umls_mrconso.cpython-36.pyc 30 | src/umls_graph/__pycache__/umls_mrconso_concept.cpython-36.pyc 31 | src/umls_graph/__pycache__/umls_mrconso_string.cpython-36.pyc 32 | src/umls_graph/__pycache__/umls_mrconso_term.cpython-36.pyc 33 | src/umls_graph/__pycache__/umls_mrdef.cpython-36.pyc 34 | src/umls_graph/__pycache__/umls_mrrel.cpython-36.pyc 35 | src/umls_graph/__pycache__/umls_mrsat.cpython-36.pyc 36 | src/umls_graph/__pycache__/umls_mrsmap.cpython-36.pyc 37 | src/umls_graph/__pycache__/umls_mrsty.cpython-36.pyc 38 | src/umls_graph/__pycache__/umls_srdef.cpython-36.pyc -------------------------------------------------------------------------------- /src/umls_graph.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/umls_graph.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | pymysql 2 | 3 | [dev] 4 | check-manifest 5 | 6 | [test] 7 | coverage 8 | -------------------------------------------------------------------------------- /src/umls_graph.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | umls_graph 2 | -------------------------------------------------------------------------------- /src/umls_graph/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhchenx/umls-graph/9bf415c727f77e7859aaac522d1996d7bb2b6f9c/src/umls_graph/__init__.py -------------------------------------------------------------------------------- /src/umls_graph/__pycache__/__init__.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhchenx/umls-graph/9bf415c727f77e7859aaac522d1996d7bb2b6f9c/src/umls_graph/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /src/umls_graph/__pycache__/dataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhchenx/umls-graph/9bf415c727f77e7859aaac522d1996d7bb2b6f9c/src/umls_graph/__pycache__/dataset.cpython-36.pyc -------------------------------------------------------------------------------- /src/umls_graph/__pycache__/umls_ambiglui.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhchenx/umls-graph/9bf415c727f77e7859aaac522d1996d7bb2b6f9c/src/umls_graph/__pycache__/umls_ambiglui.cpython-36.pyc -------------------------------------------------------------------------------- /src/umls_graph/__pycache__/umls_ambigsui.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhchenx/umls-graph/9bf415c727f77e7859aaac522d1996d7bb2b6f9c/src/umls_graph/__pycache__/umls_ambigsui.cpython-36.pyc -------------------------------------------------------------------------------- /src/umls_graph/__pycache__/umls_mrconso.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhchenx/umls-graph/9bf415c727f77e7859aaac522d1996d7bb2b6f9c/src/umls_graph/__pycache__/umls_mrconso.cpython-36.pyc -------------------------------------------------------------------------------- /src/umls_graph/__pycache__/umls_mrconso_concept.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dhchenx/umls-graph/9bf415c727f77e7859aaac522d1996d7bb2b6f9c/src/umls_graph/__pycache__/umls_mrconso_concept.cpython-36.pyc -------------------------------------------------------------------------------- /src/umls_graph/__pycache__/umls_mrconso_string.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhchenx/umls-graph/9bf415c727f77e7859aaac522d1996d7bb2b6f9c/src/umls_graph/__pycache__/umls_mrconso_string.cpython-36.pyc -------------------------------------------------------------------------------- /src/umls_graph/__pycache__/umls_mrconso_term.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhchenx/umls-graph/9bf415c727f77e7859aaac522d1996d7bb2b6f9c/src/umls_graph/__pycache__/umls_mrconso_term.cpython-36.pyc -------------------------------------------------------------------------------- /src/umls_graph/__pycache__/umls_mrdef.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhchenx/umls-graph/9bf415c727f77e7859aaac522d1996d7bb2b6f9c/src/umls_graph/__pycache__/umls_mrdef.cpython-36.pyc -------------------------------------------------------------------------------- /src/umls_graph/__pycache__/umls_mrrel.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhchenx/umls-graph/9bf415c727f77e7859aaac522d1996d7bb2b6f9c/src/umls_graph/__pycache__/umls_mrrel.cpython-36.pyc -------------------------------------------------------------------------------- /src/umls_graph/__pycache__/umls_mrsat.cpython-36.pyc: -------------------------------------------------------------------------------- 
"""Export UMLS tables from MySQL into graph-import CSV files.

This unit consolidates the ``umls_graph`` exporters (``dataset`` plus the
``umls_ambiglui`` ... ``umls_srdef`` modules).  Every ``make_umls_*`` function
reads one UMLS table and writes CSV whose header uses ``:ID`` / ``:LABEL`` or
``:START_ID`` / ``:END_ID`` / ``:TYPE`` columns.

Shared conventions:

* ``mysql_info`` is a mapping with the keys ``"hostname"``, ``"username"``,
  ``"password"`` and ``"database"``.
* If the (first) output file already exists the exporter prints
  ``"<path> exists!"`` and returns without touching the database.
* SQL ``NULL`` is written as an empty cell, and a double quote inside a quoted
  cell is replaced by a single quote so the record stays well-formed.
* Column names are looked up exactly as the server reports them (upper-case
  names such as ``CUI`` are expected).
"""
import os
from contextlib import ExitStack

# Development defaults used when the caller supplies no connection settings.
_DEFAULT_MYSQL_INFO = {
    "database": "umls",
    "username": "root",
    "password": "123456",
    "hostname": "localhost",
}

# A progress line is printed each time this many rows have been written.
_PROGRESS_EVERY = 20 * 10000


# ---------------------------------------------------------------------------
# Cell / record formatting
# ---------------------------------------------------------------------------

def _text(value):
    """Return *value* as text, rendering SQL NULL (``None``) as ``""``."""
    return "" if value is None else str(value)


def _quoted(value):
    """Return *value* as a double-quoted cell with inner ``"`` turned into ``'``."""
    return '"' + _text(value).replace('"', "'") + '"'


def _plain(row, *columns):
    """Return the unquoted cells for *columns* of *row*."""
    return [_text(row[name]) for name in columns]


def _quoted_cells(row, *columns):
    """Return the quoted cells for *columns* of *row*."""
    return [_quoted(row[name]) for name in columns]


def _line(*cells):
    """Join already-formatted cells into one record ending in a newline."""
    return ",".join(cells) + "\n"


# ---------------------------------------------------------------------------
# Shared export machinery
# ---------------------------------------------------------------------------

def _target(save_folder, file_name):
    """Return *file_name* placed inside *save_folder* (unchanged if it is "")."""
    if save_folder != "":
        return os.path.join(save_folder, file_name)
    return file_name


def _already_exported(path):
    """Report (and print) whether *path* exists, i.e. the export is skipped."""
    if os.path.exists(path):
        print(f"{path} exists!")
        return True
    return False


def _run_export(mysql_info, sql, targets, make_lines):
    """Stream the rows selected by *sql* into the files listed in *targets*.

    Args:
        mysql_info: connection settings (see the module docstring).
        sql: the ``SELECT`` statement to execute.
        targets: list of ``(path, header)`` pairs; each header is written
            first, exactly as given.
        make_lines: callable receiving one row (a dict keyed by column name)
            and returning one text chunk per target, in the same order.  A
            chunk may be empty or hold several records.

    Raises:
        Whatever PyMySQL or the file system raises.  In that case the partial
        output files are removed first, so that the next run is not skipped
        by the "already exists" check.
    """
    # Imported here so that importing this code, and the "already exported"
    # fast path, do not require PyMySQL.
    import pymysql
    import pymysql.cursors

    # An unbuffered dict cursor streams the table once instead of paging with
    # LIMIT/OFFSET, which has no defined order without ORDER BY and rescans
    # the skipped rows for every page.
    db = pymysql.connect(
        host=mysql_info["hostname"],
        user=mysql_info["username"],
        password=mysql_info["password"],
        database=mysql_info["database"],
        cursorclass=pymysql.cursors.SSDictCursor,
    )
    try:
        with ExitStack() as stack:
            files = [
                stack.enter_context(open(path, "w", encoding="utf-8"))
                for path, _ in targets
            ]
            for handle, (_, header) in zip(files, targets):
                handle.write(header)

            cursor = db.cursor()
            cursor.execute(sql)
            counter = 0
            for row in cursor:
                for handle, chunk in zip(files, make_lines(row)):
                    handle.write(chunk)
                counter += 1
                if counter % _PROGRESS_EVERY == 0:
                    print('counter = ' + str(counter))
            print('counter = ' + str(counter))
    except BaseException:
        # The files are closed at this point (the ``with`` block has exited).
        for path, _ in targets:
            if os.path.exists(path):
                os.remove(path)
        raise
    finally:
        db.close()


def _export_distinct(mysql_info, column, label, save_path, save_folder):
    """Write one ``<id>,<label>,<id>`` node per distinct *column* of ``mrconso``."""
    save_path = _target(save_folder, save_path)
    if _already_exported(save_path):
        return

    def make_lines(row):
        value = _text(row[column])
        return (_line(value, label, value),)

    # *column* is one of the fixed names passed by the wrappers below, never
    # caller-supplied text, so building the statement by concatenation is safe.
    _run_export(
        mysql_info,
        "SELECT distinct " + column + " FROM mrconso",
        [(save_path, ":ID,:LABEL," + column + "\n")],
        make_lines,
    )


# ---------------------------------------------------------------------------
# Row formatters (one tuple element per output file)
# ---------------------------------------------------------------------------

def _ambig_lines(row, id_column, rel_type):
    """Relationship record ``<id>,<CUI>,"<type>","<id>","<CUI>"``."""
    return (_line(
        *_plain(row, id_column, "CUI"),
        _quoted(rel_type),
        *_quoted_cells(row, id_column, "CUI")
    ),)


def _mrconso_lines(row):
    """Atom node record keyed by AUI."""
    cells = [_text(row["AUI"]), "Atom"]
    cells += _plain(row, "SAB", "CUI", "LUI", "SUI")
    cells.append(_quoted(row["STR"]))
    cells.append(_text(row["AUI"]))
    cells += _quoted_cells(
        row, "TTY", "CODE", "SRL", "SUPPRESS", "CVF",
        "SAUI", "SCUI", "SDUI", "ISPREF", "LAT", "TS",
    )
    return (_line(*cells),)


def _mrdef_lines(row):
    """AUI -> ATUI ``DEF`` relationship plus the Definition node."""
    rel = _line(*_plain(row, "AUI", "ATUI"), _quoted("DEF"))
    node = _line(
        _text(row["ATUI"]),
        "Definition",
        *_quoted_cells(
            row, "CUI", "AUI", "ATUI", "SATUI", "SAB", "DEF", "SUPPRESS", "CVF"
        )
    )
    return rel, node


def _mrrel_lines(row):
    """Relationship record between concepts and/or atoms."""
    # An endpoint is the atom only when its STYPE says "AUI" and the AUI is
    # actually present; otherwise it falls back to the concept.
    start = row["CUI1"]
    if row["STYPE1"] == "AUI" and row["AUI1"] is not None:
        start = row["AUI1"]
    end = row["CUI2"]
    if row["STYPE2"] == "AUI" and row["AUI2"] is not None:
        end = row["AUI2"]
    return (_line(
        _text(start),
        _text(end),
        *_quoted_cells(
            row, "REL", "RELA", "RUI", "SAB", "SL", "RG",
            "STYPE1", "STYPE2", "SRUI", "SUPPRESS", "CVF",
        )
    ),)


def _mrsat_lines(row):
    """Owner -> ATUI relationship (typed by ATN) plus the Attribute node."""
    stype = row["STYPE"]
    if stype in ("AUI", "RUI", "SDUI", "SCUI"):
        start = row["METAUI"]
    elif stype == "CODE":
        start = row["CODE"]
    else:
        start = row["CUI"]
    rel = _line(_text(start), _text(row["ATUI"]), _quoted(row["ATN"]))
    node = _line(
        _text(row["ATUI"]),
        "Attribute",
        *_quoted_cells(
            row, "ATUI", "SATUI", "ATN", "SAB", "ATV", "SUPPRESS", "CVF"
        )
    )
    return rel, node


def _mrsmap_lines(row, seen):
    """``SMAP`` relationship plus MapEntity nodes not yet recorded in *seen*."""
    rel = _line(
        *_quoted_cells(row, "FROMEXPR", "TOEXPR"),
        _quoted("SMAP"),
        *_quoted_cells(
            row, "MAPSETCUI", "MAPSETSAB", "MAPID", "MAPSID", "FROMEXPR",
            "FROMTYPE", "REL", "RELA", "TOEXPR", "TOTYPE", "CVF",
        )
    )
    nodes = []
    for expr, kind in (
        (row["FROMEXPR"], row["FROMTYPE"]),
        (row["TOEXPR"], row["TOTYPE"]),
    ):
        # Expressions of type CUI get no MapEntity node; every other
        # expression is emitted once.
        if kind != "CUI" and expr not in seen:
            nodes.append(
                _line(_quoted(expr), "MapEntity", _quoted(expr), _quoted(kind))
            )
            seen.add(expr)
    return rel, "".join(nodes)


def _mrsty_lines(row):
    """CUI -> TUI ``HAS_SR`` relationship record."""
    return (_line(
        *_plain(row, "CUI", "TUI"),
        _quoted("HAS_SR"),
        *_quoted_cells(row, "CUI", "TUI", "STN", "STY", "ATUI", "CVF")
    ),)


def _srdef_lines(row):
    """Semantic-network node record: twelve cells matching the header."""
    unit = "" if row["UN"] == "NULL" else row["UN"]
    cells = _plain(row, "UI", "RT")  # :ID and :LABEL
    cells += _quoted_cells(row, "UI", "RT", "STY_RL", "STN_RTN", "DEF", "EX")
    cells.append(_quoted(unit))
    cells += _quoted_cells(row, "NH", "ABR", "RIN")
    return (_line(*cells),)


# ---------------------------------------------------------------------------
# Public exporters
# ---------------------------------------------------------------------------

def make_umls_ambiglui(mysql_info, save_path="umls_ambig_lui.csv", save_folder=""):
    """Export ``ambiglui`` as LUI -> CUI ``AMBIGLUI`` relationships."""
    save_path = _target(save_folder, save_path)
    if _already_exported(save_path):
        return
    _run_export(
        mysql_info,
        "SELECT * from ambiglui",
        [(save_path, ":START_ID,:END_ID,:TYPE,LUI,CUI\n")],
        lambda row: _ambig_lines(row, "LUI", "AMBIGLUI"),
    )


def make_umls_ambigsui(mysql_info, save_path="umls_ambig_sui.csv", save_folder=""):
    """Export ``ambigsui`` as SUI -> CUI ``AMBIGSUI`` relationships."""
    save_path = _target(save_folder, save_path)
    if _already_exported(save_path):
        return
    _run_export(
        mysql_info,
        "SELECT * from ambigsui",
        [(save_path, ":START_ID,:END_ID,:TYPE,SUI,CUI\n")],
        lambda row: _ambig_lines(row, "SUI", "AMBIGSUI"),
    )


def make_umls_mrconso(mysql_info, save_path="umls_aui_nodes.csv", save_folder=""):
    """Export every ``mrconso`` row as an ``Atom`` node keyed by AUI."""
    save_path = _target(save_folder, save_path)
    if _already_exported(save_path):
        return
    header = (":ID,:LABEL,SAB,CUI,LUI,SUI,STR,AUI,TTY,CODE,SRL,SUPPRESS,CVF,"
              "SAUI,SCUI,SDUI,ISPREF,LAT,TS\n")
    _run_export(mysql_info, "SELECT * from mrconso", [(save_path, header)],
                _mrconso_lines)


def make_umls_mrconso_concept(mysql_info, save_path="umls_cui_nodes.csv", save_folder=""):
    """Export the distinct CUIs of ``mrconso`` as ``Concept`` nodes."""
    _export_distinct(mysql_info, "CUI", "Concept", save_path, save_folder)


def make_umls_mrconso_string(mysql_info, save_path="umls_sui_nodes.csv", save_folder=""):
    """Export the distinct SUIs of ``mrconso`` as ``String`` nodes."""
    _export_distinct(mysql_info, "SUI", "String", save_path, save_folder)


def make_umls_mrconso_term(mysql_info, save_path="umls_lui_nodes.csv", save_folder=""):
    """Export the distinct LUIs of ``mrconso`` as ``Term`` nodes."""
    _export_distinct(mysql_info, "LUI", "Term", save_path, save_folder)


def make_umls_mrdef(mysql_info, save_path_rels="umls_def_rels.csv",
                    save_path_nodes="umls_def_nodes.csv", save_folder=""):
    """Export ``mrdef`` as ``Definition`` nodes and AUI -> ATUI relationships.

    Only the relationship file is checked for existence; the node file is
    always rewritten together with it.
    """
    save_path_rels = _target(save_folder, save_path_rels)
    save_path_nodes = _target(save_folder, save_path_nodes)
    if _already_exported(save_path_rels):
        return
    _run_export(
        mysql_info,
        "SELECT * from mrdef",
        [
            (save_path_rels, ":START_ID,:END_ID,:TYPE\n"),
            (save_path_nodes,
             ":ID,:LABEL,CUI,AUI,ATUI,SATUI,SAB,DEF,SUPPRESS,CVF\n"),
        ],
        _mrdef_lines,
    )


def make_umls_mrrel(mysql_info, save_path="umls_rels.csv", save_folder=""):
    """Export ``mrrel`` as relationships between concepts and/or atoms."""
    save_path = _target(save_folder, save_path)
    if _already_exported(save_path):
        return
    header = (":START_ID,:END_ID,:TYPE,RELA,RUI,SAB,SL,RG,STYPE1,STYPE2,SRUI,"
              "SUPPRESS,CVF\n")
    _run_export(mysql_info, "SELECT * from mrrel", [(save_path, header)],
                _mrrel_lines)


def make_umls_mrsat(mysql_info, save_path_rels="umls_rels.csv",
                    save_path_nodes="umls_atui_nodes.csv", save_folder=""):
    """Export ``mrsat`` as ``Attribute`` nodes and owner -> ATUI relationships.

    NOTE: the default ``save_path_rels`` is the same file name that
    ``make_umls_mrrel`` writes by default.  The default is kept for backward
    compatibility; pass an explicit name when both exports share a folder
    (``make_umls_all`` does).
    """
    save_path_rels = _target(save_folder, save_path_rels)
    save_path_nodes = _target(save_folder, save_path_nodes)
    if _already_exported(save_path_rels):
        return
    _run_export(
        mysql_info,
        "SELECT * from mrsat",
        [
            (save_path_rels, ":START_ID,:END_ID,:TYPE\n"),
            (save_path_nodes,
             ":ID,:LABEL,ATUI,SATUI,ATN,SAB,ATV,SUPPRESS,CVF\n"),
        ],
        _mrsat_lines,
    )


def make_umls_mrsmap(mysql_info, save_path_rels="umls_smap_rels.csv",
                     save_path_nodes="umls_smap_nodes.csv", save_folder=""):
    """Export ``mrsmap`` as ``SMAP`` relationships and ``MapEntity`` nodes."""
    save_path_rels = _target(save_folder, save_path_rels)
    save_path_nodes = _target(save_folder, save_path_nodes)
    if _already_exported(save_path_rels):
        return
    header = (":START_ID,:END_ID,:TYPE,MAPSETCUI,MAPSETSAB,MAPID,MAPSID,"
              "FROMEXPR,FROMTYPE,REL,RELA,TOEXPR,TOTYPE,CVF\n")
    seen = set()  # expressions that already have a MapEntity node
    _run_export(
        mysql_info,
        "SELECT * from mrsmap",
        [
            (save_path_rels, header),
            (save_path_nodes, ":ID,:LABEL,EXPR,EXPR_TYPE\n"),
        ],
        lambda row: _mrsmap_lines(row, seen),
    )


def make_umls_mrsty(mysql_info, save_path="umls_sty_rels.csv", save_folder=""):
    """Export ``mrsty`` as CUI -> TUI ``HAS_SR`` relationships."""
    save_path = _target(save_folder, save_path)
    if _already_exported(save_path):
        return
    _run_export(
        mysql_info,
        "SELECT * from mrsty",
        [(save_path, ":START_ID,:END_ID,:TYPE,CUI,TUI,STN,STY,ATUI,CVF\n")],
        _mrsty_lines,
    )


def make_umls_srdef(mysql_info, save_path="umls_sr_nodes.csv", save_folder=""):
    """Export ``srdef`` as nodes keyed by UI and labelled with RT."""
    save_path = _target(save_folder, save_path)
    if _already_exported(save_path):
        return
    _run_export(
        mysql_info,
        "SELECT * FROM srdef",
        [(save_path,
          ":ID,:LABEL,UI,RT,STY_RL,STN_RTN,DEF,EX,UN,NH,ABR,RIN\n")],
        _srdef_lines,
    )


def make_umls_all(save_folder, mysql_info=None):
    """Run every exporter, writing all CSV files into *save_folder*.

    Args:
        save_folder: directory that receives the CSV files.
        mysql_info: connection settings; when ``None`` the local development
            defaults (database ``umls`` on ``localhost``) are used.
    """
    if mysql_info is None:
        mysql_info = dict(_DEFAULT_MYSQL_INFO)

    make_umls_ambiglui(mysql_info, save_folder=save_folder)
    make_umls_ambigsui(mysql_info, save_folder=save_folder)
    make_umls_mrconso(mysql_info, save_folder=save_folder)
    make_umls_mrconso_concept(mysql_info, save_folder=save_folder)
    make_umls_mrconso_string(mysql_info, save_folder=save_folder)
    make_umls_mrconso_term(mysql_info, save_folder=save_folder)
    make_umls_mrdef(mysql_info, save_folder=save_folder)
    make_umls_mrrel(mysql_info, save_folder=save_folder)
    # MRSAT must not reuse "umls_rels.csv": MRREL has just written it, so the
    # attribute export would be skipped as "already exported".
    make_umls_mrsat(mysql_info, save_path_rels="umls_atui_rels.csv",
                    save_folder=save_folder)
    make_umls_mrsmap(mysql_info, save_folder=save_folder)
    make_umls_mrsty(mysql_info, save_folder=save_folder)
    make_umls_srdef(mysql_info, save_folder=save_folder)


if __name__ == "__main__":
    # Sample runs carried over from the ambiglui / ambigsui module scripts.
    make_umls_ambiglui(dict(_DEFAULT_MYSQL_INFO),
                       "../../examples/umls_datasets/umls_ambig_lui.csv")
    make_umls_ambigsui(dict(_DEFAULT_MYSQL_INFO),
                       "../../examples/umls_datasets/umls_ambig_sui.csv")
-------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhchenx/umls-graph/9bf415c727f77e7859aaac522d1996d7bb2b6f9c/tests/__init__.py --------------------------------------------------------------------------------